A sampling distribution is a theoretical distribution of estimates obtained in repeated sampling
What could have happened?
A standard error (SE) is the standard deviation of the sampling distribution
We can calculate SEs via simulation and analytically
We can use SEs to construct confidence intervals and conduct hypothesis tests allowing us to quantify uncertainty
Sampling distributions and standard errors
Populations and samples
Population: All the cases from which you could have sampled
Parameter: A quantity or quantities of interest often generically called \(\theta\) (“theta”). What we want to learn about our population
Sample: A (random) draw of observations from that population
Sample Size: The number of observations in your draw (without replacement)
Estimators, estimates, and statistics
Estimator: A rule for calculating an estimate of our parameter of interest.
Estimate: The value produced by some estimator for some parameter from some data. Often called \(\hat{\theta}\)
Unbiased estimators: \(E(\hat{\theta})=\theta\). On average, the estimates produced by some estimator will be centered around the truth
Consistent estimators: \(\hat{\theta}_n \overset{p}{\longrightarrow} \theta\) as \(n \to \infty\). As the sample size increases, the estimates from an estimator converge in probability to the parameter value
Statistic: A summary of the data (mean, regression coefficient, \(R^2\)). An estimator without a specified target of inference
Distributions and standard errors
Sampling Distribution: How some estimate would vary if you took repeated samples from the population
Standard Error: The standard deviation of the sampling distribution
Resampling Distribution: How some estimate would vary if you took repeated samples from your sample WITH REPLACEMENT
“Sampling from our sample, as the sample was sampled from the population.”
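The resampling idea can be made concrete with a minimal sketch. This is illustrative only (simulated data via `rnorm()`, not from the lecture): we treat one draw as "the sample," bootstrap it, and compare the bootstrap SE to the analytic SE of the mean.

```r
# Sketch: bootstrap SE of a sample mean (simulated data, for illustration)
set.seed(123)
x <- rnorm(100, mean = 5, sd = 2)   # pretend this is our one sample

boot_means <- replicate(1000, {
  # "Sample from our sample, as the sample was sampled from the population"
  mean(sample(x, size = length(x), replace = TRUE))
})

# Bootstrap SE: the SD of the resampling distribution
sd(boot_means)

# Analytic SE of the mean, for comparison
sd(x) / sqrt(length(x))
```

The two SEs should be close, which previews the takeaway below that bootstrapped and analytic SEs generally agree for simple estimators.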
Confidence intervals
provide a way of quantifying uncertainty about estimates
describe a range of plausible values for an estimate
are a function of the standard error of the estimate and a critical value determined by \(\alpha\), which reflects the degree of confidence we want
Figure 1 shows three confidence intervals for three samples of different sizes (N = 10, 30, 300). The CIs for N = 10 and N = 300 contain the truth (include the population mean). By chance, the CI for N = 30 misses the truth.
Figure 2 shows that our confidence is about a property of the interval: over repeated sampling, 95% of the intervals would contain the truth and 5% would not.
In any one sample, the population parameter either is or is not within the interval.
Figure 3 shows that while the width of the interval declines with the sample size, the coverage properties remain the same.
Interpreting confidence intervals
Confidence intervals give a range of values that are likely to include the true value of the parameter \(\theta\) with probability \((1-\alpha) \times 100\%\)
\(\alpha = 0.05\) corresponds to a “95-percent confidence interval”
Our “confidence” is about the interval
In repeated sampling, we expect that \((1-\alpha) \times 100\%\) of the intervals we construct would contain the truth.
For any one interval, the truth, \(\theta\), either falls within the lower and upper bounds of the interval or it does not.
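The repeated-sampling claim is easy to check by simulation. A minimal sketch, assuming Normal data with a known population mean (all values made up for illustration):

```r
# Sketch: coverage of 95% CIs over repeated samples (simulated data)
set.seed(42)
mu <- 10   # the "true" population mean

covers <- replicate(1000, {
  x  <- rnorm(30, mean = mu, sd = 3)            # one sample of n = 30
  se <- sd(x) / sqrt(length(x))                 # analytic SE of the mean
  ci <- mean(x) + c(-1, 1) * qt(0.975, df = length(x) - 1) * se
  ci[1] <= mu & mu <= ci[2]                     # does this interval cover mu?
})

mean(covers)  # proportion of intervals containing the truth; close to 0.95
```

Any single interval either covers `mu` or it does not; the 95% describes the long-run proportion of intervals that do.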
Hypothesis testing
What is a hypothesis test
A formal way of assessing statistical evidence. It combines:
Deductive reasoning: deriving the distribution of a test statistic, if the null hypothesis were true
Inductive reasoning: asking, based on the test statistic we observed, how likely it is that we would observe it if the null were true
What is a test statistic?
A way of summarizing data
difference of means
coefficients from a linear model
coefficients from a linear model divided by their standard errors
Different test statistics may be more or less appropriate depending on your data and questions.
What is a null hypothesis?
A statement about the world
Only interesting if we reject it
Would yield a distribution of test statistics under the null
Typically something like “X has no effect on Y” (Null = no effect)
We never accept the null; we can only reject or fail to reject it
What is a p-value?
A p-value is a conditional probability summarizing how likely we would be to observe a test statistic as extreme as ours, or more extreme, if our hypothesis were true.
How do we do hypothesis testing?
Posit a hypothesis (e.g. \(\beta = 0\))
Calculate the test statistic (e.g. \((\hat{\beta}-\beta)/se_\beta\))
Derive the distribution of the test statistic under the null via simulation or asymptotic theory
Compare the test statistic to the distribution under the null
Calculate p-value (Two Sided vs One sided tests)
Reject or fail to reject/retain our hypothesis based on some threshold of statistical significance (e.g. p < 0.05)
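The recipe above can be sketched in a few lines. All numbers here are hypothetical placeholders (not from any model in these notes), chosen only to show the mechanics:

```r
# Sketch of the hypothesis-testing recipe with made-up numbers
beta_hat <- 0.17    # hypothetical estimate
se_beta  <- 0.05    # hypothetical standard error
beta_0   <- 0       # null hypothesis: beta = 0
dof      <- 1684    # hypothetical residual degrees of freedom (n - k)

# Step 2: the test statistic
t_stat <- (beta_hat - beta_0) / se_beta

# Steps 3-5: compare to the t distribution under the null (two-sided p-value)
p_value <- 2 * pt(-abs(t_stat), df = dof)

# Step 6: reject or fail to reject at alpha = 0.05
p_value < 0.05
```

A one-sided test would use `pt()` once, without doubling, with the sign of the comparison chosen by the direction of the alternative hypothesis.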
Outcomes of hypothesis tests
A hypothesis test has two possible conclusions: we can reject, or fail to reject, the null hypothesis.
We never “accept” a hypothesis, since there are, in theory, an infinite number of other hypotheses we could have tested.
Our decision can produce four outcomes and two types of error:
|                  | Reject \(H_0\) | Fail to Reject \(H_0\) |
|------------------|----------------|------------------------|
| \(H_0\) is true  | False Positive | Correct!               |
| \(H_0\) is false | Correct!       | False Negative         |

Type 1 Errors: the false positive rate, set by the significance threshold \(\alpha\) (e.g. 0.05)
Type 2 Errors: the false negative rate (1 - the power of the test)
The Estimate column contains the regression coefficients, \(\hat{\beta}\)
Recall, lm_robust() calculates these:
\[
\hat{\beta} = (X'X)^{-1}X'y
\]
Tip
\(\beta\)s describe substantive relationships between predictors (income, education) and the outcome (political participation)
coef(m1)
(Intercept) income education
0.311609712 0.007034253 0.166755964
X <- model.matrix(m1, data = df)
y <- model.frame(m1)$dv_participation
betas <- solve(t(X) %*% X) %*% t(X) %*% y
betas

                   [,1]
(Intercept) 0.311609712
income      0.007034253
education   0.166755964
A unit increase in education is associated with about 0.167 more acts of political participation, while a unit increase in income is associated with about 0.007 more acts of participation.
Note that both income and education are measured with ordinal scales
get_value_labels(df$educ)
No HS credential High school graduate Some college
1 2 3
2-year degree 4-year degree Post-grad
4 5 6
Such that it might be unreasonable to assume cardinality (i.e., that going from a 1 to a 2 represents the same change as going from a 3 to a 4)
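If the cardinality assumption is worrying, one common check is to enter the ordinal predictor as a factor, so each level gets its own coefficient. A sketch, assuming the same `df` data frame and `dv_participation` outcome used above (the model name `m1_factor` is ours, not the lecture's):

```r
# Sketch: relax the cardinality assumption by entering education as a factor
library(estimatr)

m1_factor <- lm_robust(dv_participation ~ factor(education) + income, data = df)

# One coefficient per education level, each relative to the lowest category;
# roughly linear steps between levels would support the cardinal coding
coef(m1_factor)
```

If the level-by-level coefficients increase in roughly equal steps, the simpler linear-in-education model loses little.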
The main takeaway here is that for linear models, bootstrapped SEs and CIs are quite similar to those obtained analytically (via math and asymptotic theory)
For common estimators and large samples, we’ll generally use analytic SEs (quicker)
For less common estimators (e.g. ratios of estimates), analytic estimates of the SEs may not exist. Bootstrapping will still provide valid SEs, provided we “sample from the sample, as the sample was drawn from the population”
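A toy example of that second case, with simulated data (none of these numbers come from the lecture): the SE of a ratio of two means has no simple analytic form, but the bootstrap handles it directly.

```r
# Sketch: bootstrapped SE for a ratio of means (simulated data)
set.seed(7)
d <- data.frame(
  y1 = rnorm(200, mean = 4, sd = 1),
  y2 = rnorm(200, mean = 2, sd = 1)
)

ratios <- replicate(1000, {
  i <- sample(nrow(d), replace = TRUE)   # resample rows of the sample
  mean(d$y1[i]) / mean(d$y2[i])          # recompute the ratio each time
})

sd(ratios)                          # bootstrap SE of the ratio
quantile(ratios, c(0.025, 0.975))   # percentile 95% CI
```

Resampling whole rows preserves the dependence between `y1` and `y2`, which is what "sampling from the sample as it was drawn from the population" requires.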
The test statistic (“t-stat”) reported by lm() and lm_robust() is our observed coefficient, \(\hat{\beta}\) minus our hypothesized value \(\beta\) (e.g. 0), divided by the standard error of \(\hat{\beta}\).
\[t= \frac{\hat\beta-\beta}{\widehat{SE}_{\hat{\beta}}} \sim \text{Student's } t \text{ with } n-k \text{ degrees of freedom}\] which follows a \(t\) distribution – like a Normal with “heavier tails” (i.e. more probability assigned to extreme values)
The p-value for the coefficient on education is less than 0.05, while the p-value for income is 0.50.
If there were no relationship between education and participation (\(H_0:\beta_2=0\)), it would be quite unlikely that we would observe a test statistic of 6.89 or larger.
Similarly, test statistics as large or larger than 0.671 occur quite frequently in a world where there is no relationship (\(H_0:\beta_3=0\)) between income and participation.
Thus we reject the null hypothesis for education, but fail to reject the null hypothesis for income in this model.
To help our interpretations we’ll produce plots of predicted values of participation, at varying levels of income and education.
# Fit model
m2 <- lm_robust(dv_participation ~ education*income, df)

# Regression table
m2_tab <- htmlreg(
  m2,
  include.ci = F,
  digits = 3,
  stars = c(0.05, 0.10)
)

# Predicted values
# Data frame of values we want to make predictions at
pred_df <- expand_grid(
  income = sort(unique(df$income)),
  education = quantile(df$education, na.rm = T)[c(2, 4)]
)

# Combine model predictions
pred_df <- cbind(pred_df, predict(m2, pred_df, interval = "confidence")$fit)

# Plot predicted values
fig_m2_pred <- pred_df %>%
  mutate(
    Education = ifelse(education == 2, "High school", "College")
  ) %>%
  ggplot(aes(income, fit, group = Education)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = Education), alpha = .5) +
  geom_line() +
  theme_minimal() +
  labs(y = "Predicted Participation", x = "Income", title = "")
Statistical models

| Term             | Model 1          |
|------------------|------------------|
| (Intercept)      | 0.060 (0.151)    |
| education        | 0.242** (0.050)  |
| income           | 0.048** (0.024)  |
| education:income | -0.011* (0.006)  |
| R²               | 0.042            |
| Adj. R²          | 0.040            |
| Num. obs.        | 1687             |
| RMSE             | 1.286            |

**p < 0.05; *p < 0.1
Low income individuals with a college degree participate at significantly higher rates than individuals with similar levels of income but only a high school diploma.
Alternatively, we might say that the college educated tend to participate at similar levels, regardless of their level of income, while income has a marginally positive relationship with participation for those without college degrees.
Note
Is this a causal relationship? What assumptions would we need to make a causal claim about the effects of education on participation?
Today, we examine the interaction models at the heart of Grumbach and Hill’s claims
# Load data
load(url("https://pols1600.paultesta.org/files/data/cps_clean.rda"))

# Recode data
presidential_elections <- seq(1980, 2016, by = 4)
cps %>%
  mutate(
    age_group = fct_relevel(age_group, "65+"),
    SDR = ifelse(sdr == 1, "SDR", "non-SDR"),
    election_type = ifelse(year %in% presidential_elections, "General", "Midterm"),
  ) -> cps

# ---- m1: Simple OLS regression ----
m1 <- lm_robust(dv_voted ~ sdr,
                data = cps,
                se_type = "classical",
                try_cholesky = T)

# ---- m2: Simple OLS with robust standard errors ----
m2 <- lm_robust(dv_voted ~ sdr,
                data = cps,
                se_type = "stata",
                try_cholesky = T)

# ---- m3: Two-way Fixed Effects for State and Year ----
m3 <- lm_robust(dv_voted ~ sdr,
                data = cps,
                fixed_effects = ~ st + year,
                se_type = "stata",
                try_cholesky = T)

# ---- m4: TWFE for State and Year and cluster robust SEs ----
m4 <- lm_robust(dv_voted ~ sdr,
                data = cps,
                fixed_effects = ~ st + year,
                se_type = "stata",
                clusters = st,
                try_cholesky = T)
Statistical models

| Term        | Model 1             | Model 2             | Model 3            | Model 4           |
|-------------|---------------------|---------------------|--------------------|-------------------|
| (Intercept) | 0.54787*** (0.00037)| 0.54787*** (0.00038)|                    |                   |
| sdr         | 0.06159*** (0.00110)| 0.06159*** (0.00108)| 0.00671*** (0.00185)| 0.00671 (0.01405)|
| R²          | 0.00157             | 0.00157             | 0.02845            | 0.02845           |
| Adj. R²     | 0.00157             | 0.00157             | 0.02841            | 0.02841           |
| Num. obs.   | 1988501             | 1988501             | 1988501            | 1988501           |
| RMSE        | 0.49658             | 0.49658             | 0.48986            | 0.48986           |
| N Clusters  |                     |                     |                    | 49                |

***p < 0.001; **p < 0.01; *p < 0.05
Fixed effects drastically reduce the overall size of the effect of SDR
Robust standard errors change our estimates of uncertainty.
Adjusting for:
Simple non-constant error variance \(\to\) no difference
Cluster \(\to\) non-significant result
Generally prefer conservative models (e.g. m4)
SDR and Age
Grumbach and Hill, however, are interested in a different question, namely, whether the effects of same day registration vary by age.
Formally, we can describe these models using an interaction model:
\[
y_{ist} = \beta_0 + \overbrace{\alpha_s}^{\text{FE State}} + \underbrace{\gamma_t}_{\text{FE Year}} + \overbrace{\beta_1 sdr_{st}}^{\text{ME of SDR for 65+}} + \underbrace{\sum_{k = \text{18-24}}^{k = \text{55-64}}\beta_{k}\, sdr_{st}\times age_{ist}}_{\Delta \text{ in ME of SDR for Age }} +\overbrace{X\beta}^{\text{Controls}}+\epsilon_{ist}
\] Where the marginal effect of SDR varies based on the age of the respondent.
Marginal Effects: General
The marginal effect of any variable in a multiple regression is the partial derivative of the outcome with respect to that variable.
\[
y_{ist} = \beta_0 + \overbrace{\alpha_s}^{\text{FE State}} + \underbrace{\gamma_t}_{\text{FE Year}} + \overbrace{\beta_1 sdr_{st}}^{\text{ME of SDR for 65+}} + \underbrace{\sum_{k = \text{18-24}}^{k = \text{55-64}}\beta_{k}\, sdr_{st}\times age_{ist}}_{\Delta \text{ in ME of SDR for Age }} +\overbrace{X\beta}^{\text{Controls}}+\epsilon_{ist}
\]
Since 65+ is the excluded/reference category:
\[
\text{Marginal Effect of SDR for 65+} = \beta_1
\]
For 18-24 year olds the quantity of interest is the sum of:
\[
\text{Marginal Effect of SDR for 18-24} = \beta_1 + \beta_{sdr \times 18-24}
\]
The SEs of the marginal effects are a function of the variances and covariances of both \(\beta_1\) and \(\beta_{sdr\times age}\)
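Concretely, since the marginal effect for an interacted age group is the sum \(\hat\beta_1 + \hat\beta_{sdr\times age}\), and the variance of a sum of two estimated coefficients includes their covariance, the standard error is:

\[
\widehat{SE}\left(\hat\beta_1 + \hat\beta_{sdr\times age}\right) = \sqrt{\widehat{Var}(\hat\beta_1) + \widehat{Var}(\hat\beta_{sdr\times age}) + 2\,\widehat{Cov}(\hat\beta_1, \hat\beta_{sdr\times age})}
\]

Ignoring the covariance term is what produces incorrect SEs for the marginal effects.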
Let’s estimate the interaction models used by G&H and recreate Figure 3, calculating the correct standard errors for the interactions
m1gh <- lm_robust(dv_voted ~ sdr * age_group,
                  data = cps,
                  fixed_effects = ~ st + year,
                  se_type = "stata",
                  clusters = st,
                  try_cholesky = T)

m2gh <- lm_robust(dv_voted ~ sdr * age_group + factor(race) + is_female + income + education,
                  data = cps,
                  fixed_effects = ~ st + year,
                  se_type = "stata",
                  clusters = st,
                  try_cholesky = T)

# ---- Function to calculate marginal effect and SE of interactions ----
me_fn <- function(mod, cohort, ci = 0.95) {
  # Confidence level for CI
  alpha <- 1 - ci
  z <- qnorm(1 - alpha / 2)
  # Age (always one for indicator of specific cohort)
  age <- 1
  # Variance-covariance matrix from model
  cov <- vcov(mod)
  # Coefficient for SDR (marginal effect for reference category: 65+)
  b1 <- coef(mod)["sdr"]
  # If age is one of the interactions
  if (cohort %in% c("18-24", "25-34", "35-44", "45-54", "55-64")) {
    # Get the name of the specific interaction
    the_int <- paste("sdr:age_group", cohort, sep = "")
    # The coefficient on the interaction
    b2 <- coef(mod)[the_int]
    # Calculate marginal effect for age cohort
    me <- b1 + b2 * age
    me_se <- sqrt(cov["sdr", "sdr"] + age^2 * cov[the_int, the_int] +
                    2 * age * cov["sdr", the_int])
    ll <- me - z * me_se
    ul <- me + z * me_se
  }
  if (!cohort %in% c("18-24", "25-34", "35-44", "45-54", "55-64")) {
    me <- b1
    me_se <- mod$std.error["sdr"]
    ll <- mod$conf.low["sdr"]
    ul <- mod$conf.high["sdr"]
  }
  # Scale results to be percentage points
  res <- tibble(
    Age = cohort,
    Effect = me * 100,
    SE = me_se * 100,
    ll = ll * 100,
    ul = ul * 100
  )
  return(res)
}

## List of age cohorts
the_age_groups <- levels(cps$age_group)

## Model 1: No controls
## Estimate marginal effect for each age cohort
the_age_groups %>%
  purrr::map_df(~ me_fn(m1gh, cohort = .)) %>%
  # Add labels for plotting
  mutate(
    Age = factor(Age),
    Model = "No controls"
  ) -> fig3_no_controls

## Model 2: Controls for education, income, race, and sex
## Estimate marginal effect for each age cohort
the_age_groups %>%
  purrr::map_df(~ me_fn(m2gh, cohort = .)) %>%
  # Add labels for plotting
  mutate(
    Age = factor(Age),
    Model = "With controls"
  ) -> fig3_controls

## Combine estimates into data frame for plotting
fig3_df <- fig3_no_controls %>%
  bind_rows(fig3_controls)

## Recreate Figure 3
fig3_df %>%
  ggplot(aes(Age, Effect)) +
  geom_point() +
  geom_linerange(aes(ymin = ll, ymax = ul)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  facet_wrap(~Model) +
  theme_minimal() -> fig3
Statistical models

| Term               | Model 1            | Model 2             |
|--------------------|--------------------|---------------------|
| sdr                | 0.0120 (0.0162)    | 0.0004 (0.0128)     |
| age_group18-24     | -0.3518*** (0.0061)| -0.4125*** (0.0051) |
| age_group25-34     | -0.2149*** (0.0060)| -0.3198*** (0.0040) |
| age_group35-44     | -0.1026*** (0.0063)| -0.2179*** (0.0038) |
| age_group45-54     | -0.0431*** (0.0059)| -0.1450*** (0.0038) |
| age_group55-64     | 0.0064 (0.0047)    | -0.0642*** (0.0036) |
| sdr:age_group18-24 | 0.0142 (0.0108)    | 0.0472*** (0.0081)  |
| sdr:age_group25-34 | 0.0003 (0.0137)    | 0.0219** (0.0076)   |
| sdr:age_group35-44 | -0.0142 (0.0162)   | 0.0074 (0.0087)     |
| sdr:age_group45-54 | -0.0147 (0.0120)   | 0.0001 (0.0068)     |
| sdr:age_group55-64 | -0.0149 (0.0096)   | -0.0108 (0.0062)    |
| factor(race)200    |                    | 0.0518*** (0.0081)  |
| factor(race)300    |                    | -0.0548*** (0.0118) |
| factor(race)650    |                    | -0.1619*** (0.0305) |
| factor(race)651    |                    | -0.1784*** (0.0090) |
| factor(race)652    |                    | -0.1277*** (0.0188) |
| factor(race)700    |                    | -0.0916*** (0.0236) |
| factor(race)801    |                    | 0.0130 (0.0110)     |
| factor(race)802    |                    | -0.0233* (0.0097)   |
| factor(race)803    |                    | -0.0406 (0.0277)    |
| factor(race)804    |                    | -0.0566 (0.0403)    |
| factor(race)805    |                    | 0.0548 (0.0291)     |
| factor(race)806    |                    | -0.0432 (0.0520)    |
| factor(race)807    |                    | -0.0486 (0.0574)    |
| factor(race)808    |                    | -0.0593 (0.0473)    |
| factor(race)809    |                    | -0.1125*** (0.0271) |
| factor(race)810    |                    | 0.0169 (0.0340)     |
| factor(race)811    |                    | -0.0932 (0.1396)    |
| factor(race)812    |                    | -0.0474 (0.0891)    |
| factor(race)813    |                    | -0.0445*** (0.0076) |
| factor(race)814    |                    | -0.0316 (0.1223)    |
| factor(race)815    |                    | -0.1124 (0.0601)    |
| factor(race)816    |                    | 0.2548 (0.1910)     |
| factor(race)817    |                    | 0.1245 (0.1979)     |
| factor(race)818    |                    | 0.8195*** (0.0074)  |
| factor(race)819    |                    | -0.0831 (0.1869)    |
| factor(race)820    |                    | 0.0325 (0.0592)     |
| factor(race)830    |                    | -0.1871** (0.0667)  |
| is_female          |                    | 0.0240*** (0.0017)  |
| income             |                    | 0.0093*** (0.0002)  |
| education          |                    | 0.0895*** (0.0016)  |
| R²                 | 0.0864             | 0.1707              |
| Adj. R²            | 0.0864             | 0.1706              |
| Num. obs.          | 1980510            | 1616508             |
| RMSE               | 0.4748             | 0.4515              |
| N Clusters         | 49                 | 49                  |

***p < 0.001; **p < 0.01; *p < 0.05
Why the differences?
Differences in how we calculated the SEs for the marginal effects