This .Rmd file replicates all analyses from Incentives can spur Covid-19 vaccination uptake and saves tex tables and pdf figures as outputs. Data used can be downloaded here.

All code and input files are available at https://github.com/wzb-ipi/covid_hesitancy_2021

1 Housekeeping

knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, comment=NA)
options(qwraps2_markup = "markdown")

Packages: The next code chunk uses pacman to install and load all packages.

if (!require(pacman)) install.packages("pacman")

pacman::p_load(
  rgdal,       # geo data
  geojsonsf,   # read json
  rgeos,       # geo data
  sf,          # geo data
  gridExtra,   # plot arrange
  gtrendsR,    # google trends
  aod,         # hypothesis testing (1.3.1)
  car,         # linear hypothesis testing for causal tree (3.0-2)
  corrplot,    # Correlations (0.84)
  DeclareDesign, 
  dplyr,       # Data manipulation (0.8.0.1)
  evtree,      # evolutionary learning of globally optimal trees (1.0-7)
  fastDummies,
  fBasics,     # Summary statistics (3042.89)
  ggplot2,
  ggpubr,
  grf,         # Generalized random forests (0.10.2)
  haven,       # load sav
  interplot,   # Interactions plotting
  kableExtra,  # Prettier RMarkdown (1.0.1)
  knitr,
  labelled,
  psych,       # Correlation p-values (1.8.12)
  purrr,
  rpart,       # Classification and regression trees, or CART (4.1-13)
  reshape2,
  rpart.plot,  # Plotting trees (3.0.6)
  readr,       # Reading csv files (1.3.1)
  sjlabelled,
  stats,
  summarytools,
  texreg,
  tidyverse,
  tidyselect,
  tidyr,       # Database operations (0.8.3)
  treeClust,   # Predicting leaf position for causal trees (1.1-7)
  tibble)      # Modern alternative to data frames (2.1.1)

# Number of trees for causal forests
N_trees = 1000

# Set seed for reproducibility
set.seed(201911)

Paths: The next code chunk provides paths for pulling data and saving figures and tables.

# PNAS figures
fig_1_AB_path <- "output/fig_1_AB.pdf"
fig_2_ABC_path <- "output/fig_2_ABC.pdf"

# Interim figures
fig_1_path <- "output/figure_1.pdf"
fig_2_path <- "output/figure_2.pdf"
fig_3_path <- "output/figure_3.pdf"
fig_4_path <- "output/figure_4.pdf"
google_plot_path <- "output/figure_5.pdf"
corr_matrix_plot_path <- "output/figure_6.pdf"
correlates_plot_path  <- "output/figure_7.pdf"
undecided_cf_path     <- "output/figure_8.pdf"
blps_plot_path <- "output/figure_9.pdf"
lm_plot_path <- "output/figure_10.pdf"

tab_1_path <- "output/table_1.tex"
tab_2_path <- "output/table_2.tex"
tab_3_path <- "output/table_3.tex"

# variable names and labels
var_list <- read.csv("input/vars.csv") %>% arrange(order)

covariate_names <- var_list$new_name[var_list$covariate==1]
  
# Labels for treatments
treatments <- treatment_levels <- c("financialA","financialB", "negative", "transaction")
treatment_levels_diff <- c("financialA_diff","financialB_diff", "negative_diff", "transaction_diff")
treatment_labels <- c("Financial 25", "Financial 50", "Freedoms", "Local Doctors")

statuses <- c("Acceptant", "Refusals", "Undecided")


# Training fraction
train_fraction <- 1  # for causal forests: currently all data used

2 Prepare Data

2.1 Import data

Import data, get labels for treatment, rename variables, generate differences.

Raw data has treatments recorded in a single vector with 12 values. These are now transformed into the underlying factors.

# get labels
Z_labels <- read.csv("input/Z_labels.csv") %>% dplyr::select(-X)
kable(Z_labels, booktabs = TRUE)

Z_1_1	negative_t1	financialA_t1	financialB_t1	transaction_t1
1	0	0	0	0
2	0	0	0	1
3	0	1	0	0
4	0	0	1	0
5	1	0	0	0
6	1	0	0	1
7	1	1	0	1
8	1	0	1	1
9	0	1	0	1
10	0	0	1	1
11	1	1	0	0
12	1	0	1	0

df <- read.csv("input/wave_1.csv") %>%
  rowid_to_column("ID")  %>%
  mutate(
    Z_1_1 = as.numeric(paste(c_0031)),
    outcome_t1 = v_74 / 10,
    Z_1_2 = as.numeric(paste(c_0032)),
    outcome_t2 = v_77 / 10
  ) %>%
  left_join(Z_labels) %>%
  left_join(
    Z_labels %>%
      rename(
        Z_1_2 = Z_1_1,
        negative_t2 = negative_t1,
        transaction_t2 = transaction_t1,
        financialA_t2 = financialA_t1,
        financialB_t2 = financialB_t1
      )
  ) %>%

  # rename covariates
  rename_at(
    vars(var_list$var_name[var_list$rename == 1]), 
    ~ var_list$new_name[var_list$rename == 1]) %>%
  
  # transformations
  mutate(
    outcome_diff     = outcome_t2 - outcome_t1,
    negative_diff    = negative_t2 - negative_t1,
    financialA_diff  = financialA_t2 - financialA_t1,
    financialB_diff  = financialB_t2 - financialB_t1,
    transaction_diff = transaction_t2 - transaction_t1,
    days = (date - min(date) +1)/21, # Normalized to 0-1
    status = ifelse(
      is.na(vaccination.intent),
      "Vaccinated",
      paste(vaccination.intent)
    ),
    status = dplyr::recode(
      status,
      "1" = "Acceptant",
      "2" = "Refusing",
      "3" = "Undecided"
    ),
    eduyears = (eduyears - min(eduyears, na.rm = TRUE))/max(eduyears, na.rm = TRUE),
    household.larger = (household.size > 2)*1,
    covid.media = 1-(covid.media-1)/4, 
    vaccinated = 1*(vaccination == 1),
    network.vaccinated = (network.vaccinated-1)/4,
    covid.infection = 1*(covid.infection == 1),
    covid.infection.proximity = (covid.infection.proximity-1)/2,
    covid.information = 1- (covid.information-1)/4,
    covid.rules.mask = 1- (covid.rules.mask-1)/5,
    covid.rules.distance = 1- (covid.rules.distance-1)/5,
    support.distance = 1-(support.distance-1)/4, 
    covid.income = 1-(covid.income-1)/4, 
    voted = 1*(voter.turnout ==1),
    age2 = (age - min(age))/(max(age) - min(age)),
    male = 1*(gender == 2), 
    political.interest = 1 - (political.interest-1)/3,
    left.right = (left.right-1)/10,
    CDU.CSU = 1*(party.id <= 2),
    CDU.CSU = ifelse(is.na(CDU.CSU), 0, CDU.CSU),
    SPD = 1*(party.id ==3),
    SPD = ifelse(is.na(SPD), 0, SPD),
    AfD = 1*(party.id ==4),
    AfD = ifelse(is.na(AfD), 0, AfD),
    Greens = 1*(party.id ==5),
    Greens = ifelse(is.na(Greens), 0, Greens),
    FDP = 1*(party.id ==6),
    FDP = ifelse(is.na(FDP), 0, FDP),
    Left = 1*(party.id ==7),
    Left = ifelse(is.na(Left), 0, Left),
    No.party = 1*(party.id == 9),
    No.party = ifelse(is.na(No.party), 0, No.party),
    solidarity = solidarity/10,
    international.solidarity = international.solidarity/10,
    EU.support = EU.support/10,
    migration.support = migration.support/10,
    Employed = 1*(Employment.Status==1),
    citizenship = (citizenship==3)*1,
    Country.of.birth  = 1*(Country.of.birth==2),
    risk = risk/10,
    trust = trust/10,
    trust.government = trust.government/4,
    trust.experts = trust.experts/4,
    trust.country = trust.country/4,
    trust.media =  trust.media/4,
    trust.healthcare = trust.healthcare/4,
    acceptant = 1*(vaccination.intent == 1),
    acceptant = ifelse(is.na(vaccination.intent), 1, acceptant),
    refusing = vaccination.intent == 2,
    refusing = ifelse(is.na(vaccination.intent), 0, refusing),
    undecided = 1*(vaccination.intent == 3),
    undecided = ifelse(is.na(vaccination.intent), 0, undecided),
    health = (health - 1)/6,
    health2 = 1*(health2 == 1),
    covid.surveys = ifelse(covid.surveys == 99, NA, covid.surveys),
    east.west =ifelse(federal.state== "4" |#"Brandenburg"
                                  federal.state== "8" | # Mecklenburg-Vorpommern
                                  federal.state== "13"| # Sachsen
                                  federal.state== "14"| # Sachsen-Anhalt
                                  federal.state== "16"  # Thüringen
                                  , 1, 0)
  )

2.2 Stacked dataframe

The wide dataframe is used for first differences analysis. In addition we make use of a stacked data frame for fixed effects analysis.

# Stack
df_stacked <- 
  bind_rows(mutate(df, t = 1), mutate(df, t = 2)) %>%
  mutate(
    outcome     = ifelse(t == 1, outcome_t1, outcome_t2),
    negative    = ifelse(t == 1, negative_t1, negative_t2),
    financialA  = ifelse(t == 1, financialA_t1, financialA_t2),
    financialB  = ifelse(t == 1, financialB_t1, financialB_t2),
    transaction = ifelse(t == 1, transaction_t1, transaction_t2),
    )

2.3 Summary statistics

The next blocks calculate key summary statistics.

# covariate_names[!(covariate_names %in% names(df))]

small_df <- 
  df %>% dplyr::select(all_of(covariate_names)) %>% 
  # select_if(is.numeric) %>% 
  drop_na()

# Make a data.frame containing summary statistics of interest

summ_stats <-
  small_df %>% 
  fBasics::basicStats() %>%
  t() %>%
  as.data.frame() %>%
  dplyr::select("Mean", "Stdev", "Minimum", "1. Quartile", "Median",  "3. Quartile", "Maximum") %>% 
  rename('Lower quartile'= '1. Quartile', 'Upper quartile' ='3. Quartile') 

# Add in labels
summ_stats <- summ_stats %>% 
  mutate(Variable = factor(rownames(summ_stats), var_list$new_name, var_list$label)) %>% relocate(Variable)

Variable	Mean	Stdev	Minimum	Lower quartile	Median	Upper quartile	Maximum
Days since survey start	0.57	0.19	0.05	0.38	0.57	0.71	1
Male respondent	0.50	0.50	0.00	0.00	0.00	1.00	1
Age (percentile)	0.49	0.27	0.00	0.26	0.51	0.72	1
Non German Citizen	0.03	0.16	0.00	0.00	0.00	0.00	1
Born outside of Germany	0.06	0.24	0.00	0.00	0.00	0.00	1
Migration background of parents	0.23	0.42	0.00	0.00	0.00	0.00	1
In former East Germany	0.15	0.36	0.00	0.00	0.00	0.00	1
Household size 3 or more	0.33	0.47	0.00	0.00	0.00	1.00	1
Employed	0.44	0.50	0.00	0.00	0.00	1.00	1
Years of Education	0.64	0.24	0.00	0.50	0.50	0.80	1
Previously had Covid	0.03	0.17	0.00	0.00	0.00	0.00	1
Members of network infected	0.40	0.36	0.00	0.00	0.50	0.50	1
Good general health	0.65	0.23	0.00	0.50	0.67	0.83	1
Covid pre-existing conditions	0.41	0.49	0.00	0.00	0.00	1.00	1
Share of network vaccinated	0.25	0.24	0.00	0.00	0.25	0.50	1
Respondent has been vaccinated	0.06	0.25	0.00	0.00	0.00	0.00	1
Refusals	0.16	0.37	0.00	0.00	0.00	0.00	1
Undecided	0.17	0.38	0.00	0.00	0.00	0.00	1
Acceptant	0.67	0.47	0.00	0.00	1.00	1.00	1
Seeks Covid information	0.73	0.32	0.00	0.50	0.75	1.00	1
Believes media exaggerates risk	0.42	0.32	0.00	0.25	0.50	0.75	1
Compliance: mask rules	0.90	0.16	0.00	0.80	1.00	1.00	1
Compliance: distance rule	0.82	0.19	0.00	0.80	0.80	1.00	1
Support distancing even for vaccinated	0.71	0.30	0.00	0.50	0.75	1.00	1
Income loss due to Covid (.5 = no change)	0.59	0.19	0.00	0.50	0.50	0.75	1
Political interest	0.65	0.29	0.00	0.33	0.67	1.00	1
Voted last elections	0.83	0.38	0.00	1.00	1.00	1.00	1
Right leaning	0.44	0.18	0.00	0.30	0.50	0.50	1
AfD	0.07	0.26	0.00	0.00	0.00	0.00	1
Conservatives	0.20	0.40	0.00	0.00	0.00	0.00	1
Liberals	0.05	0.23	0.00	0.00	0.00	0.00	1
Green Party	0.16	0.37	0.00	0.00	0.00	0.00	1
Left Party	0.08	0.26	0.00	0.00	0.00	0.00	1
Social Democrats	0.14	0.34	0.00	0.00	0.00	0.00	1
No party identification	0.27	0.44	0.00	0.00	0.00	1.00	1
Willingness to take risks	0.41	0.24	0.00	0.20	0.40	0.60	1
General Trust	0.41	0.25	0.00	0.20	0.40	0.60	1
Trust state government	0.44	0.29	0.00	0.25	0.50	0.75	1
Trust experts	0.64	0.26	0.00	0.50	0.75	0.75	1
Trust federal government	0.45	0.27	0.00	0.25	0.50	0.75	1
Trust media	0.38	0.25	0.00	0.25	0.50	0.50	1
Trust healthcare	0.57	0.26	0.00	0.50	0.50	0.75	1
Values social solidarity	0.58	0.23	0.00	0.50	0.50	0.70	1
Values international solidarity	0.58	0.31	0.00	0.40	0.50	0.80	1
EU support	0.52	0.29	0.00	0.30	0.50	0.70	1
Migration support	0.48	0.27	0.00	0.30	0.50	0.70	1
Distance to Vaccination Center	0.13	0.10	0.00	0.06	0.11	0.18	1

Pairwise correlations.

png 
  2

Around 65 % in the sample would get vaccinated, 17 % would not, and 18 % remain undecided.

fig_1 <- df %>% ggplot(aes(status)) + 
  geom_hline(yintercept=0.7, linetype="longdash", lwd=0.35, colour = "#B55555") +
  geom_bar(aes(y = (..count..)/sum(..count..)),width = 0.6,alpha = 0.5) + 
  scale_y_continuous(labels=scales::percent) +
  theme_bw(base_size=16)+
  theme(axis.title.y = element_blank())+
  ylab("Percent (%)")+
  coord_flip()
fig_1

pdf(fig_1_path, height = 3, width = 6)
fig_1
dev.off()

png 
  2

Concerns of the hesitent and undecided

fig_2 <- 
  df %>% dplyr::select(ID, starts_with("fear.")) %>%
  mutate_if(is.character, as.numeric) %>% 
  melt(variable.name = "Group", id.vars = "ID") %>%  
  mutate(Group = 
           factor(Group,
                  c("fear.longterm", "fear.admission", "fear.effectiveness", "fear.side.effects", "fear.no.danger"),
                  c("Longterm Consequences", "Doubt approval process", "Doubt effectiveness", "Side Effects", "Covid not dangerous")))%>% 
  group_by(Group) %>%
  summarize(mean = 100*mean(value, na.rm = TRUE))  %>%
  ggplot() +
  geom_bar(aes(Group, mean), stat = "identity", width = 0.6,alpha = 0.5)+
  theme_bw(base_size=18)+
  ylab("Percent (%)")+
  theme(axis.title.y = element_blank())+
  coord_flip()
 fig_2

pdf(fig_2_path, height = 4, width = 8)
fig_2
dev.off()

png 
  2

2.4 Correlates of Hesitancy

family_order <- var_list %>% group_by(family) %>% slice(1) %>% arrange(Family_order) %>% pull(family)

correlates <- 
list("acceptant", "refusing", "undecided") %>% 
lapply(function(y)
  lapply(setdiff(covariate_names, tolower(statuses)), function(x)
lm_robust(as.formula(paste(y, "~", x)), data = df) %>% tidy %>% dplyr::mutate(y=y, x=x)) %>% bind_rows) %>% bind_rows %>% dplyr::filter(term!="(Intercept)" & !(x %in% c("acceptant", "refusing", "undecided"))) %>%
  dplyr::mutate(x = factor(x, rev(var_list$new_name), rev(var_list$label))) %>%
  left_join(var_list %>% dplyr::rename(term = new_name) %>% dplyr::select(term, family)) %>%
  dplyr::mutate(family = factor(family, family_order))


fig_cov <- 
  correlates %>% 
dplyr::mutate(y = factor(y, c("acceptant", "undecided", "refusing" ))) %>%
dplyr::filter(x!="federal.state") %>%
ggplot(aes(estimate    , x)) + geom_point() + facet_grid(family ~ y, scales = "free_y") + ylab("") + xlab("How much more or less likely to be [acceptant / undecided / refusing] at baseline")  + 
  geom_vline(xintercept = 0, color = "red") +
  theme_bw() +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = .25)

fig_cov

pdf(correlates_plot_path, height =9, width = 8)
fig_cov
dev.off()

png 
  2

3 Main Results

3.1 Background

We conduct analysis of a 2x2x3 factorial survey experiment, with factors assigned with independent probabilities. Each respondent receives two vignettes successively. Our population of interest consists of all German citizens over the age of 18. For this study, we rely on a representative sample of 20,000 citizens across Germany. The online-survey was rolled out between March 12 and March 14, 2021.

Factor \(Z\)	Level \(L\)
Negative incentives	Control: There are no special regulations for vaccinated people even when the Corona incidence is high. For example, they cannot travel again, visit cinemas, restaurants or concerts and are still subject to contact restrictions.
	Treatment: Special regulations apply to vaccinated people. For example, even when the Corona incidence is high, they can travel again, visit cinemas, restaurants or concerts and are not subject to any contact restrictions
Reduction Transaction Cost	Control: Eligible citizens can get vaccinated against Corona at the nearest vaccination center, but not from their family doctor.
	Treatment: Eligible citizens can have themselves vaccinated against Corona at the nearest vaccination center or their family doctor.
Financial renumeration	Control: Citizens who are vaccinated will not receive any allowance after receiving the vaccination.
	Treatment: Citizens who get vaccinated receive an expense allowance of {Level: 25} euros after receiving the vaccination.
	Treatment: Citizens who get vaccinated receive an expense allowance of {Level: 50} euros after receiving the vaccination.

3.2 Check strategy

We demonstrate that first differences and fixed effects effects analyses produce essentially identical results.

# Analysis

lm_robust(
  outcome_diff ~ negative_diff + financialA_diff + financialB_diff + transaction_diff,
  data=df, se_type = "stata") %>%  
  tidy %>%
  kable(caption = "Basic first differences analysis", digits = 2, booktabs = TRUE)

Basic first differences analysis
term	estimate	statistic	conf.low	conf.high	df	outcome
(Intercept)	0.00	-3.43	-0.01	0.00	20495	outcome_diff
negative_diff	0.03	15.52	0.02	0.03	20495	outcome_diff
financialA_diff	0.01	5.40	0.01	0.01	20495	outcome_diff
financialB_diff	0.02	11.30	0.02	0.03	20495	outcome_diff
transaction_diff	0.03	17.99	0.03	0.03	20495	outcome_diff

lm_robust(
  outcome ~ negative + financialA + financialB + transaction,
  data=df_stacked, se_type = "stata", fixed_effects =  ~ID) %>%  
  tidy %>%
  kable(caption = "Basic Fixed effects analysis", digits = 3, booktabs = TRUE)

Basic Fixed effects analysis
term	estimate	std.error	statistic	conf.low	conf.high	df	outcome
negative	0.025	0.002	15.500	0.022	0.028	20496	outcome
financialA	0.010	0.002	5.441	0.006	0.013	20496	outcome
financialB	0.022	0.002	11.257	0.018	0.026	20496	outcome
transaction	0.030	0.002	18.113	0.027	0.033	20496	outcome

# The analysis function takes a dataframe, demeans within the (differenced) Xs 
our_analysis <- function(dff)
  lm_robust(
  outcome_diff ~ negative*financialA*transaction + negative*financialB*transaction,
  data = dff %>%
    mutate(negative = negative_diff - mean(negative_diff),
           financialA = financialA_diff- mean(financialA_diff),
           financialB = financialB_diff - mean(financialB_diff),
           transaction = transaction_diff - mean(transaction_diff)), 
  se_type = "stata")  
  

our_analysis(df) %>% tidy %>%
  kable(caption = "Basic Fixed effects analysis with interactions", digits = 3)

Basic Fixed effects analysis with interactions
term	estimate	std.error	statistic	p.value	conf.low	conf.high	df	outcome
(Intercept)	-0.004	0.001	-3.752	0.000	-0.006	-0.002	20488	outcome_diff
negative	0.025	0.002	15.214	0.000	0.022	0.028	20488	outcome_diff
financialA	0.010	0.002	5.343	0.000	0.006	0.013	20488	outcome_diff
transaction	0.030	0.002	18.078	0.000	0.026	0.033	20488	outcome_diff
financialB	0.022	0.002	11.236	0.000	0.018	0.026	20488	outcome_diff
negative:financialA	0.006	0.003	2.092	0.036	0.000	0.012	20488	outcome_diff
negative:transaction	0.002	0.003	0.734	0.463	-0.003	0.007	20488	outcome_diff
financialA:transaction	-0.001	0.003	-0.385	0.700	-0.006	0.004	20488	outcome_diff
negative:financialB	0.006	0.003	1.820	0.069	0.000	0.012	20488	outcome_diff
transaction:financialB	-0.002	0.003	-0.595	0.552	-0.007	0.004	20488	outcome_diff
negative:financialA:transaction	-0.003	0.004	-0.609	0.542	-0.011	0.006	20488	outcome_diff
negative:transaction:financialB	-0.006	0.005	-1.294	0.196	-0.015	0.003	20488	outcome_diff

3.3 Main analysis

statuses <- unique(df$status)

model_list <- lapply(statuses, function(j) our_analysis(df %>% dplyr::filter(status == j)))
names(model_list) <- statuses
model_list$All <- our_analysis(df)

by_status <- 
  lapply(model_list, function(model) tidy(model)) %>% 
  bind_rows(.id = "status") %>%
  mutate(status = factor(status, c("All", statuses)))

# Table version

fileConn <- file(tab_2_path)
writeLines(texreg(model_list, float.pos = "h!", include.ci = FALSE, caption = "Main results, with interactions. Individual fixed effects modelled by regressing differences in responses across two vignetters on differences in treatments and interactions. All treatments are centered on zero for each group.",
                  custom.coef.map =  list("(Intercept)" = "Constant (No incentives)",
                                          "financialA" = "25 Euro incentive",
                                          "financialB" = "50 Euro incentive",
                                          "negative" = "Freedoms",
                                          "transaction" = "Local doctors",
                                          
"negative:financialA" = "Freedoms * 25 Euros",
"negative:financialB" = "Freedoms * 50 Euros",
"negative:transaction" = "Freedoms * Local doctors",
"financialA:transaction" = "Local doctors * 25 Euros",
"transaction:financialB" = "Local doctors * 50 Euros",
"negative:financialA:transaction" = "Freedoms * Local doctors * 25 Euros",
"negative:transaction:financialB" ="Freedoms * Local doctors * 50 Euros"), digits = 3), fileConn)
close(fileConn)


# Figure Version
figure_1A <-
  
  by_status %>% 
  dplyr::filter(term %in% c("negative", "financialA", "financialB", "transaction")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(Treatment = factor(term, treatment_levels, treatment_labels)) %>%
  
  ggplot(aes(Treatment, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Estimated effect on prob vaccination (0-1)") + theme(legend.position="bottom")

figure_1A

3.4 Additional vaccinated subgroup

# Figure Version
figure_subgroups <-
  
  by_status %>% 
  dplyr::filter(term %in% c("negative", "financialA", "financialB", "transaction")) %>% 
  #dplyr::filter(status != "Vaccinated") %>%
  mutate(Treatment = factor(term, treatment_levels, treatment_labels)) %>%
  
  ggplot(aes(Treatment, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Change in probability of willingness to get vaccinated (0-1)")

figure_subgroups

3.5 By round

# analysis by period
df_stacked2 <- bind_rows(
  df_stacked %>% dplyr::filter(status != "Vaccinated"),
  df_stacked %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All"))

out <-   lapply(1:2, function(period)
              df_stacked2 %>% dplyr::filter(t==period) %>%
                split(.$status) %>%
                map(~lm_robust(outcome ~ negative + financialA + financialB + transaction, data = .)) %>%
                map(tidy) %>%
  bind_rows(.id = "status") %>% mutate(t=period)) %>%
  bind_rows() %>%
  dplyr::filter(term %in% treatments) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(t = factor(t)) %>%
  mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
  mutate(term = factor(term, treatments, treatment_labels))

out %>%
  ggplot(aes(term, estimate, color = t, shape = t)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+ facet_wrap(~status, ncol = 2) +
  ylab("Effect on vaccination probability (0-1)")

# Analysis first round
  first <-
  df %>%
  split(.$status) %>%
  map(~lm_robust(outcome_t1 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1, data = .)) %>%
  map(tidy) %>%
  bind_rows(.id = "status")%>% 
  dplyr::filter(term %in% c("negative_t1", "financialA_t1", "financialB_t1", "transaction_t1")) %>% 
  dplyr::filter(status != "Vaccinated") 

  first<-first %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Change in probability of willingness to get vaccinated (0-1)")+
  ggtitle("First Vignette") 

# Analysis second round
  second<-df %>%
  split(.$status) %>%
  map(~lm_robust(outcome_t2~ negative_t2 + financialA_t2 + financialB_t2 + transaction_t2, data = .)) %>%
  map(tidy) %>%
  bind_rows(.id = "status")%>% 
  dplyr::filter(term %in% c("negative_t2", "financialA_t2", "financialB_t2", "transaction_t2")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Change in probability of willingness to get vaccinated (0-1)")+
  ggtitle("Second Vignette") 
  
# Analysis first round
  ggarrange(first, second)

3.6 Consistency bias

# effect of lag:
# Note that lag treatment is negatively correlated with current treatment

lag_out <-  
  bind_rows(
    df %>% dplyr::filter(status != "Vaccinated"),
    df %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All"))  %>% 
                split(.$status) %>%
                map(~lm_robust(outcome_t2 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1, data = .)) %>%
                map(tidy) %>%
  bind_rows(.id = "status") %>%
  dplyr::filter(term %in% paste0(treatments, "_t1")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
  mutate(term = factor(term, paste0(treatments, "_t1"), treatment_labels))

lag_out %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Effect of t1 treatment on t2 vaccination probability (0-1)")

c(
  cor(df$financialA_t1, df$financialA_t2),
  cor(df$financialB_t1, df$financialB_t2),
  cor(df$negative_t1, df$negative_t2),
  cor(df$transaction_t1, df$transaction_t2))

[1] -0.12590611 -0.07113368 -0.13421823 -0.06858775

Accounting for current:

lm_robust(outcome_t2 ~ 
            negative_t1 + financialA_t1 + financialB_t1 + transaction_t1 +
                                 financialA_t2*transaction_t2*negative_t2+
                                 financialB_t2*transaction_t2*negative_t2, 
          data = df %>% dplyr::filter(status != "Vaccinated")) %>% tidy %>%
  kable(digits = 2)

term	estimate	std.error	statistic	p.value	conf.low	conf.high	df	outcome
(Intercept)	0.66	0.01	60.95	0.00	0.64	0.68	19140	outcome_t2
negative_t1	0.01	0.01	1.64	0.10	0.00	0.02	19140	outcome_t2
financialA_t1	-0.01	0.01	-1.46	0.14	-0.02	0.00	19140	outcome_t2
financialB_t1	-0.01	0.01	-0.95	0.34	-0.02	0.01	19140	outcome_t2
transaction_t1	0.01	0.01	1.80	0.07	0.00	0.02	19140	outcome_t2
financialA_t2	0.00	0.01	0.22	0.83	-0.02	0.03	19140	outcome_t2
transaction_t2	0.05	0.01	3.75	0.00	0.02	0.07	19140	outcome_t2
negative_t2	0.02	0.01	1.53	0.13	-0.01	0.04	19140	outcome_t2
financialB_t2	0.02	0.01	1.92	0.06	0.00	0.05	19140	outcome_t2
financialA_t2:transaction_t2	-0.02	0.02	-0.96	0.34	-0.05	0.02	19140	outcome_t2
financialA_t2:negative_t2	0.01	0.02	0.42	0.67	-0.03	0.04	19140	outcome_t2
transaction_t2:negative_t2	0.00	0.02	0.21	0.83	-0.03	0.04	19140	outcome_t2
transaction_t2:financialB_t2	-0.01	0.02	-0.40	0.69	-0.04	0.03	19140	outcome_t2
negative_t2:financialB_t2	0.00	0.02	0.04	0.97	-0.03	0.04	19140	outcome_t2
financialA_t2:transaction_t2:negative_t2	0.00	0.03	-0.03	0.98	-0.05	0.05	19140	outcome_t2
transaction_t2:negative_t2:financialB_t2	-0.02	0.03	-0.87	0.38	-0.07	0.03	19140	outcome_t2

lag_out_2 <-  
  bind_rows(
    df %>% dplyr::filter(status != "Vaccinated"),
    df %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All"))  %>% 
                split(.$status) %>%
                map(~lm_robust(outcome_t2 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1 +
                                 financialA_t2*transaction_t2*negative_t2+
                                 financialB_t2*transaction_t2*negative_t2, data = .)) %>%
                map(tidy) %>%
  bind_rows(.id = "status") %>%
  dplyr::filter(term %in% paste0(treatments, "_t1")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
  mutate(term = factor(term, paste0(treatments, "_t1"), treatment_labels))

lag_out_2 %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Effect of t1 treatment on t2 vaccination probability (0-1)")  + ylim(c(NA, .075))

Check for change in levels. Did average acceptance go up between measurements?

lm_robust(outcome_diff ~ 1, data = df) %>% tidy %>% kable(digits = 2)

term	estimate	std.error	statistic	p.value	conf.low	conf.high	df	outcome
(Intercept)	0	0	-3.61	0	-0.01	0	20499	outcome_diff

3.7 Omnibus treatment

The all or nothing comparison uses a small share of data, but shows evidence of a 14 point effect for the undecided.

Note that no fixed effects used here since we generally have only one observation per person.

data_omnibus <- lapply(statuses, function(s)
  df_stacked %>%
      mutate(
        omnibus = negative * financialB * transaction,
        zip = (1-negative) * (1-financialA) * (1-financialB) * (1-transaction)) %>%
      dplyr::filter((omnibus==1) | (zip==1)) %>%
      dplyr::filter(status == s)) 

names(data_omnibus) <- statuses 

lapply(data_omnibus, function(d)
table(table(d$ID))) %>% bind_cols %>% mutate(freq = 1:2) %>% relocate(freq) %>%
  kable(caption = "Most subjects appear only once", booktabs = TRUE)

Most subjects appear only once
freq	Acceptant	Refusing	Vaccinated	Undecided
1	3772	983	422	1105
2	154	49	21	43

omnibus <- lapply(data_omnibus, function(d) lm_robust(outcome ~ omnibus, data = d, cluster = ID, se_type = "stata")) 
  
  omnibus[[4]] %>% tidy %>%
  kable(digits = 2, caption = "All or nothing comparisons", booktabs = TRUE)

All or nothing comparisons
term	estimate	std.error	statistic	p.value	conf.low	conf.high	df	outcome
(Intercept)	0.40	0.01	43.83	0	0.38	0.41	1147	outcome
omnibus	0.13	0.01	10.23	0	0.11	0.16	1147	outcome

fileConn<-file(tab_3_path)
writeLines(texreg(omnibus, float.pos = "h!", include.ci = FALSE, caption = "\\label{omnibus} Effects of receiving all treatments compared to all control conditions", custom.coef.map =  list("(Intercept)" = "Constant (No incentives)","omnibus"
= "Maximal incentives")), fileConn)
close(fileConn)

3.8 Aggregate effects

policy_experiments <- list(
  experiment_1 =
    df_stacked %>%
      mutate(
        Z = negative * financialB * transaction,
        zip = (1-negative) * (1-financialA) * (1-financialB) * (1-transaction)) %>%
      dplyr::filter((Z==1) | (zip==1)) %>% 
  group_by(Z) %>% 
  mutate(N = n()) %>% ungroup() %>% 
  group_by(status, Z) %>% 
  summarize(n = n()/mean(N), share_pop_vaccinated = n()*mean(outcome)/mean(N)),
  
  experiment_2 = 
    df_stacked %>%
      dplyr::filter(financialA==0 & negative ==1 & transaction ==1) %>%
  group_by(financialB) %>% 
  mutate(N = n()) %>% ungroup() %>% 
  group_by(status, financialB) %>% 
  summarize(n = n()/mean(N), share_pop_vaccinated = n()*mean(outcome)/mean(N)) %>% arrange(financialB) %>% mutate(Z = financialB)) %>% bind_rows(.id = "experiment") %>%
  

  mutate(
  Z = 
  (experiment == "experiment_1") *  Z +
  (experiment == "experiment_2") *  (Z + .5),
  Z = factor(Z, c(0, .5, 1), c("No incentives", "Local doctors + Freedoms", "All incentives")))  %>% 
  dplyr::filter(!is.na(Z))

# set colors to be consistent
group.colors <- c(Acceptant = "#7CAE00", Refusing = "#00BFC4", Undecided ="#C77CFF", Vaccinated = "#F8766D")

figure_1B <- policy_experiments %>%
  ggplot(aes(fill=status, y=share_pop_vaccinated, x=Z)) + 
  geom_bar(position="stack", stat="identity",width = 0.5,alpha = 0.7) + 
  scale_fill_manual(values=group.colors)+
  ylab("Share of population vaccinated") + 
  theme_bw(base_size=14)+
  ylim(0:1) +
  xlab("Incentives") + theme(legend.position="bottom")

figure_1B

policy_experiments %>% group_by(Z) %>% arrange(status) %>% mutate(cumul = cumsum(share_pop_vaccinated)) %>% slice(n())

# A tibble: 3 x 7
# Groups:   Z [3]
  experiment   status     Z                 n share_pop_vaccin~ financialB cumul
  <chr>        <chr>      <fct>         <dbl>             <dbl>      <int> <dbl>
1 experiment_1 Vaccinated No incentiv~ 0.0674            0.0582         NA 0.674
2 experiment_2 Vaccinated Local docto~ 0.0678            0.0606          0 0.738
3 experiment_1 Vaccinated All incenti~ 0.0687            0.0636         NA 0.750

3.9 Export Fig 1

Fig 1 in the paper has two panels. Exported here.

fig_1_AB <- ggarrange(
  figure_1A + theme(plot.margin=unit(c(1,1,1.5,1.2),"cm")), 
  figure_1B + theme(plot.margin=unit(c(1,1,1.5,1.2),"cm")),
                    labels = c("A", "B"),
                    ncol = 2, nrow = 1)

pdf(fig_1_AB_path, width = 12.5, height = 6)
fig_1_AB
dev.off()

png 
  2

fig_1_AB

3.10 Notes

Share of hesitant, undecided, that are AFD supports or support no party

df %>% 
  group_by(status) %>% 
  summarize(AfD = mean(AfD), 
            no_party = mean(No.party), 
            trust_experts = mean(trust.experts), 
            media_exaggerates = mean(covid.media), 
            fear_longterm = mean(fear.longterm, na.rm = TRUE)) %>%
  kable(digits = 2, booktabs = TRUE)

status	AfD	no_party	trust_experts	media_exaggerates	fear_longterm
Acceptant	0.03	0.21	0.71	0.33	NaN
Refusing	0.20	0.37	0.42	0.72	0.64
Undecided	0.08	0.39	0.56	0.51	0.68
Vaccinated	0.04	0.24	0.68	0.38	NaN

4 Heterogeneity

For heterogeneity analysis we draw on the Causal Forests approach described in Athey, Tibshrani and Wager (Annals of Statistics, 2019) , Nie and Wager, 2017

4.1 Helpers

A function to prep heterogeneous effects data for a given treatment, removing others.

num_tiles <- 4  # ntiles = CATE is above / below the median

# Note this is set up so that we can swap in the difference outcome easily

het_df <- function(treatment_name = "transaction_diff", 
                   outcome_name = "outcome_diff",
                   data = df) {
  data$W = data[treatment_name][[1]]
  data$Y = data[outcome_name][[1]]
  data %>% 
    dplyr::select(Y, W, ID, all_of(covariate_names)) %>%
    drop_na() %>%
    mutate_if(is.factor, as.numeric)  %>%
    # mutate(federal.state =  as.numeric(factor(federal.state))) %>%
    # Trick to render W binary
    dplyr::mutate(
      Y = ifelse(W<0, -Y, Y),
      W = W^2)  
}

A function to fit the forest and save outputs: input dataframe should have a privileged Y and W.

# heterogeneous treatment effects 

f_cf <- function(df, n_trees = 1000) {
  
  # rule of thumb: num.trees = number of individuals
  
  df_forests <- sample_frac(df, replace=FALSE, size=train_fraction)
  X <- as.matrix(df_forests[, covariate_names])
  
  cf <-
    causal_forest(
    X = X,
    Y = df_forests$Y,
    W = df_forests$W,
    num.trees=n_trees) 

  ### Predict point estimates and standard errors (training set, out-of-bag)
  oob_pred      <- predict(cf, estimate.variance=TRUE)
  oob_tauhat_cf <- oob_pred$predictions
  oob_tauhat_cf_se <- sqrt(oob_pred$variance.estimates)
  
  var_imp        <- c(variable_importance(cf)) 
  names(var_imp) <- covariate_names
  var_imp <- var_imp %>% sort(decreasing=TRUE)
  
  df_forests$cate  <- oob_tauhat_cf
  df_forests$ntile <- factor(ntile(oob_tauhat_cf, n=num_tiles))

# Standard model estimates by quantile
estimated_sample_ate <- 
  lm_robust(Y ~ ntile + ntile:W, data=df_forests) %>% 
  tidy() %>% 
  dplyr::filter(stringr::str_detect(term, ":W"))

# AIPW estimates by quantile
estimated_aipw_ate <- 
  lapply(
  seq(num_tiles), function(w) 
    average_treatment_effect(cf, subset = df_forests$ntile == w)
  ) %>% bind_rows

combined_estimates <- 
  bind_rows(
    estimated_sample_ate %>% mutate(type = "lm_robust") %>% dplyr::select(-outcome, -df, -statistic, - p.value),
    estimated_aipw_ate %>% rename(std.error = std.err) %>% 
      mutate(
        type  = "aipw",
        term = estimated_sample_ate$term,
        conf.low = estimate - 1.96*std.error,
        conf.high = estimate + 1.96*std.error)
  )
# Outputs
list(cf = cf,
     df_forests = df_forests, 
     X = X,
     oob_tauhat_cf = oob_tauhat_cf, 
     var_imp = var_imp, 
     ntile_estimates = combined_estimates)
}

A function to generate fitted values.

fitted_vals <- function(var_of_interest, model = test){
  
  df_forests <- model$df_forests
  cf <- model$cf
  
      is_continuous <- (length(unique(df_forests[var_of_interest][[1]])) > 5) # crude rule for determining continuity
    if(is_continuous) {
      x_grid <- quantile(df_forests[var_of_interest][[1]], probs = seq(0, 1, length.out = 5))
    } else {
      x_grid <- sort(unique(df_forests[var_of_interest][[1]]))
    }
    
  df_grid <-  setNames(data.frame(x_grid), var_of_interest)
  
  other_covariates <- covariate_names[!covariate_names %in% var_of_interest]
  df_median <- df_forests %>% dplyr::select(all_of(other_covariates)) %>% summarise_all(median) 
  df_eval <- crossing(df_median, df_grid)
  
  pred <- predict(cf, newdata=df_eval[,covariate_names], estimate.variance=TRUE)
df_eval$tauhat <- pred$predictions
df_eval$se <- sqrt(pred$variance.estimates)

# Change to factor so the plotted values are evenly spaced (e.g. logicals)
df_eval %>% arrange(var_of_interest) %>%
  mutate(var_of_interest = as.factor(as.numeric(df_eval[var_of_interest][[1]])))
}

4.2 Causal Forests

4.2.1 Implementation

cf_all <- 
  lapply(treatment_levels_diff, function(x) het_df(x, outcome_name = "outcome_diff") %>% f_cf(n_trees=N_trees))
names(cf_all) <- treatment_levels_diff

cf_undecided <- 
  lapply(treatment_levels_diff, function(x) 
  het_df(x, outcome_name = "outcome_diff", dplyr::filter(df, undecided == 1)) %>% 
    f_cf(n_trees=N_trees))
names(cf_undecided) <- treatment_levels_diff

cf_hesitant<- 
  lapply(treatment_levels_diff, function(x) 
  het_df(x, outcome_name = "outcome_diff", dplyr::filter(df, refusing == 1)) %>% 
    f_cf(n_trees=N_trees))
names(cf_hesitant) <- treatment_levels_diff

4.2.2 Most common predictors

lapply(cf_all, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Strongest predictions", booktabs = TRUE)

Strongest predictions
financialA_diff	financialB_diff	negative_diff	transaction_diff
distance_km	age2	age2	distance_km
risk	solidarity	support.distance	age2
age2	days	undecided	solidarity
trust	distance_km	covid.media	EU.support
days	trust	days	undecided
left.right	undecided	distance_km	health

lapply(cf_undecided, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Most common predictors, undecided ", booktabs = TRUE)

Most common predictors, undecided
financialA_diff	financialB_diff	negative_diff	transaction_diff
risk	age2	age2	age2
trust	days	support.distance	solidarity
distance_km	distance_km	covid.infection.proximity	distance_km
covid.income	trust	days	EU.support
age2	risk	health	international.solidarity
days	migration.support	distance_km	eduyears

lapply(cf_hesitant, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Most common predictors, refusing ", booktabs = TRUE)

Most common predictors, refusing
financialA_diff	financialB_diff	negative_diff	transaction_diff
distance_km	distance_km	days	days
age2	age2	distance_km	distance_km
left.right	solidarity	age2	age2
migration.support	days	trust.country	migration.support
trust.media	risk	left.right	solidarity
risk	EU.support	EU.support	covid.income

what_matters <- 
  list(All = cf_all, Undecided = cf_undecided, Hesitant = cf_hesitant) %>%
  lapply(function(res) 
    lapply(res, function(j) j$var_imp %>% t %>% data.frame) %>%
      bind_rows %>% mutate(treatment = names(cf_all)) %>%
      gather(covariate, "value", - treatment)) %>% bind_rows(.id = "group")
  
what_matters_plot <- 
  what_matters  %>% 
  mutate(
    covariate = factor(covariate,   rev(var_list$new_name), rev(var_list$label)),
    treatment = factor(treatment, treatment_levels_diff, treatment_labels)) %>% 
    ggplot(aes(value, covariate, color = treatment)) + 
    geom_point()+
    scale_x_continuous(name="Variable Importance")+
    theme_bw() + facet_wrap(~group) + ylab(" ")

what_matters_plot

pdf(fig_3_path, width = 9, height = 9)
  what_matters_plot
dev.off()

png 
  2

4.2.3 Variation across ntiles

# plot
lapply(cf_all, function(j)  j$ntile_estimates) %>% 
  bind_rows(.id = "treatment") %>%
  ggplot() +
  geom_pointrange(
    aes(x = term, y = estimate, ymax = conf.high, ymin = conf.low, color = type), 
    size = 0.5,
    position = position_dodge(width = .5)) +
  geom_errorbar(aes(x = term, ymax = conf.high, ymin = conf.low, color = type), 
                width = 0.4,
                size = 0.75,
                position = position_dodge(width = .5)) +
  theme_bw() +
  labs(x = "N-tile", y = "ATE Estimate", title = "ATE within N-tiles") +
  facet_wrap(~treatment)

4.2.4 Graph heterogeneous effects

vars <- c("days", "age2", "undecided", "solidarity", "support.distance","distance_km")

all_marginals <- 
  lapply(vars, function(x) 
         lapply(cf_all , function(model)
           fitted_vals(x, model = model) %>%
             mutate(var = x)
           ) %>% bind_rows(.id = "treatment")) %>%
  bind_rows %>%
  mutate(var = factor(var, vars, c("Days", "Age (Percentile)", "Undecided", "Solidarity", "Support distancing","Distance to Vaccination Center")),
         treatment = 
           factor(treatment,treatment_levels_diff, treatment_labels),
var_of_interest = 
  factor(as.numeric(paste(var_of_interest)),
sort(unique(as.numeric(paste(var_of_interest))))))

figure_4 <-
  
all_marginals %>%
  mutate(ymin_val = tauhat-1.96*se) %>%
  mutate(ymax_val = tauhat+1.96*se) %>%
  ggplot()  + 
  geom_line(aes_string(x="var_of_interest", y="tauhat", group = 1), color="red") +
  geom_errorbar(aes_string(x="var_of_interest",ymin="ymin_val", ymax="ymax_val", width=.2),color="blue") +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, colour = "#B55555") +
  ylab("Predicted Treatment Effect") +
  theme_bw() +
  theme(axis.ticks = element_blank()) +
  facet_grid(treatment ~ var, scales = "free_x") +
  xlab("quantiles")

figure_4

pdf(fig_4_path, width = 8, height = 6) 
figure_4
dev.off()

png 
  2

4.2.5 Best linear projections

Using causal forests functions:

blps <-
lapply(
  list(all = cf_all, undecided = cf_undecided, refusing = cf_hesitant), 
  function(set) lapply(set,  function(model)
       lapply(1:length(covariate_names), function(i)
           best_linear_projection(model$cf, model$X[,i]) %>% 
             tidy  %>% 
         dplyr::filter(term!="(Intercept)") %>% 
         dplyr::mutate(var = covariate_names[i])) %>%
        bind_rows()) %>% 
    bind_rows(.id = "treatment")) %>% bind_rows(.id = "set") %>%
  dplyr::mutate(treatment = factor(treatment, treatment_levels_diff, treatment_labels)) 


# importance
top <- 3
most_common <- c(lapply(cf_all, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist(),
lapply(cf_undecided, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist(),
lapply(cf_hesitant, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist()) %>% unique()

# add in promised measures from PAP

most_common <- unique(c(most_common, "AfD", "risk", "trust"))
blps_plot <- 
  blps %>% dplyr::filter(var %in% most_common) %>%
  mutate(var = factor(var,   rev(var_list$new_name), rev(var_list$label))) %>%
    ggplot(aes(estimate, var, color = set)) + 
    geom_point(position=position_dodge(width = .5)) +
    geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96*std.error), width = .25,  position=position_dodge(width = .5)) +
  facet_grid(~treatment) +
  geom_vline(xintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw()  +
  ylab("")



blps_plot

pdf(blps_plot_path, height = 6, width = 9)
blps_plot
dev.off()

png 
  2

Using lm_robust:

lm_interactions <-
lapply(
  list(all = df, undecided = dplyr::filter(df, undecided==1), refusing = dplyr::filter(df, refusing==1)), 
  function(dff) lapply(treatment_levels_diff,  function(X)
       lapply(1:length(covariate_names), function(i)
           lm_robust(as.formula(paste("outcome_diff~", X, "*", covariate_names[i])), data = dff) %>% 
             tidy  %>% slice(4) %>%
         mutate(var = covariate_names[i], treatment = X)) %>%
        bind_rows()) %>% 
    bind_rows()) %>% bind_rows(.id = "set") %>%
  mutate(treatment = factor(treatment, treatment_levels_diff, treatment_labels)) 

lm_plot <- 
  lm_interactions %>% dplyr::filter(var %in% most_common) %>%
  dplyr::filter(!(var == "undecided" & set == "undecided")) %>%
  dplyr::filter(!(var == "refusing" & set == "refusing")) %>%
  mutate(var = factor(var,   rev(var_list$new_name), rev(var_list$label))) %>%
    ggplot(aes(estimate, var, color = set)) + 
    geom_point(position=position_dodge(width = .5)) +
    geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = .25,  position=position_dodge(width = .5)) +
  facet_grid(~treatment) +
  geom_vline(xintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw()  +
  ylab("")

lm_plot

pdf(lm_plot_path, height = 6, width = 9)
lm_plot
dev.off()

png 
  2

4.2.6 Marginal effects for undecided

v2 <- c("age2",  "solidarity", "support.distance", "trust", "trust.media")

v2labs <- c("Age (Percentile)",  "Solidarity", "Support distancing","General trust", "Trust media")


figure_8  <- 
  
  lapply(v2, function(x) 
         lapply(cf_undecided , function(model)
           fitted_vals(x, model = model) %>%
             mutate(var = x)
           ) %>% bind_rows(.id = "treatment")) %>%
  bind_rows %>%
  mutate(var = factor(var, v2, v2labs),
         treatment = factor(treatment, 
                            paste0(treatment_levels_diff), 
                            treatment_labels),
var_of_interest = 
  factor(as.numeric(paste(var_of_interest)),
sort(unique(as.numeric(paste(var_of_interest)))))) %>%
  mutate(ymin_val = tauhat-1.96*se) %>%
  mutate(ymax_val = tauhat+1.96*se) %>%
  ggplot()  + 
  geom_line(aes_string(x="var_of_interest", y="tauhat", group = 1), color="red") +
  geom_errorbar(aes_string(x="var_of_interest",ymin="ymin_val", ymax="ymax_val", width=.2),color="blue") +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, colour = "#B55555") +
  ylab("Predicted Treatment Effect") +
  theme_bw() +
  theme(axis.ticks = element_blank()) +
  facet_grid(treatment ~ var, scales = "free_x") +
  xlab("quantiles")


figure_8

pdf(undecided_cf_path, height = 8, width = 8)
figure_8
dev.off()

png 
  2

4.3 Interaction plots

4.3.1 Predictions by age

M <- lm_robust(outcome_diff~age2*negative_diff*transaction_diff, data = df)

# Different for old 
linearHypothesis(M, "negative_diff + age2:negative_diff = transaction_diff + age2:transaction_diff")

Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff  + age2:negative_diff - age2:transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ age2 * negative_diff * transaction_diff

  Res.Df Df  Chisq Pr(>Chisq)    
1  20493                         
2  20492  1 84.622  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Different for young 
linearHypothesis(M, "negative_diff  = transaction_diff ")

Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ age2 * negative_diff * transaction_diff

  Res.Df Df  Chisq Pr(>Chisq)    
1  20493                         
2  20492  1 60.115  8.945e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

M0 <- lm(outcome_diff~age2*negative_diff*transaction_diff*financialB_diff, data = df)
M1 <- lm(outcome_diff~age*negative_diff*transaction_diff*financialB_diff, data = df)
M2 <- lm(outcome_diff~age*negative_diff*transaction_diff*financialB_diff, data = dplyr::filter(df, status == "Undecided"))

dff <- list(
  Freedoms = interplot(m = M2, var1 = "negative_diff", var2 = "age", plot = FALSE),
  'Local Doctors' = interplot(m = M2, var1 = "transaction_diff", var2 = "age", plot = FALSE),
    'Financial 50' = interplot(m = M2, var1 = "financialB_diff", var2 = "age", plot = FALSE)) %>% bind_rows(.id = "Treatment")


age_plot <- dff %>%
ggplot(aes(x=age, y=coef, colour=Treatment)) + #geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin=lb, ymax=ub, fill=Treatment), alpha=0.1) + 
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  ylab("Effect of treatment on acceptance") + 
  xlab("Age of respondent")  + 
  theme_bw()


age_plot

4.3.2 Predictions by distance

M <- lm_robust(outcome_diff~distance_km*negative_diff*transaction_diff, data = df)
# Different for old 
linearHypothesis(M, "negative_diff + distance_km:negative_diff = transaction_diff + distance_km:transaction_diff")

Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff  + distance_km:negative_diff - distance_km:transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ distance_km * negative_diff * transaction_diff

  Res.Df Df Chisq Pr(>Chisq)   
1  20493                       
2  20492  1 7.811   0.005193 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Different for young 
linearHypothesis(M, "negative_diff  = transaction_diff ")

Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ distance_km * negative_diff * transaction_diff

  Res.Df Df  Chisq Pr(>Chisq)
1  20493                     
2  20492  1 1.6787     0.1951

M1 <- lm(outcome_diff~distance_km*negative_diff*transaction_diff*financialB_diff, data = df)
M2 <- lm(outcome_diff~distance_km*negative_diff*transaction_diff*financialB_diff, data = dplyr::filter(df, status == "Undecided"))

dff <- list(
  Freedoms = interplot(m = M1, var1 = "negative_diff", var2 = "distance_km", plot = FALSE),
  'Local Doctors' = interplot(m = M1, var1 = "transaction_diff", var2 = "distance_km", plot = FALSE),
    'Financial 50' = interplot(m = M1, var1 = "financialB_diff", var2 = "distance_km", plot = FALSE)) %>% bind_rows(.id = "Treatment")


distance_plot <- dff %>%
ggplot(aes(x=distance_km, y=coef, colour=Treatment)) + #geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin=lb, ymax=ub, fill=Treatment), alpha=0.1) + 
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  ylab("effect of treatment on acceptance") + 
  xlab("Distance to Vaccination Center")  + 
  theme_bw()


distance_plot

4.3.3 Export Fig 2

Fig 2 in the paper combines three panels. Exported here.

fig_2_ABC <- ggarrange(lm_plot + theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm"))
                                       ,
                       ggarrange(age_plot + ylim(-.03, .13) +
                                    theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm")), 
                                 distance_plot + ylab("") + ylim(-.03, .13) +
                                    theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm")),
                       
                    labels = c("B", "C"),
                    ncol = 2, common.legend = TRUE, legend="bottom"),
                    nrow = 2, labels = "A")

# figure_13
pdf(fig_2_ABC_path, width = 9, height = 9)
fig_2_ABC
dev.off()

png 
  2

fig_2_ABC

Replication archive for ‘Incentives can spur Covid-19 vaccination uptake’

Heike Klüver, Humboldt

Felix Hartmann, Humboldt

Macartan Humphreys, WZB and Columbia

Ferdinand Geißler, Humboldt

Johannes Giesecke, Humboldt

July 2021