This .Rmd file replicates all analyses from Incentives can spur Covid-19 vaccination uptake and saves tex tables and pdf figures as outputs. Data used can be downloaded here.

All code and input files are available at https://github.com/wzb-ipi/covid_hesitancy_2021

1 Housekeeping

knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, comment=NA)
options(qwraps2_markup = "markdown")

Packages: The next code chunk uses pacman to install and load all packages.

if (!require(pacman)) install.packages("pacman")

pacman::p_load(
  rgdal,       # geo data
  geojsonsf,   # read json
  rgeos,       # geo data
  sf,          # geo data
  gridExtra,   # plot arrange
  gtrendsR,    # google trends
  aod,         # hypothesis testing (1.3.1)
  car,         # linear hypothesis testing for causal tree (3.0-2)
  corrplot,    # Correlations (0.84)
  DeclareDesign, 
  dplyr,       # Data manipulation (0.8.0.1)
  evtree,      # evolutionary learning of globally optimal trees (1.0-7)
  fastDummies,
  fBasics,     # Summary statistics (3042.89)
  ggplot2,
  ggpubr,
  grf,         # Generalized random forests (0.10.2)
  haven,       # load sav
  interplot,   # Interactions plotting
  kableExtra,  # Prettier RMarkdown (1.0.1)
  knitr,
  labelled,
  psych,       # Correlation p-values (1.8.12)
  purrr,
  rpart,       # Classification and regression trees, or CART (4.1-13)
  reshape2,
  rpart.plot,  # Plotting trees (3.0.6)
  readr,       # Reading csv files (1.3.1)
  sjlabelled,
  stats,
  summarytools,
  texreg,
  tidyverse,
  tidyselect,
  tidyr,       # Database operations (0.8.3)
  treeClust,   # Predicting leaf position for causal trees (1.1-7)
  tibble)      # Modern alternative to data frames (2.1.1)

# Number of trees for causal forests
N_trees = 1000

# Set seed for reproducibility
set.seed(201911) 

Paths: The next code chunk provides paths for pulling data and saving figures and tables.

# PNAS figures
fig_1_AB_path <- "output/fig_1_AB.pdf"
fig_2_ABC_path <- "output/fig_2_ABC.pdf"

# Interim figures
fig_1_path <- "output/figure_1.pdf"
fig_2_path <- "output/figure_2.pdf"
fig_3_path <- "output/figure_3.pdf"
fig_4_path <- "output/figure_4.pdf"
google_plot_path <- "output/figure_5.pdf"
corr_matrix_plot_path <- "output/figure_6.pdf"
correlates_plot_path  <- "output/figure_7.pdf"
undecided_cf_path     <- "output/figure_8.pdf"
blps_plot_path <- "output/figure_9.pdf"
lm_plot_path <- "output/figure_10.pdf"

tab_1_path <- "output/table_1.tex"
tab_2_path <- "output/table_2.tex"
tab_3_path <- "output/table_3.tex"

# variable names and labels
var_list <- read.csv("input/vars.csv") %>% arrange(order)

covariate_names <- var_list$new_name[var_list$covariate==1]
  
# Labels for treatments
treatments <- treatment_levels <- c("financialA","financialB", "negative", "transaction")
treatment_levels_diff <- c("financialA_diff","financialB_diff", "negative_diff", "transaction_diff")
treatment_labels <- c("Financial 25", "Financial 50", "Freedoms", "Local Doctors")

statuses <- c("Acceptant", "Refusals", "Undecided")


# Training fraction
train_fraction <- 1  # for causal forests: currently all data used

2 Prepare Data

2.1 Import data

Import data, get labels for treatment, rename variables, generate differences.

Raw data has treatments recorded in a single vector with 12 values. These are now transformed into the underlying factors.

# get labels
Z_labels <- read.csv("input/Z_labels.csv") %>% dplyr::select(-X)
kable(Z_labels, booktabs = TRUE)
Z_1_1 negative_t1 financialA_t1 financialB_t1 transaction_t1
1 0 0 0 0
2 0 0 0 1
3 0 1 0 0
4 0 0 1 0
5 1 0 0 0
6 1 0 0 1
7 1 1 0 1
8 1 0 1 1
9 0 1 0 1
10 0 0 1 1
11 1 1 0 0
12 1 0 1 0
df <- read.csv("input/wave_1.csv") %>%
  rowid_to_column("ID")  %>%
  mutate(
    Z_1_1 = as.numeric(paste(c_0031)),
    outcome_t1 = v_74 / 10,
    Z_1_2 = as.numeric(paste(c_0032)),
    outcome_t2 = v_77 / 10
  ) %>%
  left_join(Z_labels) %>%
  left_join(
    Z_labels %>%
      rename(
        Z_1_2 = Z_1_1,
        negative_t2 = negative_t1,
        transaction_t2 = transaction_t1,
        financialA_t2 = financialA_t1,
        financialB_t2 = financialB_t1
      )
  ) %>%

  # rename covariates
  rename_at(
    vars(var_list$var_name[var_list$rename == 1]), 
    ~ var_list$new_name[var_list$rename == 1]) %>%
  
  # transformations
  mutate(
    outcome_diff     = outcome_t2 - outcome_t1,
    negative_diff    = negative_t2 - negative_t1,
    financialA_diff  = financialA_t2 - financialA_t1,
    financialB_diff  = financialB_t2 - financialB_t1,
    transaction_diff = transaction_t2 - transaction_t1,
    days = (date - min(date) +1)/21, # Normalized to 0-1
    status = ifelse(
      is.na(vaccination.intent),
      "Vaccinated",
      paste(vaccination.intent)
    ),
    status = dplyr::recode(
      status,
      "1" = "Acceptant",
      "2" = "Refusing",
      "3" = "Undecided"
    ),
    eduyears = (eduyears - min(eduyears, na.rm = TRUE))/max(eduyears, na.rm = TRUE),
    household.larger = (household.size > 2)*1,
    covid.media = 1-(covid.media-1)/4, 
    vaccinated = 1*(vaccination == 1),
    network.vaccinated = (network.vaccinated-1)/4,
    covid.infection = 1*(covid.infection == 1),
    covid.infection.proximity = (covid.infection.proximity-1)/2,
    covid.information = 1- (covid.information-1)/4,
    covid.rules.mask = 1- (covid.rules.mask-1)/5,
    covid.rules.distance = 1- (covid.rules.distance-1)/5,
    support.distance = 1-(support.distance-1)/4, 
    covid.income = 1-(covid.income-1)/4, 
    voted = 1*(voter.turnout ==1),
    age2 = (age - min(age))/(max(age) - min(age)),
    male = 1*(gender == 2), 
    political.interest = 1 - (political.interest-1)/3,
    left.right = (left.right-1)/10,
    CDU.CSU = 1*(party.id <= 2),
    CDU.CSU = ifelse(is.na(CDU.CSU), 0, CDU.CSU),
    SPD = 1*(party.id ==3),
    SPD = ifelse(is.na(SPD), 0, SPD),
    AfD = 1*(party.id ==4),
    AfD = ifelse(is.na(AfD), 0, AfD),
    Greens = 1*(party.id ==5),
    Greens = ifelse(is.na(Greens), 0, Greens),
    FDP = 1*(party.id ==6),
    FDP = ifelse(is.na(FDP), 0, FDP),
    Left = 1*(party.id ==7),
    Left = ifelse(is.na(Left), 0, Left),
    No.party = 1*(party.id == 9),
    No.party = ifelse(is.na(No.party), 0, No.party),
    solidarity = solidarity/10,
    international.solidarity = international.solidarity/10,
    EU.support = EU.support/10,
    migration.support = migration.support/10,
    Employed = 1*(Employment.Status==1),
    citizenship = (citizenship==3)*1,
    Country.of.birth  = 1*(Country.of.birth==2),
    risk = risk/10,
    trust = trust/10,
    trust.government = trust.government/4,
    trust.experts = trust.experts/4,
    trust.country = trust.country/4,
    trust.media =  trust.media/4,
    trust.healthcare = trust.healthcare/4,
    acceptant = 1*(vaccination.intent == 1),
    acceptant = ifelse(is.na(vaccination.intent), 1, acceptant),
    refusing = vaccination.intent == 2,
    refusing = ifelse(is.na(vaccination.intent), 0, refusing),
    undecided = 1*(vaccination.intent == 3),
    undecided = ifelse(is.na(vaccination.intent), 0, undecided),
    health = (health - 1)/6,
    health2 = 1*(health2 == 1),
    covid.surveys = ifelse(covid.surveys == 99, NA, covid.surveys),
    east.west =ifelse(federal.state== "4" |#"Brandenburg"
                                  federal.state== "8" | # Mecklenburg-Vorpommern
                                  federal.state== "13"| # Sachsen
                                  federal.state== "14"| # Sachsen-Anhalt
                                  federal.state== "16"  # Thüringen
                                  , 1, 0)
  )

2.2 Stacked dataframe

The wide dataframe is used for first differences analysis. In addition we make use of a stacked data frame for fixed effects analysis.

# Stack
df_stacked <- 
  bind_rows(mutate(df, t = 1), mutate(df, t = 2)) %>%
  mutate(
    outcome     = ifelse(t == 1, outcome_t1, outcome_t2),
    negative    = ifelse(t == 1, negative_t1, negative_t2),
    financialA  = ifelse(t == 1, financialA_t1, financialA_t2),
    financialB  = ifelse(t == 1, financialB_t1, financialB_t2),
    transaction = ifelse(t == 1, transaction_t1, transaction_t2),
    )

2.3 Summary statistics

The next blocks calculate key summary statistics.

# covariate_names[!(covariate_names %in% names(df))]

small_df <- 
  df %>% dplyr::select(all_of(covariate_names)) %>% 
  # select_if(is.numeric) %>% 
  drop_na()
# Make a data.frame containing summary statistics of interest

summ_stats <-
  small_df %>% 
  fBasics::basicStats() %>%
  t() %>%
  as.data.frame() %>%
  dplyr::select("Mean", "Stdev", "Minimum", "1. Quartile", "Median",  "3. Quartile", "Maximum") %>% 
  rename('Lower quartile'= '1. Quartile', 'Upper quartile' ='3. Quartile') 

# Add in labels
summ_stats <- summ_stats %>% 
  mutate(Variable = factor(rownames(summ_stats), var_list$new_name, var_list$label)) %>% relocate(Variable)
Variable Mean Stdev Minimum Lower quartile Median Upper quartile Maximum
Days since survey start 0.57 0.19 0.05 0.38 0.57 0.71 1
Male respondent 0.50 0.50 0.00 0.00 0.00 1.00 1
Age (percentile) 0.49 0.27 0.00 0.26 0.51 0.72 1
Non German Citizen 0.03 0.16 0.00 0.00 0.00 0.00 1
Born outside of Germany 0.06 0.24 0.00 0.00 0.00 0.00 1
Migration background of parents 0.23 0.42 0.00 0.00 0.00 0.00 1
In former East Germany 0.15 0.36 0.00 0.00 0.00 0.00 1
Household size 3 or more 0.33 0.47 0.00 0.00 0.00 1.00 1
Employed 0.44 0.50 0.00 0.00 0.00 1.00 1
Years of Education 0.64 0.24 0.00 0.50 0.50 0.80 1
Previously had Covid 0.03 0.17 0.00 0.00 0.00 0.00 1
Members of network infected 0.40 0.36 0.00 0.00 0.50 0.50 1
Good general health 0.65 0.23 0.00 0.50 0.67 0.83 1
Covid pre-existing conditions 0.41 0.49 0.00 0.00 0.00 1.00 1
Share of network vaccinated 0.25 0.24 0.00 0.00 0.25 0.50 1
Respondent has been vaccinated 0.06 0.25 0.00 0.00 0.00 0.00 1
Refusals 0.16 0.37 0.00 0.00 0.00 0.00 1
Undecided 0.17 0.38 0.00 0.00 0.00 0.00 1
Acceptant 0.67 0.47 0.00 0.00 1.00 1.00 1
Seeks Covid information 0.73 0.32 0.00 0.50 0.75 1.00 1
Believes media exaggerates risk 0.42 0.32 0.00 0.25 0.50 0.75 1
Compliance: mask rules 0.90 0.16 0.00 0.80 1.00 1.00 1
Compliance: distance rule 0.82 0.19 0.00 0.80 0.80 1.00 1
Support distancing even for vaccinated 0.71 0.30 0.00 0.50 0.75 1.00 1
Income loss due to Covid (.5 = no change) 0.59 0.19 0.00 0.50 0.50 0.75 1
Political interest 0.65 0.29 0.00 0.33 0.67 1.00 1
Voted last elections 0.83 0.38 0.00 1.00 1.00 1.00 1
Right leaning 0.44 0.18 0.00 0.30 0.50 0.50 1
AfD 0.07 0.26 0.00 0.00 0.00 0.00 1
Conservatives 0.20 0.40 0.00 0.00 0.00 0.00 1
Liberals 0.05 0.23 0.00 0.00 0.00 0.00 1
Green Party 0.16 0.37 0.00 0.00 0.00 0.00 1
Left Party 0.08 0.26 0.00 0.00 0.00 0.00 1
Social Democrats 0.14 0.34 0.00 0.00 0.00 0.00 1
No party identification 0.27 0.44 0.00 0.00 0.00 1.00 1
Willingness to take risks 0.41 0.24 0.00 0.20 0.40 0.60 1
General Trust 0.41 0.25 0.00 0.20 0.40 0.60 1
Trust state government 0.44 0.29 0.00 0.25 0.50 0.75 1
Trust experts 0.64 0.26 0.00 0.50 0.75 0.75 1
Trust federal government 0.45 0.27 0.00 0.25 0.50 0.75 1
Trust media 0.38 0.25 0.00 0.25 0.50 0.50 1
Trust healthcare 0.57 0.26 0.00 0.50 0.50 0.75 1
Values social solidarity 0.58 0.23 0.00 0.50 0.50 0.70 1
Values international solidarity 0.58 0.31 0.00 0.40 0.50 0.80 1
EU support 0.52 0.29 0.00 0.30 0.50 0.70 1
Migration support 0.48 0.27 0.00 0.30 0.50 0.70 1
Distance to Vaccination Center 0.13 0.10 0.00 0.06 0.11 0.18 1

Pairwise correlations.

png 
  2 

Around 65 % in the sample would get vaccinated, 17 % would not, and 18 % remain undecided.

fig_1 <- df %>% ggplot(aes(status)) + 
  geom_hline(yintercept=0.7, linetype="longdash", lwd=0.35, colour = "#B55555") +
  geom_bar(aes(y = (..count..)/sum(..count..)),width = 0.6,alpha = 0.5) + 
  scale_y_continuous(labels=scales::percent) +
  theme_bw(base_size=16)+
  theme(axis.title.y = element_blank())+
  ylab("Percent (%)")+
  coord_flip()
fig_1

pdf(fig_1_path, height = 3, width = 6)
fig_1
dev.off()
png 
  2 

Concerns of the hesitent and undecided

fig_2 <- 
  df %>% dplyr::select(ID, starts_with("fear.")) %>%
  mutate_if(is.character, as.numeric) %>% 
  melt(variable.name = "Group", id.vars = "ID") %>%  
  mutate(Group = 
           factor(Group,
                  c("fear.longterm", "fear.admission", "fear.effectiveness", "fear.side.effects", "fear.no.danger"),
                  c("Longterm Consequences", "Doubt approval process", "Doubt effectiveness", "Side Effects", "Covid not dangerous")))%>% 
  group_by(Group) %>%
  summarize(mean = 100*mean(value, na.rm = TRUE))  %>%
  ggplot() +
  geom_bar(aes(Group, mean), stat = "identity", width = 0.6,alpha = 0.5)+
  theme_bw(base_size=18)+
  ylab("Percent (%)")+
  theme(axis.title.y = element_blank())+
  coord_flip()
 fig_2

pdf(fig_2_path, height = 4, width = 8)
fig_2
dev.off()
png 
  2 

2.4 Correlates of Hesitancy

family_order <- var_list %>% group_by(family) %>% slice(1) %>% arrange(Family_order) %>% pull(family)

correlates <- 
list("acceptant", "refusing", "undecided") %>% 
lapply(function(y)
  lapply(setdiff(covariate_names, tolower(statuses)), function(x)
lm_robust(as.formula(paste(y, "~", x)), data = df) %>% tidy %>% dplyr::mutate(y=y, x=x)) %>% bind_rows) %>% bind_rows %>% dplyr::filter(term!="(Intercept)" & !(x %in% c("acceptant", "refusing", "undecided"))) %>%
  dplyr::mutate(x = factor(x, rev(var_list$new_name), rev(var_list$label))) %>%
  left_join(var_list %>% dplyr::rename(term = new_name) %>% dplyr::select(term, family)) %>%
  dplyr::mutate(family = factor(family, family_order))


fig_cov <- 
  correlates %>% 
dplyr::mutate(y = factor(y, c("acceptant", "undecided", "refusing" ))) %>%
dplyr::filter(x!="federal.state") %>%
ggplot(aes(estimate    , x)) + geom_point() + facet_grid(family ~ y, scales = "free_y") + ylab("") + xlab("How much more or less likely to be [acceptant / undecided / refusing] at baseline")  + 
  geom_vline(xintercept = 0, color = "red") +
  theme_bw() +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = .25)

fig_cov

pdf(correlates_plot_path, height =9, width = 8)
fig_cov
dev.off()
png 
  2 

3 Main Results

3.1 Background

We conduct analysis of a 2x2x3 factorial survey experiment, with factors assigned with independent probabilities. Each respondent receives two vignettes successively. Our population of interest consists of all German citizens over the age of 18. For this study, we rely on a representative sample of 20,000 citizens across Germany. The online-survey was rolled out between March 12 and March 14, 2021.

Factor \(Z\) Level \(L\)
Negative incentives Control: There are no special regulations for vaccinated people even when the Corona
incidence is high. For example, they cannot travel again, visit cinemas, restaurants
or concerts and are still subject to contact restrictions.
Treatment: Special regulations apply to vaccinated people.
For example, even when the Corona incidence is high, they can travel again, visit cinemas,
restaurants or concerts and are not subject to any contact restrictions
Reduction Transaction Cost Control: Eligible citizens can get vaccinated against Corona at the
nearest vaccination center, but not from their family doctor.
Treatment: Eligible citizens can have themselves vaccinated against
Corona at the nearest vaccination center or their family doctor.
Financial renumeration Control: Citizens who are vaccinated will not receive any allowance
after receiving the vaccination.
Treatment: Citizens who get vaccinated receive an expense allowance
of {Level: 25} euros after receiving the vaccination.
Treatment: Citizens who get vaccinated receive an expense allowance
of {Level: 50} euros after receiving the vaccination.

3.2 Check strategy

We demonstrate that first differences and fixed effects effects analyses produce essentially identical results.

# Analysis

lm_robust(
  outcome_diff ~ negative_diff + financialA_diff + financialB_diff + transaction_diff,
  data=df, se_type = "stata") %>%  
  tidy %>%
  kable(caption = "Basic first differences analysis", digits = 2, booktabs = TRUE)
Basic first differences analysis
term estimate std.error statistic p.value conf.low conf.high df outcome
(Intercept) 0.00 0 -3.43 0 -0.01 0.00 20495 outcome_diff
negative_diff 0.03 0 15.52 0 0.02 0.03 20495 outcome_diff
financialA_diff 0.01 0 5.40 0 0.01 0.01 20495 outcome_diff
financialB_diff 0.02 0 11.30 0 0.02 0.03 20495 outcome_diff
transaction_diff 0.03 0 17.99 0 0.03 0.03 20495 outcome_diff
lm_robust(
  outcome ~ negative + financialA + financialB + transaction,
  data=df_stacked, se_type = "stata", fixed_effects =  ~ID) %>%  
  tidy %>%
  kable(caption = "Basic Fixed effects analysis", digits = 3, booktabs = TRUE)
Basic Fixed effects analysis
term estimate std.error statistic p.value conf.low conf.high df outcome
negative 0.025 0.002 15.500 0 0.022 0.028 20496 outcome
financialA 0.010 0.002 5.441 0 0.006 0.013 20496 outcome
financialB 0.022 0.002 11.257 0 0.018 0.026 20496 outcome
transaction 0.030 0.002 18.113 0 0.027 0.033 20496 outcome
# The analysis function takes a dataframe, demeans within the (differenced) Xs 
our_analysis <- function(dff)
  lm_robust(
  outcome_diff ~ negative*financialA*transaction + negative*financialB*transaction,
  data = dff %>%
    mutate(negative = negative_diff - mean(negative_diff),
           financialA = financialA_diff- mean(financialA_diff),
           financialB = financialB_diff - mean(financialB_diff),
           transaction = transaction_diff - mean(transaction_diff)), 
  se_type = "stata")  
  

our_analysis(df) %>% tidy %>%
  kable(caption = "Basic Fixed effects analysis with interactions", digits = 3)
Basic Fixed effects analysis with interactions
term estimate std.error statistic p.value conf.low conf.high df outcome
(Intercept) -0.004 0.001 -3.752 0.000 -0.006 -0.002 20488 outcome_diff
negative 0.025 0.002 15.214 0.000 0.022 0.028 20488 outcome_diff
financialA 0.010 0.002 5.343 0.000 0.006 0.013 20488 outcome_diff
transaction 0.030 0.002 18.078 0.000 0.026 0.033 20488 outcome_diff
financialB 0.022 0.002 11.236 0.000 0.018 0.026 20488 outcome_diff
negative:financialA 0.006 0.003 2.092 0.036 0.000 0.012 20488 outcome_diff
negative:transaction 0.002 0.003 0.734 0.463 -0.003 0.007 20488 outcome_diff
financialA:transaction -0.001 0.003 -0.385 0.700 -0.006 0.004 20488 outcome_diff
negative:financialB 0.006 0.003 1.820 0.069 0.000 0.012 20488 outcome_diff
transaction:financialB -0.002 0.003 -0.595 0.552 -0.007 0.004 20488 outcome_diff
negative:financialA:transaction -0.003 0.004 -0.609 0.542 -0.011 0.006 20488 outcome_diff
negative:transaction:financialB -0.006 0.005 -1.294 0.196 -0.015 0.003 20488 outcome_diff

3.3 Main analysis

statuses <- unique(df$status)

model_list <- lapply(statuses, function(j) our_analysis(df %>% dplyr::filter(status == j)))
names(model_list) <- statuses
model_list$All <- our_analysis(df)

by_status <- 
  lapply(model_list, function(model) tidy(model)) %>% 
  bind_rows(.id = "status") %>%
  mutate(status = factor(status, c("All", statuses)))

# Table version

fileConn <- file(tab_2_path)
writeLines(texreg(model_list, float.pos = "h!", include.ci = FALSE, caption = "Main results, with interactions. Individual fixed effects modelled by regressing differences in responses across two vignetters on differences in treatments and interactions. All treatments are centered on zero for each group.",
                  custom.coef.map =  list("(Intercept)" = "Constant (No incentives)",
                                          "financialA" = "25 Euro incentive",
                                          "financialB" = "50 Euro incentive",
                                          "negative" = "Freedoms",
                                          "transaction" = "Local doctors",
                                          
"negative:financialA" = "Freedoms * 25 Euros",
"negative:financialB" = "Freedoms * 50 Euros",
"negative:transaction" = "Freedoms * Local doctors",
"financialA:transaction" = "Local doctors * 25 Euros",
"transaction:financialB" = "Local doctors * 50 Euros",
"negative:financialA:transaction" = "Freedoms * Local doctors * 25 Euros",
"negative:transaction:financialB" ="Freedoms * Local doctors * 50 Euros"), digits = 3), fileConn)
close(fileConn)


# Figure Version
figure_1A <-
  
  by_status %>% 
  dplyr::filter(term %in% c("negative", "financialA", "financialB", "transaction")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(Treatment = factor(term, treatment_levels, treatment_labels)) %>%
  
  ggplot(aes(Treatment, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Estimated effect on prob vaccination (0-1)") + theme(legend.position="bottom")

figure_1A

3.4 Additional vaccinated subgroup

# Figure Version
figure_subgroups <-
  
  by_status %>% 
  dplyr::filter(term %in% c("negative", "financialA", "financialB", "transaction")) %>% 
  #dplyr::filter(status != "Vaccinated") %>%
  mutate(Treatment = factor(term, treatment_levels, treatment_labels)) %>%
  
  ggplot(aes(Treatment, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Change in probability of willingness to get vaccinated (0-1)")

figure_subgroups

3.5 By round

# analysis by period
df_stacked2 <- bind_rows(
  df_stacked %>% dplyr::filter(status != "Vaccinated"),
  df_stacked %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All"))

out <-   lapply(1:2, function(period)
              df_stacked2 %>% dplyr::filter(t==period) %>%
                split(.$status) %>%
                map(~lm_robust(outcome ~ negative + financialA + financialB + transaction, data = .)) %>%
                map(tidy) %>%
  bind_rows(.id = "status") %>% mutate(t=period)) %>%
  bind_rows() %>%
  dplyr::filter(term %in% treatments) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(t = factor(t)) %>%
  mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
  mutate(term = factor(term, treatments, treatment_labels))

out %>%
  ggplot(aes(term, estimate, color = t, shape = t)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+ facet_wrap(~status, ncol = 2) +
  ylab("Effect on vaccination probability (0-1)") 

# Analysis first round
  first <-
  df %>%
  split(.$status) %>%
  map(~lm_robust(outcome_t1 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1, data = .)) %>%
  map(tidy) %>%
  bind_rows(.id = "status")%>% 
  dplyr::filter(term %in% c("negative_t1", "financialA_t1", "financialB_t1", "transaction_t1")) %>% 
  dplyr::filter(status != "Vaccinated") 

  first<-first %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Change in probability of willingness to get vaccinated (0-1)")+
  ggtitle("First Vignette") 

# Analysis second round
  second<-df %>%
  split(.$status) %>%
  map(~lm_robust(outcome_t2~ negative_t2 + financialA_t2 + financialB_t2 + transaction_t2, data = .)) %>%
  map(tidy) %>%
  bind_rows(.id = "status")%>% 
  dplyr::filter(term %in% c("negative_t2", "financialA_t2", "financialB_t2", "transaction_t2")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Change in probability of willingness to get vaccinated (0-1)")+
  ggtitle("Second Vignette") 
  
# Analysis first round
  ggarrange(first, second)

3.6 Consistency bias

# effect of lag:
# Note that lag treatment is negatively correlated with current treatment

lag_out <-  
  bind_rows(
    df %>% dplyr::filter(status != "Vaccinated"),
    df %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All"))  %>% 
                split(.$status) %>%
                map(~lm_robust(outcome_t2 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1, data = .)) %>%
                map(tidy) %>%
  bind_rows(.id = "status") %>%
  dplyr::filter(term %in% paste0(treatments, "_t1")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
  mutate(term = factor(term, paste0(treatments, "_t1"), treatment_labels))

lag_out %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Effect of t1 treatment on t2 vaccination probability (0-1)") 

c(
  cor(df$financialA_t1, df$financialA_t2),
  cor(df$financialB_t1, df$financialB_t2),
  cor(df$negative_t1, df$negative_t2),
  cor(df$transaction_t1, df$transaction_t2))
[1] -0.12590611 -0.07113368 -0.13421823 -0.06858775

Accounting for current:

lm_robust(outcome_t2 ~ 
            negative_t1 + financialA_t1 + financialB_t1 + transaction_t1 +
                                 financialA_t2*transaction_t2*negative_t2+
                                 financialB_t2*transaction_t2*negative_t2, 
          data = df %>% dplyr::filter(status != "Vaccinated")) %>% tidy %>%
  kable(digits = 2)
term estimate std.error statistic p.value conf.low conf.high df outcome
(Intercept) 0.66 0.01 60.95 0.00 0.64 0.68 19140 outcome_t2
negative_t1 0.01 0.01 1.64 0.10 0.00 0.02 19140 outcome_t2
financialA_t1 -0.01 0.01 -1.46 0.14 -0.02 0.00 19140 outcome_t2
financialB_t1 -0.01 0.01 -0.95 0.34 -0.02 0.01 19140 outcome_t2
transaction_t1 0.01 0.01 1.80 0.07 0.00 0.02 19140 outcome_t2
financialA_t2 0.00 0.01 0.22 0.83 -0.02 0.03 19140 outcome_t2
transaction_t2 0.05 0.01 3.75 0.00 0.02 0.07 19140 outcome_t2
negative_t2 0.02 0.01 1.53 0.13 -0.01 0.04 19140 outcome_t2
financialB_t2 0.02 0.01 1.92 0.06 0.00 0.05 19140 outcome_t2
financialA_t2:transaction_t2 -0.02 0.02 -0.96 0.34 -0.05 0.02 19140 outcome_t2
financialA_t2:negative_t2 0.01 0.02 0.42 0.67 -0.03 0.04 19140 outcome_t2
transaction_t2:negative_t2 0.00 0.02 0.21 0.83 -0.03 0.04 19140 outcome_t2
transaction_t2:financialB_t2 -0.01 0.02 -0.40 0.69 -0.04 0.03 19140 outcome_t2
negative_t2:financialB_t2 0.00 0.02 0.04 0.97 -0.03 0.04 19140 outcome_t2
financialA_t2:transaction_t2:negative_t2 0.00 0.03 -0.03 0.98 -0.05 0.05 19140 outcome_t2
transaction_t2:negative_t2:financialB_t2 -0.02 0.03 -0.87 0.38 -0.07 0.03 19140 outcome_t2
lag_out_2 <-  
  bind_rows(
    df %>% dplyr::filter(status != "Vaccinated"),
    df %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All"))  %>% 
                split(.$status) %>%
                map(~lm_robust(outcome_t2 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1 +
                                 financialA_t2*transaction_t2*negative_t2+
                                 financialB_t2*transaction_t2*negative_t2, data = .)) %>%
                map(tidy) %>%
  bind_rows(.id = "status") %>%
  dplyr::filter(term %in% paste0(treatments, "_t1")) %>% 
  dplyr::filter(status != "Vaccinated") %>%
  mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
  mutate(term = factor(term, paste0(treatments, "_t1"), treatment_labels))

lag_out_2 %>%
  ggplot(aes(term, estimate, color = status, shape = status)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw(base_size=14)+
  ylab("Effect of t1 treatment on t2 vaccination probability (0-1)")  + ylim(c(NA, .075))

Check for change in levels. Did average acceptance go up between measurements?

lm_robust(outcome_diff ~ 1, data = df) %>% tidy %>% kable(digits = 2)
term estimate std.error statistic p.value conf.low conf.high df outcome
(Intercept) 0 0 -3.61 0 -0.01 0 20499 outcome_diff

3.7 Omnibus treatment

The all or nothing comparison uses a small share of data, but shows evidence of a 14 point effect for the undecided.

Note that no fixed effects used here since we generally have only one observation per person.

data_omnibus <- lapply(statuses, function(s)
  df_stacked %>%
      mutate(
        omnibus = negative * financialB * transaction,
        zip = (1-negative) * (1-financialA) * (1-financialB) * (1-transaction)) %>%
      dplyr::filter((omnibus==1) | (zip==1)) %>%
      dplyr::filter(status == s)) 

names(data_omnibus) <- statuses 

lapply(data_omnibus, function(d)
table(table(d$ID))) %>% bind_cols %>% mutate(freq = 1:2) %>% relocate(freq) %>%
  kable(caption = "Most subjects appear only once", booktabs = TRUE)
Most subjects appear only once
freq Acceptant Refusing Vaccinated Undecided
1 3772 983 422 1105
2 154 49 21 43
omnibus <- lapply(data_omnibus, function(d) lm_robust(outcome ~ omnibus, data = d, cluster = ID, se_type = "stata")) 
  
  omnibus[[4]] %>% tidy %>%
  kable(digits = 2, caption = "All or nothing comparisons", booktabs = TRUE)
All or nothing comparisons
term estimate std.error statistic p.value conf.low conf.high df outcome
(Intercept) 0.40 0.01 43.83 0 0.38 0.41 1147 outcome
omnibus 0.13 0.01 10.23 0 0.11 0.16 1147 outcome
fileConn<-file(tab_3_path)
writeLines(texreg(omnibus, float.pos = "h!", include.ci = FALSE, caption = "\\label{omnibus} Effects of receiving all treatments compared to all control conditions", custom.coef.map =  list("(Intercept)" = "Constant (No incentives)","omnibus"
= "Maximal incentives")), fileConn)
close(fileConn)

3.8 Aggregate effects

policy_experiments <- list(
  experiment_1 =
    df_stacked %>%
      mutate(
        Z = negative * financialB * transaction,
        zip = (1-negative) * (1-financialA) * (1-financialB) * (1-transaction)) %>%
      dplyr::filter((Z==1) | (zip==1)) %>% 
  group_by(Z) %>% 
  mutate(N = n()) %>% ungroup() %>% 
  group_by(status, Z) %>% 
  summarize(n = n()/mean(N), share_pop_vaccinated = n()*mean(outcome)/mean(N)),
  
  experiment_2 = 
    df_stacked %>%
      dplyr::filter(financialA==0 & negative ==1 & transaction ==1) %>%
  group_by(financialB) %>% 
  mutate(N = n()) %>% ungroup() %>% 
  group_by(status, financialB) %>% 
  summarize(n = n()/mean(N), share_pop_vaccinated = n()*mean(outcome)/mean(N)) %>% arrange(financialB) %>% mutate(Z = financialB)) %>% bind_rows(.id = "experiment") %>%
  

  mutate(
  Z = 
  (experiment == "experiment_1") *  Z +
  (experiment == "experiment_2") *  (Z + .5),
  Z = factor(Z, c(0, .5, 1), c("No incentives", "Local doctors + Freedoms", "All incentives")))  %>% 
  dplyr::filter(!is.na(Z))

# set colors to be consistent
group.colors <- c(Acceptant = "#7CAE00", Refusing = "#00BFC4", Undecided ="#C77CFF", Vaccinated = "#F8766D")

figure_1B <- policy_experiments %>%
  ggplot(aes(fill=status, y=share_pop_vaccinated, x=Z)) + 
  geom_bar(position="stack", stat="identity",width = 0.5,alpha = 0.7) + 
  scale_fill_manual(values=group.colors)+
  ylab("Share of population vaccinated") + 
  theme_bw(base_size=14)+
  ylim(0:1) +
  xlab("Incentives") + theme(legend.position="bottom")

figure_1B

policy_experiments %>% group_by(Z) %>% arrange(status) %>% mutate(cumul = cumsum(share_pop_vaccinated)) %>% slice(n())
# A tibble: 3 x 7
# Groups:   Z [3]
  experiment   status     Z                 n share_pop_vaccin~ financialB cumul
  <chr>        <chr>      <fct>         <dbl>             <dbl>      <int> <dbl>
1 experiment_1 Vaccinated No incentiv~ 0.0674            0.0582         NA 0.674
2 experiment_2 Vaccinated Local docto~ 0.0678            0.0606          0 0.738
3 experiment_1 Vaccinated All incenti~ 0.0687            0.0636         NA 0.750

3.9 Export Fig 1

Fig 1 in the paper has two panels. Exported here.

fig_1_AB <- ggarrange(
  figure_1A + theme(plot.margin=unit(c(1,1,1.5,1.2),"cm")), 
  figure_1B + theme(plot.margin=unit(c(1,1,1.5,1.2),"cm")),
                    labels = c("A", "B"),
                    ncol = 2, nrow = 1)

pdf(fig_1_AB_path, width = 12.5, height = 6)
fig_1_AB
dev.off()
png 
  2 
fig_1_AB

3.10 Notes

Share of hesitant, undecided, that are AFD supports or support no party

df %>% 
  group_by(status) %>% 
  summarize(AfD = mean(AfD), 
            no_party = mean(No.party), 
            trust_experts = mean(trust.experts), 
            media_exaggerates = mean(covid.media), 
            fear_longterm = mean(fear.longterm, na.rm = TRUE)) %>%
  kable(digits = 2, booktabs = TRUE)
status AfD no_party trust_experts media_exaggerates fear_longterm
Acceptant 0.03 0.21 0.71 0.33 NaN
Refusing 0.20 0.37 0.42 0.72 0.64
Undecided 0.08 0.39 0.56 0.51 0.68
Vaccinated 0.04 0.24 0.68 0.38 NaN

4 Heterogeneity

For heterogeneity analysis we draw on the Causal Forests approach described in Athey, Tibshrani and Wager (Annals of Statistics, 2019) , Nie and Wager, 2017

4.1 Helpers

A function to prep heterogeneous effects data for a given treatment, removing others.

num_tiles <- 4  # ntiles = CATE is above / below the median

# Note this is set up so that we can swap in the difference outcome easily

het_df <- function(treatment_name = "transaction_diff", 
                   outcome_name = "outcome_diff",
                   data = df) {
  data$W = data[treatment_name][[1]]
  data$Y = data[outcome_name][[1]]
  data %>% 
    dplyr::select(Y, W, ID, all_of(covariate_names)) %>%
    drop_na() %>%
    mutate_if(is.factor, as.numeric)  %>%
    # mutate(federal.state =  as.numeric(factor(federal.state))) %>%
    # Trick to render W binary
    dplyr::mutate(
      Y = ifelse(W<0, -Y, Y),
      W = W^2)  
}

A function to fit the forest and save outputs: input dataframe should have a privileged Y and W.

# heterogeneous treatment effects 

f_cf <- function(df, n_trees = 1000) {
  
  # rule of thumb: num.trees = number of individuals
  
  df_forests <- sample_frac(df, replace=FALSE, size=train_fraction)
  X <- as.matrix(df_forests[, covariate_names])
  
  cf <-
    causal_forest(
    X = X,
    Y = df_forests$Y,
    W = df_forests$W,
    num.trees=n_trees) 

  ### Predict point estimates and standard errors (training set, out-of-bag)
  oob_pred      <- predict(cf, estimate.variance=TRUE)
  oob_tauhat_cf <- oob_pred$predictions
  oob_tauhat_cf_se <- sqrt(oob_pred$variance.estimates)
  
  var_imp        <- c(variable_importance(cf)) 
  names(var_imp) <- covariate_names
  var_imp <- var_imp %>% sort(decreasing=TRUE)
  
  df_forests$cate  <- oob_tauhat_cf
  df_forests$ntile <- factor(ntile(oob_tauhat_cf, n=num_tiles))

# Standard model estimates by quantile
estimated_sample_ate <- 
  lm_robust(Y ~ ntile + ntile:W, data=df_forests) %>% 
  tidy() %>% 
  dplyr::filter(stringr::str_detect(term, ":W"))

# AIPW estimates by quantile
estimated_aipw_ate <- 
  lapply(
  seq(num_tiles), function(w) 
    average_treatment_effect(cf, subset = df_forests$ntile == w)
  ) %>% bind_rows

combined_estimates <- 
  bind_rows(
    estimated_sample_ate %>% mutate(type = "lm_robust") %>% dplyr::select(-outcome, -df, -statistic, - p.value),
    estimated_aipw_ate %>% rename(std.error = std.err) %>% 
      mutate(
        type  = "aipw",
        term = estimated_sample_ate$term,
        conf.low = estimate - 1.96*std.error,
        conf.high = estimate + 1.96*std.error)
  )
# Outputs
list(cf = cf,
     df_forests = df_forests, 
     X = X,
     oob_tauhat_cf = oob_tauhat_cf, 
     var_imp = var_imp, 
     ntile_estimates = combined_estimates)
}

A function to generate fitted values.

fitted_vals <- function(var_of_interest, model = test){
  
  df_forests <- model$df_forests
  cf <- model$cf
  
      is_continuous <- (length(unique(df_forests[var_of_interest][[1]])) > 5) # crude rule for determining continuity
    if(is_continuous) {
      x_grid <- quantile(df_forests[var_of_interest][[1]], probs = seq(0, 1, length.out = 5))
    } else {
      x_grid <- sort(unique(df_forests[var_of_interest][[1]]))
    }
    
  df_grid <-  setNames(data.frame(x_grid), var_of_interest)
  
  other_covariates <- covariate_names[!covariate_names %in% var_of_interest]
  df_median <- df_forests %>% dplyr::select(all_of(other_covariates)) %>% summarise_all(median) 
  df_eval <- crossing(df_median, df_grid)
  
  pred <- predict(cf, newdata=df_eval[,covariate_names], estimate.variance=TRUE)
df_eval$tauhat <- pred$predictions
df_eval$se <- sqrt(pred$variance.estimates)

# Change to factor so the plotted values are evenly spaced (e.g. logicals)
df_eval %>% arrange(var_of_interest) %>%
  mutate(var_of_interest = as.factor(as.numeric(df_eval[var_of_interest][[1]])))
}

4.2 Causal Forests

4.2.1 Implementation

cf_all <- 
  lapply(treatment_levels_diff, function(x) het_df(x, outcome_name = "outcome_diff") %>% f_cf(n_trees=N_trees))
names(cf_all) <- treatment_levels_diff

cf_undecided <- 
  lapply(treatment_levels_diff, function(x) 
  het_df(x, outcome_name = "outcome_diff", dplyr::filter(df, undecided == 1)) %>% 
    f_cf(n_trees=N_trees))
names(cf_undecided) <- treatment_levels_diff

cf_hesitant<- 
  lapply(treatment_levels_diff, function(x) 
  het_df(x, outcome_name = "outcome_diff", dplyr::filter(df, refusing == 1)) %>% 
    f_cf(n_trees=N_trees))
names(cf_hesitant) <- treatment_levels_diff

4.2.2 Most common predictors

lapply(cf_all, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Strongest predictions", booktabs = TRUE)
Strongest predictions
financialA_diff financialB_diff negative_diff transaction_diff
distance_km age2 age2 distance_km
risk solidarity support.distance age2
age2 days undecided solidarity
trust distance_km covid.media EU.support
days trust days undecided
left.right undecided distance_km health
lapply(cf_undecided, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Most common predictors, undecided ", booktabs = TRUE)
Most common predictors, undecided
financialA_diff financialB_diff negative_diff transaction_diff
risk age2 age2 age2
trust days support.distance solidarity
distance_km distance_km covid.infection.proximity distance_km
covid.income trust days EU.support
age2 risk health international.solidarity
days migration.support distance_km eduyears
lapply(cf_hesitant, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Most common predictors, refusing ", booktabs = TRUE)
Most common predictors, refusing
financialA_diff financialB_diff negative_diff transaction_diff
distance_km distance_km days days
age2 age2 distance_km distance_km
left.right solidarity age2 age2
migration.support days trust.country migration.support
trust.media risk left.right solidarity
risk EU.support EU.support covid.income
what_matters <- 
  list(All = cf_all, Undecided = cf_undecided, Hesitant = cf_hesitant) %>%
  lapply(function(res) 
    lapply(res, function(j) j$var_imp %>% t %>% data.frame) %>%
      bind_rows %>% mutate(treatment = names(cf_all)) %>%
      gather(covariate, "value", - treatment)) %>% bind_rows(.id = "group")
  
what_matters_plot <- 
  what_matters  %>% 
  mutate(
    covariate = factor(covariate,   rev(var_list$new_name), rev(var_list$label)),
    treatment = factor(treatment, treatment_levels_diff, treatment_labels)) %>% 
    ggplot(aes(value, covariate, color = treatment)) + 
    geom_point()+
    scale_x_continuous(name="Variable Importance")+
    theme_bw() + facet_wrap(~group) + ylab(" ")

what_matters_plot

pdf(fig_3_path, width = 9, height = 9)
  what_matters_plot
dev.off()
png 
  2 

4.2.3 Variation across ntiles

# plot
lapply(cf_all, function(j)  j$ntile_estimates) %>% 
  bind_rows(.id = "treatment") %>%
  ggplot() +
  geom_pointrange(
    aes(x = term, y = estimate, ymax = conf.high, ymin = conf.low, color = type), 
    size = 0.5,
    position = position_dodge(width = .5)) +
  geom_errorbar(aes(x = term, ymax = conf.high, ymin = conf.low, color = type), 
                width = 0.4,
                size = 0.75,
                position = position_dodge(width = .5)) +
  theme_bw() +
  labs(x = "N-tile", y = "ATE Estimate", title = "ATE within N-tiles") +
  facet_wrap(~treatment)

4.2.4 Graph heterogeneous effects

vars <- c("days", "age2", "undecided", "solidarity", "support.distance","distance_km")

all_marginals <- 
  lapply(vars, function(x) 
         lapply(cf_all , function(model)
           fitted_vals(x, model = model) %>%
             mutate(var = x)
           ) %>% bind_rows(.id = "treatment")) %>%
  bind_rows %>%
  mutate(var = factor(var, vars, c("Days", "Age (Percentile)", "Undecided", "Solidarity", "Support distancing","Distance to Vaccination Center")),
         treatment = 
           factor(treatment,treatment_levels_diff, treatment_labels),
var_of_interest = 
  factor(as.numeric(paste(var_of_interest)),
sort(unique(as.numeric(paste(var_of_interest))))))

figure_4 <-
  
all_marginals %>%
  mutate(ymin_val = tauhat-1.96*se) %>%
  mutate(ymax_val = tauhat+1.96*se) %>%
  ggplot()  + 
  geom_line(aes_string(x="var_of_interest", y="tauhat", group = 1), color="red") +
  geom_errorbar(aes_string(x="var_of_interest",ymin="ymin_val", ymax="ymax_val", width=.2),color="blue") +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, colour = "#B55555") +
  ylab("Predicted Treatment Effect") +
  theme_bw() +
  theme(axis.ticks = element_blank()) +
  facet_grid(treatment ~ var, scales = "free_x") +
  xlab("quantiles")

figure_4

pdf(fig_4_path, width = 8, height = 6) 
figure_4
dev.off()
png 
  2 

4.2.5 Best linear projections

Using causal forests functions:

blps <-
lapply(
  list(all = cf_all, undecided = cf_undecided, refusing = cf_hesitant), 
  function(set) lapply(set,  function(model)
       lapply(1:length(covariate_names), function(i)
           best_linear_projection(model$cf, model$X[,i]) %>% 
             tidy  %>% 
         dplyr::filter(term!="(Intercept)") %>% 
         dplyr::mutate(var = covariate_names[i])) %>%
        bind_rows()) %>% 
    bind_rows(.id = "treatment")) %>% bind_rows(.id = "set") %>%
  dplyr::mutate(treatment = factor(treatment, treatment_levels_diff, treatment_labels)) 


# importance
top <- 3
most_common <- c(lapply(cf_all, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist(),
lapply(cf_undecided, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist(),
lapply(cf_hesitant, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist()) %>% unique()

# add in promised measures from PAP

most_common <- unique(c(most_common, "AfD", "risk", "trust"))
blps_plot <- 
  blps %>% dplyr::filter(var %in% most_common) %>%
  mutate(var = factor(var,   rev(var_list$new_name), rev(var_list$label))) %>%
    ggplot(aes(estimate, var, color = set)) + 
    geom_point(position=position_dodge(width = .5)) +
    geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96*std.error), width = .25,  position=position_dodge(width = .5)) +
  facet_grid(~treatment) +
  geom_vline(xintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw()  +
  ylab("")



blps_plot

pdf(blps_plot_path, height = 6, width = 9)
blps_plot
dev.off()
png 
  2 

Using lm_robust:

lm_interactions <-
lapply(
  list(all = df, undecided = dplyr::filter(df, undecided==1), refusing = dplyr::filter(df, refusing==1)), 
  function(dff) lapply(treatment_levels_diff,  function(X)
       lapply(1:length(covariate_names), function(i)
           lm_robust(as.formula(paste("outcome_diff~", X, "*", covariate_names[i])), data = dff) %>% 
             tidy  %>% slice(4) %>%
         mutate(var = covariate_names[i], treatment = X)) %>%
        bind_rows()) %>% 
    bind_rows()) %>% bind_rows(.id = "set") %>%
  mutate(treatment = factor(treatment, treatment_levels_diff, treatment_labels)) 

lm_plot <- 
  lm_interactions %>% dplyr::filter(var %in% most_common) %>%
  dplyr::filter(!(var == "undecided" & set == "undecided")) %>%
  dplyr::filter(!(var == "refusing" & set == "refusing")) %>%
  mutate(var = factor(var,   rev(var_list$new_name), rev(var_list$label))) %>%
    ggplot(aes(estimate, var, color = set)) + 
    geom_point(position=position_dodge(width = .5)) +
    geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = .25,  position=position_dodge(width = .5)) +
  facet_grid(~treatment) +
  geom_vline(xintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  theme_bw()  +
  ylab("")

lm_plot

pdf(lm_plot_path, height = 6, width = 9)
lm_plot
dev.off()
png 
  2 

4.2.6 Marginal effects for undecided

v2 <- c("age2",  "solidarity", "support.distance", "trust", "trust.media")

v2labs <- c("Age (Percentile)",  "Solidarity", "Support distancing","General trust", "Trust media")


figure_8  <- 
  
  lapply(v2, function(x) 
         lapply(cf_undecided , function(model)
           fitted_vals(x, model = model) %>%
             mutate(var = x)
           ) %>% bind_rows(.id = "treatment")) %>%
  bind_rows %>%
  mutate(var = factor(var, v2, v2labs),
         treatment = factor(treatment, 
                            paste0(treatment_levels_diff), 
                            treatment_labels),
var_of_interest = 
  factor(as.numeric(paste(var_of_interest)),
sort(unique(as.numeric(paste(var_of_interest)))))) %>%
  mutate(ymin_val = tauhat-1.96*se) %>%
  mutate(ymax_val = tauhat+1.96*se) %>%
  ggplot()  + 
  geom_line(aes_string(x="var_of_interest", y="tauhat", group = 1), color="red") +
  geom_errorbar(aes_string(x="var_of_interest",ymin="ymin_val", ymax="ymax_val", width=.2),color="blue") +
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, colour = "#B55555") +
  ylab("Predicted Treatment Effect") +
  theme_bw() +
  theme(axis.ticks = element_blank()) +
  facet_grid(treatment ~ var, scales = "free_x") +
  xlab("quantiles")


figure_8

pdf(undecided_cf_path, height = 8, width = 8)
figure_8
dev.off()
png 
  2 

4.3 Interaction plots

4.3.1 Predictions by age

M <- lm_robust(outcome_diff~age2*negative_diff*transaction_diff, data = df)

# Different for old 
linearHypothesis(M, "negative_diff + age2:negative_diff = transaction_diff + age2:transaction_diff")
Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff  + age2:negative_diff - age2:transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ age2 * negative_diff * transaction_diff

  Res.Df Df  Chisq Pr(>Chisq)    
1  20493                         
2  20492  1 84.622  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Different for young 
linearHypothesis(M, "negative_diff  = transaction_diff ")
Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ age2 * negative_diff * transaction_diff

  Res.Df Df  Chisq Pr(>Chisq)    
1  20493                         
2  20492  1 60.115  8.945e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M0 <- lm(outcome_diff~age2*negative_diff*transaction_diff*financialB_diff, data = df)
M1 <- lm(outcome_diff~age*negative_diff*transaction_diff*financialB_diff, data = df)
M2 <- lm(outcome_diff~age*negative_diff*transaction_diff*financialB_diff, data = dplyr::filter(df, status == "Undecided"))

dff <- list(
  Freedoms = interplot(m = M2, var1 = "negative_diff", var2 = "age", plot = FALSE),
  'Local Doctors' = interplot(m = M2, var1 = "transaction_diff", var2 = "age", plot = FALSE),
    'Financial 50' = interplot(m = M2, var1 = "financialB_diff", var2 = "age", plot = FALSE)) %>% bind_rows(.id = "Treatment")


age_plot <- dff %>%
ggplot(aes(x=age, y=coef, colour=Treatment)) + #geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin=lb, ymax=ub, fill=Treatment), alpha=0.1) + 
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  ylab("Effect of treatment on acceptance") + 
  xlab("Age of respondent")  + 
  theme_bw()


age_plot

4.3.2 Predictions by distance

M <- lm_robust(outcome_diff~distance_km*negative_diff*transaction_diff, data = df)
# Different for old 
linearHypothesis(M, "negative_diff + distance_km:negative_diff = transaction_diff + distance_km:transaction_diff")
Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff  + distance_km:negative_diff - distance_km:transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ distance_km * negative_diff * transaction_diff

  Res.Df Df Chisq Pr(>Chisq)   
1  20493                       
2  20492  1 7.811   0.005193 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Different for young 
linearHypothesis(M, "negative_diff  = transaction_diff ")
Linear hypothesis test

Hypothesis:
negative_diff - transaction_diff = 0

Model 1: restricted model
Model 2: outcome_diff ~ distance_km * negative_diff * transaction_diff

  Res.Df Df  Chisq Pr(>Chisq)
1  20493                     
2  20492  1 1.6787     0.1951
M1 <- lm(outcome_diff~distance_km*negative_diff*transaction_diff*financialB_diff, data = df)
M2 <- lm(outcome_diff~distance_km*negative_diff*transaction_diff*financialB_diff, data = dplyr::filter(df, status == "Undecided"))

dff <- list(
  Freedoms = interplot(m = M1, var1 = "negative_diff", var2 = "distance_km", plot = FALSE),
  'Local Doctors' = interplot(m = M1, var1 = "transaction_diff", var2 = "distance_km", plot = FALSE),
    'Financial 50' = interplot(m = M1, var1 = "financialB_diff", var2 = "distance_km", plot = FALSE)) %>% bind_rows(.id = "Treatment")


distance_plot <- dff %>%
ggplot(aes(x=distance_km, y=coef, colour=Treatment)) + #geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin=lb, ymax=ub, fill=Treatment), alpha=0.1) + 
  geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") + 
  ylab("effect of treatment on acceptance") + 
  xlab("Distance to Vaccination Center")  + 
  theme_bw()


distance_plot

4.3.3 Export Fig 2

Fig 2 in the paper combines three panels. Exported here.

fig_2_ABC <- ggarrange(lm_plot + theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm"))
                                       ,
                       ggarrange(age_plot + ylim(-.03, .13) +
                                    theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm")), 
                                 distance_plot + ylab("") + ylim(-.03, .13) +
                                    theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm")),
                       
                    labels = c("B", "C"),
                    ncol = 2, common.legend = TRUE, legend="bottom"),
                    nrow = 2, labels = "A")

# figure_13
pdf(fig_2_ABC_path, width = 9, height = 9)
fig_2_ABC
dev.off()
png 
  2 
fig_2_ABC