Housekeeping
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, comment=NA)
options(qwraps2_markup = "markdown")
Packages: The next code chunk uses pacman
to install and load all packages.
if (!require(pacman)) install.packages("pacman")
pacman::p_load(
rgdal, # geo data
geojsonsf, # read json
rgeos, # geo data
sf, # geo data
gridExtra, # plot arrange
gtrendsR, # google trends
aod, # hypothesis testing (1.3.1)
car, # linear hypothesis testing for causal tree (3.0-2)
corrplot, # Correlations (0.84)
DeclareDesign,
dplyr, # Data manipulation (0.8.0.1)
evtree, # evolutionary learning of globally optimal trees (1.0-7)
fastDummies,
fBasics, # Summary statistics (3042.89)
ggplot2,
ggpubr,
grf, # Generalized random forests (0.10.2)
haven, # load sav
interplot, # Interactions plotting
kableExtra, # Prettier RMarkdown (1.0.1)
knitr,
labelled,
psych, # Correlation p-values (1.8.12)
purrr,
rpart, # Classification and regression trees, or CART (4.1-13)
reshape2,
rpart.plot, # Plotting trees (3.0.6)
readr, # Reading csv files (1.3.1)
sjlabelled,
stats,
summarytools,
texreg,
tidyverse,
tidyselect,
tidyr, # Database operations (0.8.3)
treeClust, # Predicting leaf position for causal trees (1.1-7)
tibble) # Modern alternative to data frames (2.1.1)
# Number of trees for causal forests
N_trees = 1000
# Set seed for reproducibility
set.seed(201911)
Paths: The next code chunk provides paths for pulling data and saving figures and tables.
# PNAS figures
fig_1_AB_path <- "output/fig_1_AB.pdf"
fig_2_ABC_path <- "output/fig_2_ABC.pdf"
# Interim figures
fig_1_path <- "output/figure_1.pdf"
fig_2_path <- "output/figure_2.pdf"
fig_3_path <- "output/figure_3.pdf"
fig_4_path <- "output/figure_4.pdf"
google_plot_path <- "output/figure_5.pdf"
corr_matrix_plot_path <- "output/figure_6.pdf"
correlates_plot_path <- "output/figure_7.pdf"
undecided_cf_path <- "output/figure_8.pdf"
blps_plot_path <- "output/figure_9.pdf"
lm_plot_path <- "output/figure_10.pdf"
tab_1_path <- "output/table_1.tex"
tab_2_path <- "output/table_2.tex"
tab_3_path <- "output/table_3.tex"
# variable names and labels
var_list <- read.csv("input/vars.csv") %>% arrange(order)
covariate_names <- var_list$new_name[var_list$covariate==1]
# Labels for treatments
treatments <- treatment_levels <- c("financialA","financialB", "negative", "transaction")
treatment_levels_diff <- c("financialA_diff","financialB_diff", "negative_diff", "transaction_diff")
treatment_labels <- c("Financial 25", "Financial 50", "Freedoms", "Local Doctors")
statuses <- c("Acceptant", "Refusals", "Undecided")
# Training fraction
train_fraction <- 1 # for causal forests: currently all data used
Prepare Data
Import data
Import data, get labels for treatment, rename variables, generate differences.
Raw data has treatments recorded in a single vector with 12 values. These are now transformed into the underlying factors.
# get labels
Z_labels <- read.csv("input/Z_labels.csv") %>% dplyr::select(-X)
kable(Z_labels, booktabs = TRUE)
Z_1_1
|
negative_t1
|
financialA_t1
|
financialB_t1
|
transaction_t1
|
1
|
0
|
0
|
0
|
0
|
2
|
0
|
0
|
0
|
1
|
3
|
0
|
1
|
0
|
0
|
4
|
0
|
0
|
1
|
0
|
5
|
1
|
0
|
0
|
0
|
6
|
1
|
0
|
0
|
1
|
7
|
1
|
1
|
0
|
1
|
8
|
1
|
0
|
1
|
1
|
9
|
0
|
1
|
0
|
1
|
10
|
0
|
0
|
1
|
1
|
11
|
1
|
1
|
0
|
0
|
12
|
1
|
0
|
1
|
0
|
df <- read.csv("input/wave_1.csv") %>%
rowid_to_column("ID") %>%
mutate(
Z_1_1 = as.numeric(paste(c_0031)),
outcome_t1 = v_74 / 10,
Z_1_2 = as.numeric(paste(c_0032)),
outcome_t2 = v_77 / 10
) %>%
left_join(Z_labels) %>%
left_join(
Z_labels %>%
rename(
Z_1_2 = Z_1_1,
negative_t2 = negative_t1,
transaction_t2 = transaction_t1,
financialA_t2 = financialA_t1,
financialB_t2 = financialB_t1
)
) %>%
# rename covariates
rename_at(
vars(var_list$var_name[var_list$rename == 1]),
~ var_list$new_name[var_list$rename == 1]) %>%
# transformations
mutate(
outcome_diff = outcome_t2 - outcome_t1,
negative_diff = negative_t2 - negative_t1,
financialA_diff = financialA_t2 - financialA_t1,
financialB_diff = financialB_t2 - financialB_t1,
transaction_diff = transaction_t2 - transaction_t1,
days = (date - min(date) +1)/21, # Normalized to 0-1
status = ifelse(
is.na(vaccination.intent),
"Vaccinated",
paste(vaccination.intent)
),
status = dplyr::recode(
status,
"1" = "Acceptant",
"2" = "Refusing",
"3" = "Undecided"
),
eduyears = (eduyears - min(eduyears, na.rm = TRUE))/max(eduyears, na.rm = TRUE),
household.larger = (household.size > 2)*1,
covid.media = 1-(covid.media-1)/4,
vaccinated = 1*(vaccination == 1),
network.vaccinated = (network.vaccinated-1)/4,
covid.infection = 1*(covid.infection == 1),
covid.infection.proximity = (covid.infection.proximity-1)/2,
covid.information = 1- (covid.information-1)/4,
covid.rules.mask = 1- (covid.rules.mask-1)/5,
covid.rules.distance = 1- (covid.rules.distance-1)/5,
support.distance = 1-(support.distance-1)/4,
covid.income = 1-(covid.income-1)/4,
voted = 1*(voter.turnout ==1),
age2 = (age - min(age))/(max(age) - min(age)),
male = 1*(gender == 2),
political.interest = 1 - (political.interest-1)/3,
left.right = (left.right-1)/10,
CDU.CSU = 1*(party.id <= 2),
CDU.CSU = ifelse(is.na(CDU.CSU), 0, CDU.CSU),
SPD = 1*(party.id ==3),
SPD = ifelse(is.na(SPD), 0, SPD),
AfD = 1*(party.id ==4),
AfD = ifelse(is.na(AfD), 0, AfD),
Greens = 1*(party.id ==5),
Greens = ifelse(is.na(Greens), 0, Greens),
FDP = 1*(party.id ==6),
FDP = ifelse(is.na(FDP), 0, FDP),
Left = 1*(party.id ==7),
Left = ifelse(is.na(Left), 0, Left),
No.party = 1*(party.id == 9),
No.party = ifelse(is.na(No.party), 0, No.party),
solidarity = solidarity/10,
international.solidarity = international.solidarity/10,
EU.support = EU.support/10,
migration.support = migration.support/10,
Employed = 1*(Employment.Status==1),
citizenship = (citizenship==3)*1,
Country.of.birth = 1*(Country.of.birth==2),
risk = risk/10,
trust = trust/10,
trust.government = trust.government/4,
trust.experts = trust.experts/4,
trust.country = trust.country/4,
trust.media = trust.media/4,
trust.healthcare = trust.healthcare/4,
acceptant = 1*(vaccination.intent == 1),
acceptant = ifelse(is.na(vaccination.intent), 1, acceptant),
refusing = vaccination.intent == 2,
refusing = ifelse(is.na(vaccination.intent), 0, refusing),
undecided = 1*(vaccination.intent == 3),
undecided = ifelse(is.na(vaccination.intent), 0, undecided),
health = (health - 1)/6,
health2 = 1*(health2 == 1),
covid.surveys = ifelse(covid.surveys == 99, NA, covid.surveys),
east.west =ifelse(federal.state== "4" |#"Brandenburg"
federal.state== "8" | # Mecklenburg-Vorpommern
federal.state== "13"| # Sachsen
federal.state== "14"| # Sachsen-Anhalt
federal.state== "16" # Thüringen
, 1, 0)
)
Stacked dataframe
The wide dataframe is used for first differences analysis. In addition we make use of a stacked data frame for fixed effects analysis.
# Stack
df_stacked <-
bind_rows(mutate(df, t = 1), mutate(df, t = 2)) %>%
mutate(
outcome = ifelse(t == 1, outcome_t1, outcome_t2),
negative = ifelse(t == 1, negative_t1, negative_t2),
financialA = ifelse(t == 1, financialA_t1, financialA_t2),
financialB = ifelse(t == 1, financialB_t1, financialB_t2),
transaction = ifelse(t == 1, transaction_t1, transaction_t2),
)
Summary statistics
The next blocks calculate key summary statistics.
# covariate_names[!(covariate_names %in% names(df))]
small_df <-
df %>% dplyr::select(all_of(covariate_names)) %>%
# select_if(is.numeric) %>%
drop_na()
# Make a data.frame containing summary statistics of interest
summ_stats <-
small_df %>%
fBasics::basicStats() %>%
t() %>%
as.data.frame() %>%
dplyr::select("Mean", "Stdev", "Minimum", "1. Quartile", "Median", "3. Quartile", "Maximum") %>%
rename('Lower quartile'= '1. Quartile', 'Upper quartile' ='3. Quartile')
# Add in labels
summ_stats <- summ_stats %>%
mutate(Variable = factor(rownames(summ_stats), var_list$new_name, var_list$label)) %>% relocate(Variable)
Variable
|
Mean
|
Stdev
|
Minimum
|
Lower quartile
|
Median
|
Upper quartile
|
Maximum
|
Days since survey start
|
0.57
|
0.19
|
0.05
|
0.38
|
0.57
|
0.71
|
1
|
Male respondent
|
0.50
|
0.50
|
0.00
|
0.00
|
0.00
|
1.00
|
1
|
Age (percentile)
|
0.49
|
0.27
|
0.00
|
0.26
|
0.51
|
0.72
|
1
|
Non German Citizen
|
0.03
|
0.16
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Born outside of Germany
|
0.06
|
0.24
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Migration background of parents
|
0.23
|
0.42
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
In former East Germany
|
0.15
|
0.36
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Household size 3 or more
|
0.33
|
0.47
|
0.00
|
0.00
|
0.00
|
1.00
|
1
|
Employed
|
0.44
|
0.50
|
0.00
|
0.00
|
0.00
|
1.00
|
1
|
Years of Education
|
0.64
|
0.24
|
0.00
|
0.50
|
0.50
|
0.80
|
1
|
Previously had Covid
|
0.03
|
0.17
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Members of network infected
|
0.40
|
0.36
|
0.00
|
0.00
|
0.50
|
0.50
|
1
|
Good general health
|
0.65
|
0.23
|
0.00
|
0.50
|
0.67
|
0.83
|
1
|
Covid pre-existing conditions
|
0.41
|
0.49
|
0.00
|
0.00
|
0.00
|
1.00
|
1
|
Share of network vaccinated
|
0.25
|
0.24
|
0.00
|
0.00
|
0.25
|
0.50
|
1
|
Respondent has been vaccinated
|
0.06
|
0.25
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Refusals
|
0.16
|
0.37
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Undecided
|
0.17
|
0.38
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Acceptant
|
0.67
|
0.47
|
0.00
|
0.00
|
1.00
|
1.00
|
1
|
Seeks Covid information
|
0.73
|
0.32
|
0.00
|
0.50
|
0.75
|
1.00
|
1
|
Believes media exaggerates risk
|
0.42
|
0.32
|
0.00
|
0.25
|
0.50
|
0.75
|
1
|
Compliance: mask rules
|
0.90
|
0.16
|
0.00
|
0.80
|
1.00
|
1.00
|
1
|
Compliance: distance rule
|
0.82
|
0.19
|
0.00
|
0.80
|
0.80
|
1.00
|
1
|
Support distancing even for vaccinated
|
0.71
|
0.30
|
0.00
|
0.50
|
0.75
|
1.00
|
1
|
Income loss due to Covid (.5 = no change)
|
0.59
|
0.19
|
0.00
|
0.50
|
0.50
|
0.75
|
1
|
Political interest
|
0.65
|
0.29
|
0.00
|
0.33
|
0.67
|
1.00
|
1
|
Voted last elections
|
0.83
|
0.38
|
0.00
|
1.00
|
1.00
|
1.00
|
1
|
Right leaning
|
0.44
|
0.18
|
0.00
|
0.30
|
0.50
|
0.50
|
1
|
AfD
|
0.07
|
0.26
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Conservatives
|
0.20
|
0.40
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Liberals
|
0.05
|
0.23
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Green Party
|
0.16
|
0.37
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Left Party
|
0.08
|
0.26
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
Social Democrats
|
0.14
|
0.34
|
0.00
|
0.00
|
0.00
|
0.00
|
1
|
No party identification
|
0.27
|
0.44
|
0.00
|
0.00
|
0.00
|
1.00
|
1
|
Willingness to take risks
|
0.41
|
0.24
|
0.00
|
0.20
|
0.40
|
0.60
|
1
|
General Trust
|
0.41
|
0.25
|
0.00
|
0.20
|
0.40
|
0.60
|
1
|
Trust state government
|
0.44
|
0.29
|
0.00
|
0.25
|
0.50
|
0.75
|
1
|
Trust experts
|
0.64
|
0.26
|
0.00
|
0.50
|
0.75
|
0.75
|
1
|
Trust federal government
|
0.45
|
0.27
|
0.00
|
0.25
|
0.50
|
0.75
|
1
|
Trust media
|
0.38
|
0.25
|
0.00
|
0.25
|
0.50
|
0.50
|
1
|
Trust healthcare
|
0.57
|
0.26
|
0.00
|
0.50
|
0.50
|
0.75
|
1
|
Values social solidarity
|
0.58
|
0.23
|
0.00
|
0.50
|
0.50
|
0.70
|
1
|
Values international solidarity
|
0.58
|
0.31
|
0.00
|
0.40
|
0.50
|
0.80
|
1
|
EU support
|
0.52
|
0.29
|
0.00
|
0.30
|
0.50
|
0.70
|
1
|
Migration support
|
0.48
|
0.27
|
0.00
|
0.30
|
0.50
|
0.70
|
1
|
Distance to Vaccination Center
|
0.13
|
0.10
|
0.00
|
0.06
|
0.11
|
0.18
|
1
|
Pairwise correlations.
png
2
Around 65 % in the sample would get vaccinated, 17 % would not, and 18 % remain undecided.
fig_1 <- df %>% ggplot(aes(status)) +
geom_hline(yintercept=0.7, linetype="longdash", lwd=0.35, colour = "#B55555") +
geom_bar(aes(y = (..count..)/sum(..count..)),width = 0.6,alpha = 0.5) +
scale_y_continuous(labels=scales::percent) +
theme_bw(base_size=16)+
theme(axis.title.y = element_blank())+
ylab("Percent (%)")+
coord_flip()
fig_1
pdf(fig_1_path, height = 3, width = 6)
fig_1
dev.off()
png
2
Concerns of the hesitent and undecided
fig_2 <-
df %>% dplyr::select(ID, starts_with("fear.")) %>%
mutate_if(is.character, as.numeric) %>%
melt(variable.name = "Group", id.vars = "ID") %>%
mutate(Group =
factor(Group,
c("fear.longterm", "fear.admission", "fear.effectiveness", "fear.side.effects", "fear.no.danger"),
c("Longterm Consequences", "Doubt approval process", "Doubt effectiveness", "Side Effects", "Covid not dangerous")))%>%
group_by(Group) %>%
summarize(mean = 100*mean(value, na.rm = TRUE)) %>%
ggplot() +
geom_bar(aes(Group, mean), stat = "identity", width = 0.6,alpha = 0.5)+
theme_bw(base_size=18)+
ylab("Percent (%)")+
theme(axis.title.y = element_blank())+
coord_flip()
fig_2
pdf(fig_2_path, height = 4, width = 8)
fig_2
dev.off()
png
2
Correlates of Hesitancy
family_order <- var_list %>% group_by(family) %>% slice(1) %>% arrange(Family_order) %>% pull(family)
correlates <-
list("acceptant", "refusing", "undecided") %>%
lapply(function(y)
lapply(setdiff(covariate_names, tolower(statuses)), function(x)
lm_robust(as.formula(paste(y, "~", x)), data = df) %>% tidy %>% dplyr::mutate(y=y, x=x)) %>% bind_rows) %>% bind_rows %>% dplyr::filter(term!="(Intercept)" & !(x %in% c("acceptant", "refusing", "undecided"))) %>%
dplyr::mutate(x = factor(x, rev(var_list$new_name), rev(var_list$label))) %>%
left_join(var_list %>% dplyr::rename(term = new_name) %>% dplyr::select(term, family)) %>%
dplyr::mutate(family = factor(family, family_order))
fig_cov <-
correlates %>%
dplyr::mutate(y = factor(y, c("acceptant", "undecided", "refusing" ))) %>%
dplyr::filter(x!="federal.state") %>%
ggplot(aes(estimate , x)) + geom_point() + facet_grid(family ~ y, scales = "free_y") + ylab("") + xlab("How much more or less likely to be [acceptant / undecided / refusing] at baseline") +
geom_vline(xintercept = 0, color = "red") +
theme_bw() +
geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = .25)
fig_cov
pdf(correlates_plot_path, height =9, width = 8)
fig_cov
dev.off()
png
2
Main Results
Background
We conduct analysis of a 2x2x3 factorial survey experiment, with factors assigned with independent probabilities. Each respondent receives two vignettes successively. Our population of interest consists of all German citizens over the age of 18. For this study, we rely on a representative sample of 20,000 citizens across Germany. The online-survey was rolled out between March 12 and March 14, 2021.
Negative incentives |
Control: There are no special regulations for vaccinated people even when the Corona incidence is high. For example, they cannot travel again, visit cinemas, restaurants or concerts and are still subject to contact restrictions. |
|
Treatment: Special regulations apply to vaccinated people. For example, even when the Corona incidence is high, they can travel again, visit cinemas, restaurants or concerts and are not subject to any contact restrictions |
Reduction Transaction Cost |
Control: Eligible citizens can get vaccinated against Corona at the nearest vaccination center, but not from their family doctor. |
|
Treatment: Eligible citizens can have themselves vaccinated against Corona at the nearest vaccination center or their family doctor. |
Financial renumeration |
Control: Citizens who are vaccinated will not receive any allowance after receiving the vaccination. |
|
Treatment: Citizens who get vaccinated receive an expense allowance of {Level: 25} euros after receiving the vaccination. |
|
Treatment: Citizens who get vaccinated receive an expense allowance of {Level: 50} euros after receiving the vaccination. |
Check strategy
We demonstrate that first differences and fixed effects effects analyses produce essentially identical results.
# Analysis
lm_robust(
outcome_diff ~ negative_diff + financialA_diff + financialB_diff + transaction_diff,
data=df, se_type = "stata") %>%
tidy %>%
kable(caption = "Basic first differences analysis", digits = 2, booktabs = TRUE)
Basic first differences analysis
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
df
|
outcome
|
(Intercept)
|
0.00
|
0
|
-3.43
|
0
|
-0.01
|
0.00
|
20495
|
outcome_diff
|
negative_diff
|
0.03
|
0
|
15.52
|
0
|
0.02
|
0.03
|
20495
|
outcome_diff
|
financialA_diff
|
0.01
|
0
|
5.40
|
0
|
0.01
|
0.01
|
20495
|
outcome_diff
|
financialB_diff
|
0.02
|
0
|
11.30
|
0
|
0.02
|
0.03
|
20495
|
outcome_diff
|
transaction_diff
|
0.03
|
0
|
17.99
|
0
|
0.03
|
0.03
|
20495
|
outcome_diff
|
lm_robust(
outcome ~ negative + financialA + financialB + transaction,
data=df_stacked, se_type = "stata", fixed_effects = ~ID) %>%
tidy %>%
kable(caption = "Basic Fixed effects analysis", digits = 3, booktabs = TRUE)
Basic Fixed effects analysis
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
df
|
outcome
|
negative
|
0.025
|
0.002
|
15.500
|
0
|
0.022
|
0.028
|
20496
|
outcome
|
financialA
|
0.010
|
0.002
|
5.441
|
0
|
0.006
|
0.013
|
20496
|
outcome
|
financialB
|
0.022
|
0.002
|
11.257
|
0
|
0.018
|
0.026
|
20496
|
outcome
|
transaction
|
0.030
|
0.002
|
18.113
|
0
|
0.027
|
0.033
|
20496
|
outcome
|
# The analysis function takes a dataframe, demeans within the (differenced) Xs
our_analysis <- function(dff)
lm_robust(
outcome_diff ~ negative*financialA*transaction + negative*financialB*transaction,
data = dff %>%
mutate(negative = negative_diff - mean(negative_diff),
financialA = financialA_diff- mean(financialA_diff),
financialB = financialB_diff - mean(financialB_diff),
transaction = transaction_diff - mean(transaction_diff)),
se_type = "stata")
our_analysis(df) %>% tidy %>%
kable(caption = "Basic Fixed effects analysis with interactions", digits = 3)
Basic Fixed effects analysis with interactions
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
df
|
outcome
|
(Intercept)
|
-0.004
|
0.001
|
-3.752
|
0.000
|
-0.006
|
-0.002
|
20488
|
outcome_diff
|
negative
|
0.025
|
0.002
|
15.214
|
0.000
|
0.022
|
0.028
|
20488
|
outcome_diff
|
financialA
|
0.010
|
0.002
|
5.343
|
0.000
|
0.006
|
0.013
|
20488
|
outcome_diff
|
transaction
|
0.030
|
0.002
|
18.078
|
0.000
|
0.026
|
0.033
|
20488
|
outcome_diff
|
financialB
|
0.022
|
0.002
|
11.236
|
0.000
|
0.018
|
0.026
|
20488
|
outcome_diff
|
negative:financialA
|
0.006
|
0.003
|
2.092
|
0.036
|
0.000
|
0.012
|
20488
|
outcome_diff
|
negative:transaction
|
0.002
|
0.003
|
0.734
|
0.463
|
-0.003
|
0.007
|
20488
|
outcome_diff
|
financialA:transaction
|
-0.001
|
0.003
|
-0.385
|
0.700
|
-0.006
|
0.004
|
20488
|
outcome_diff
|
negative:financialB
|
0.006
|
0.003
|
1.820
|
0.069
|
0.000
|
0.012
|
20488
|
outcome_diff
|
transaction:financialB
|
-0.002
|
0.003
|
-0.595
|
0.552
|
-0.007
|
0.004
|
20488
|
outcome_diff
|
negative:financialA:transaction
|
-0.003
|
0.004
|
-0.609
|
0.542
|
-0.011
|
0.006
|
20488
|
outcome_diff
|
negative:transaction:financialB
|
-0.006
|
0.005
|
-1.294
|
0.196
|
-0.015
|
0.003
|
20488
|
outcome_diff
|
Main analysis
statuses <- unique(df$status)
model_list <- lapply(statuses, function(j) our_analysis(df %>% dplyr::filter(status == j)))
names(model_list) <- statuses
model_list$All <- our_analysis(df)
by_status <-
lapply(model_list, function(model) tidy(model)) %>%
bind_rows(.id = "status") %>%
mutate(status = factor(status, c("All", statuses)))
# Table version
fileConn <- file(tab_2_path)
writeLines(texreg(model_list, float.pos = "h!", include.ci = FALSE, caption = "Main results, with interactions. Individual fixed effects modelled by regressing differences in responses across two vignetters on differences in treatments and interactions. All treatments are centered on zero for each group.",
custom.coef.map = list("(Intercept)" = "Constant (No incentives)",
"financialA" = "25 Euro incentive",
"financialB" = "50 Euro incentive",
"negative" = "Freedoms",
"transaction" = "Local doctors",
"negative:financialA" = "Freedoms * 25 Euros",
"negative:financialB" = "Freedoms * 50 Euros",
"negative:transaction" = "Freedoms * Local doctors",
"financialA:transaction" = "Local doctors * 25 Euros",
"transaction:financialB" = "Local doctors * 50 Euros",
"negative:financialA:transaction" = "Freedoms * Local doctors * 25 Euros",
"negative:transaction:financialB" ="Freedoms * Local doctors * 50 Euros"), digits = 3), fileConn)
close(fileConn)
# Figure Version
figure_1A <-
by_status %>%
dplyr::filter(term %in% c("negative", "financialA", "financialB", "transaction")) %>%
dplyr::filter(status != "Vaccinated") %>%
mutate(Treatment = factor(term, treatment_levels, treatment_labels)) %>%
ggplot(aes(Treatment, estimate, color = status, shape = status)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw(base_size=14)+
ylab("Estimated effect on prob vaccination (0-1)") + theme(legend.position="bottom")
figure_1A
Additional vaccinated subgroup
# Figure Version
figure_subgroups <-
by_status %>%
dplyr::filter(term %in% c("negative", "financialA", "financialB", "transaction")) %>%
#dplyr::filter(status != "Vaccinated") %>%
mutate(Treatment = factor(term, treatment_levels, treatment_labels)) %>%
ggplot(aes(Treatment, estimate, color = status, shape = status)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw(base_size=14)+
ylab("Change in probability of willingness to get vaccinated (0-1)")
figure_subgroups
By round
# analysis by period
df_stacked2 <- bind_rows(
df_stacked %>% dplyr::filter(status != "Vaccinated"),
df_stacked %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All"))
out <- lapply(1:2, function(period)
df_stacked2 %>% dplyr::filter(t==period) %>%
split(.$status) %>%
map(~lm_robust(outcome ~ negative + financialA + financialB + transaction, data = .)) %>%
map(tidy) %>%
bind_rows(.id = "status") %>% mutate(t=period)) %>%
bind_rows() %>%
dplyr::filter(term %in% treatments) %>%
dplyr::filter(status != "Vaccinated") %>%
mutate(t = factor(t)) %>%
mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
mutate(term = factor(term, treatments, treatment_labels))
out %>%
ggplot(aes(term, estimate, color = t, shape = t)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw(base_size=14)+ facet_wrap(~status, ncol = 2) +
ylab("Effect on vaccination probability (0-1)")
# Analysis first round
first <-
df %>%
split(.$status) %>%
map(~lm_robust(outcome_t1 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1, data = .)) %>%
map(tidy) %>%
bind_rows(.id = "status")%>%
dplyr::filter(term %in% c("negative_t1", "financialA_t1", "financialB_t1", "transaction_t1")) %>%
dplyr::filter(status != "Vaccinated")
first<-first %>%
ggplot(aes(term, estimate, color = status, shape = status)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw(base_size=14)+
ylab("Change in probability of willingness to get vaccinated (0-1)")+
ggtitle("First Vignette")
# Analysis second round
second<-df %>%
split(.$status) %>%
map(~lm_robust(outcome_t2~ negative_t2 + financialA_t2 + financialB_t2 + transaction_t2, data = .)) %>%
map(tidy) %>%
bind_rows(.id = "status")%>%
dplyr::filter(term %in% c("negative_t2", "financialA_t2", "financialB_t2", "transaction_t2")) %>%
dplyr::filter(status != "Vaccinated") %>%
ggplot(aes(term, estimate, color = status, shape = status)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1)+
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw(base_size=14)+
ylab("Change in probability of willingness to get vaccinated (0-1)")+
ggtitle("Second Vignette")
# Analysis first round
ggarrange(first, second)
Consistency bias
# effect of lag:
# Note that lag treatment is negatively correlated with current treatment
lag_out <-
bind_rows(
df %>% dplyr::filter(status != "Vaccinated"),
df %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All")) %>%
split(.$status) %>%
map(~lm_robust(outcome_t2 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1, data = .)) %>%
map(tidy) %>%
bind_rows(.id = "status") %>%
dplyr::filter(term %in% paste0(treatments, "_t1")) %>%
dplyr::filter(status != "Vaccinated") %>%
mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
mutate(term = factor(term, paste0(treatments, "_t1"), treatment_labels))
lag_out %>%
ggplot(aes(term, estimate, color = status, shape = status)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw(base_size=14)+
ylab("Effect of t1 treatment on t2 vaccination probability (0-1)")
c(
cor(df$financialA_t1, df$financialA_t2),
cor(df$financialB_t1, df$financialB_t2),
cor(df$negative_t1, df$negative_t2),
cor(df$transaction_t1, df$transaction_t2))
[1] -0.12590611 -0.07113368 -0.13421823 -0.06858775
Accounting for current:
lm_robust(outcome_t2 ~
negative_t1 + financialA_t1 + financialB_t1 + transaction_t1 +
financialA_t2*transaction_t2*negative_t2+
financialB_t2*transaction_t2*negative_t2,
data = df %>% dplyr::filter(status != "Vaccinated")) %>% tidy %>%
kable(digits = 2)
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
df
|
outcome
|
(Intercept)
|
0.66
|
0.01
|
60.95
|
0.00
|
0.64
|
0.68
|
19140
|
outcome_t2
|
negative_t1
|
0.01
|
0.01
|
1.64
|
0.10
|
0.00
|
0.02
|
19140
|
outcome_t2
|
financialA_t1
|
-0.01
|
0.01
|
-1.46
|
0.14
|
-0.02
|
0.00
|
19140
|
outcome_t2
|
financialB_t1
|
-0.01
|
0.01
|
-0.95
|
0.34
|
-0.02
|
0.01
|
19140
|
outcome_t2
|
transaction_t1
|
0.01
|
0.01
|
1.80
|
0.07
|
0.00
|
0.02
|
19140
|
outcome_t2
|
financialA_t2
|
0.00
|
0.01
|
0.22
|
0.83
|
-0.02
|
0.03
|
19140
|
outcome_t2
|
transaction_t2
|
0.05
|
0.01
|
3.75
|
0.00
|
0.02
|
0.07
|
19140
|
outcome_t2
|
negative_t2
|
0.02
|
0.01
|
1.53
|
0.13
|
-0.01
|
0.04
|
19140
|
outcome_t2
|
financialB_t2
|
0.02
|
0.01
|
1.92
|
0.06
|
0.00
|
0.05
|
19140
|
outcome_t2
|
financialA_t2:transaction_t2
|
-0.02
|
0.02
|
-0.96
|
0.34
|
-0.05
|
0.02
|
19140
|
outcome_t2
|
financialA_t2:negative_t2
|
0.01
|
0.02
|
0.42
|
0.67
|
-0.03
|
0.04
|
19140
|
outcome_t2
|
transaction_t2:negative_t2
|
0.00
|
0.02
|
0.21
|
0.83
|
-0.03
|
0.04
|
19140
|
outcome_t2
|
transaction_t2:financialB_t2
|
-0.01
|
0.02
|
-0.40
|
0.69
|
-0.04
|
0.03
|
19140
|
outcome_t2
|
negative_t2:financialB_t2
|
0.00
|
0.02
|
0.04
|
0.97
|
-0.03
|
0.04
|
19140
|
outcome_t2
|
financialA_t2:transaction_t2:negative_t2
|
0.00
|
0.03
|
-0.03
|
0.98
|
-0.05
|
0.05
|
19140
|
outcome_t2
|
transaction_t2:negative_t2:financialB_t2
|
-0.02
|
0.03
|
-0.87
|
0.38
|
-0.07
|
0.03
|
19140
|
outcome_t2
|
lag_out_2 <-
bind_rows(
df %>% dplyr::filter(status != "Vaccinated"),
df %>% dplyr::filter(status != "Vaccinated") %>% mutate(status = "All")) %>%
split(.$status) %>%
map(~lm_robust(outcome_t2 ~ negative_t1 + financialA_t1 + financialB_t1 + transaction_t1 +
financialA_t2*transaction_t2*negative_t2+
financialB_t2*transaction_t2*negative_t2, data = .)) %>%
map(tidy) %>%
bind_rows(.id = "status") %>%
dplyr::filter(term %in% paste0(treatments, "_t1")) %>%
dplyr::filter(status != "Vaccinated") %>%
mutate(status = factor(status, c("All", "Acceptant", "Refusing", "Undecided"))) %>%
mutate(term = factor(term, paste0(treatments, "_t1"), treatment_labels))
lag_out_2 %>%
ggplot(aes(term, estimate, color = status, shape = status)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(width = 0.3), width = .1) +
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw(base_size=14)+
ylab("Effect of t1 treatment on t2 vaccination probability (0-1)") + ylim(c(NA, .075))
Check for change in levels. Did average acceptance go up between measurements?
lm_robust(outcome_diff ~ 1, data = df) %>% tidy %>% kable(digits = 2)
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
df
|
outcome
|
(Intercept)
|
0
|
0
|
-3.61
|
0
|
-0.01
|
0
|
20499
|
outcome_diff
|
Omnibus treatment
The all or nothing comparison uses a small share of data, but shows evidence of a 14 point effect for the undecided.
Note that no fixed effects used here since we generally have only one observation per person.
data_omnibus <- lapply(statuses, function(s)
df_stacked %>%
mutate(
omnibus = negative * financialB * transaction,
zip = (1-negative) * (1-financialA) * (1-financialB) * (1-transaction)) %>%
dplyr::filter((omnibus==1) | (zip==1)) %>%
dplyr::filter(status == s))
names(data_omnibus) <- statuses
lapply(data_omnibus, function(d)
table(table(d$ID))) %>% bind_cols %>% mutate(freq = 1:2) %>% relocate(freq) %>%
kable(caption = "Most subjects appear only once", booktabs = TRUE)
Most subjects appear only once
freq
|
Acceptant
|
Refusing
|
Vaccinated
|
Undecided
|
1
|
3772
|
983
|
422
|
1105
|
2
|
154
|
49
|
21
|
43
|
omnibus <- lapply(data_omnibus, function(d) lm_robust(outcome ~ omnibus, data = d, cluster = ID, se_type = "stata"))
omnibus[[4]] %>% tidy %>%
kable(digits = 2, caption = "All or nothing comparisons", booktabs = TRUE)
All or nothing comparisons
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
df
|
outcome
|
(Intercept)
|
0.40
|
0.01
|
43.83
|
0
|
0.38
|
0.41
|
1147
|
outcome
|
omnibus
|
0.13
|
0.01
|
10.23
|
0
|
0.11
|
0.16
|
1147
|
outcome
|
fileConn<-file(tab_3_path)
writeLines(texreg(omnibus, float.pos = "h!", include.ci = FALSE, caption = "\\label{omnibus} Effects of receiving all treatments compared to all control conditions", custom.coef.map = list("(Intercept)" = "Constant (No incentives)","omnibus"
= "Maximal incentives")), fileConn)
close(fileConn)
Aggregate effects
policy_experiments <- list(
experiment_1 =
df_stacked %>%
mutate(
Z = negative * financialB * transaction,
zip = (1-negative) * (1-financialA) * (1-financialB) * (1-transaction)) %>%
dplyr::filter((Z==1) | (zip==1)) %>%
group_by(Z) %>%
mutate(N = n()) %>% ungroup() %>%
group_by(status, Z) %>%
summarize(n = n()/mean(N), share_pop_vaccinated = n()*mean(outcome)/mean(N)),
experiment_2 =
df_stacked %>%
dplyr::filter(financialA==0 & negative ==1 & transaction ==1) %>%
group_by(financialB) %>%
mutate(N = n()) %>% ungroup() %>%
group_by(status, financialB) %>%
summarize(n = n()/mean(N), share_pop_vaccinated = n()*mean(outcome)/mean(N)) %>% arrange(financialB) %>% mutate(Z = financialB)) %>% bind_rows(.id = "experiment") %>%
mutate(
Z =
(experiment == "experiment_1") * Z +
(experiment == "experiment_2") * (Z + .5),
Z = factor(Z, c(0, .5, 1), c("No incentives", "Local doctors + Freedoms", "All incentives"))) %>%
dplyr::filter(!is.na(Z))
# set colors to be consistent
group.colors <- c(Acceptant = "#7CAE00", Refusing = "#00BFC4", Undecided ="#C77CFF", Vaccinated = "#F8766D")
figure_1B <- policy_experiments %>%
ggplot(aes(fill=status, y=share_pop_vaccinated, x=Z)) +
geom_bar(position="stack", stat="identity",width = 0.5,alpha = 0.7) +
scale_fill_manual(values=group.colors)+
ylab("Share of population vaccinated") +
theme_bw(base_size=14)+
ylim(0:1) +
xlab("Incentives") + theme(legend.position="bottom")
figure_1B
policy_experiments %>% group_by(Z) %>% arrange(status) %>% mutate(cumul = cumsum(share_pop_vaccinated)) %>% slice(n())
# A tibble: 3 x 7
# Groups: Z [3]
experiment status Z n share_pop_vaccin~ financialB cumul
<chr> <chr> <fct> <dbl> <dbl> <int> <dbl>
1 experiment_1 Vaccinated No incentiv~ 0.0674 0.0582 NA 0.674
2 experiment_2 Vaccinated Local docto~ 0.0678 0.0606 0 0.738
3 experiment_1 Vaccinated All incenti~ 0.0687 0.0636 NA 0.750
Export Fig 1
Fig 1 in the paper has two panels. Exported here.
fig_1_AB <- ggarrange(
figure_1A + theme(plot.margin=unit(c(1,1,1.5,1.2),"cm")),
figure_1B + theme(plot.margin=unit(c(1,1,1.5,1.2),"cm")),
labels = c("A", "B"),
ncol = 2, nrow = 1)
pdf(fig_1_AB_path, width = 12.5, height = 6)
fig_1_AB
dev.off()
png
2
fig_1_AB
Notes
Share of hesitant, undecided, that are AFD supports or support no party
df %>%
group_by(status) %>%
summarize(AfD = mean(AfD),
no_party = mean(No.party),
trust_experts = mean(trust.experts),
media_exaggerates = mean(covid.media),
fear_longterm = mean(fear.longterm, na.rm = TRUE)) %>%
kable(digits = 2, booktabs = TRUE)
status
|
AfD
|
no_party
|
trust_experts
|
media_exaggerates
|
fear_longterm
|
Acceptant
|
0.03
|
0.21
|
0.71
|
0.33
|
NaN
|
Refusing
|
0.20
|
0.37
|
0.42
|
0.72
|
0.64
|
Undecided
|
0.08
|
0.39
|
0.56
|
0.51
|
0.68
|
Vaccinated
|
0.04
|
0.24
|
0.68
|
0.38
|
NaN
|
Heterogeneity
For heterogeneity analysis we draw on the Causal Forests approach described in Athey, Tibshrani and Wager (Annals of Statistics, 2019) , Nie and Wager, 2017
Helpers
A function to prep heterogeneous effects data for a given treatment, removing others.
num_tiles <- 4 # ntiles = CATE is above / below the median
# Note this is set up so that we can swap in the difference outcome easily
het_df <- function(treatment_name = "transaction_diff",
outcome_name = "outcome_diff",
data = df) {
data$W = data[treatment_name][[1]]
data$Y = data[outcome_name][[1]]
data %>%
dplyr::select(Y, W, ID, all_of(covariate_names)) %>%
drop_na() %>%
mutate_if(is.factor, as.numeric) %>%
# mutate(federal.state = as.numeric(factor(federal.state))) %>%
# Trick to render W binary
dplyr::mutate(
Y = ifelse(W<0, -Y, Y),
W = W^2)
}
A function to fit the forest and save outputs: input dataframe should have a privileged Y and W.
# heterogeneous treatment effects
f_cf <- function(df, n_trees = 1000) {
# rule of thumb: num.trees = number of individuals
df_forests <- sample_frac(df, replace=FALSE, size=train_fraction)
X <- as.matrix(df_forests[, covariate_names])
cf <-
causal_forest(
X = X,
Y = df_forests$Y,
W = df_forests$W,
num.trees=n_trees)
### Predict point estimates and standard errors (training set, out-of-bag)
oob_pred <- predict(cf, estimate.variance=TRUE)
oob_tauhat_cf <- oob_pred$predictions
oob_tauhat_cf_se <- sqrt(oob_pred$variance.estimates)
var_imp <- c(variable_importance(cf))
names(var_imp) <- covariate_names
var_imp <- var_imp %>% sort(decreasing=TRUE)
df_forests$cate <- oob_tauhat_cf
df_forests$ntile <- factor(ntile(oob_tauhat_cf, n=num_tiles))
# Standard model estimates by quantile
estimated_sample_ate <-
lm_robust(Y ~ ntile + ntile:W, data=df_forests) %>%
tidy() %>%
dplyr::filter(stringr::str_detect(term, ":W"))
# AIPW estimates by quantile
estimated_aipw_ate <-
lapply(
seq(num_tiles), function(w)
average_treatment_effect(cf, subset = df_forests$ntile == w)
) %>% bind_rows
combined_estimates <-
bind_rows(
estimated_sample_ate %>% mutate(type = "lm_robust") %>% dplyr::select(-outcome, -df, -statistic, - p.value),
estimated_aipw_ate %>% rename(std.error = std.err) %>%
mutate(
type = "aipw",
term = estimated_sample_ate$term,
conf.low = estimate - 1.96*std.error,
conf.high = estimate + 1.96*std.error)
)
# Outputs
list(cf = cf,
df_forests = df_forests,
X = X,
oob_tauhat_cf = oob_tauhat_cf,
var_imp = var_imp,
ntile_estimates = combined_estimates)
}
A function to generate fitted values.
fitted_vals <- function(var_of_interest, model = test){
df_forests <- model$df_forests
cf <- model$cf
is_continuous <- (length(unique(df_forests[var_of_interest][[1]])) > 5) # crude rule for determining continuity
if(is_continuous) {
x_grid <- quantile(df_forests[var_of_interest][[1]], probs = seq(0, 1, length.out = 5))
} else {
x_grid <- sort(unique(df_forests[var_of_interest][[1]]))
}
df_grid <- setNames(data.frame(x_grid), var_of_interest)
other_covariates <- covariate_names[!covariate_names %in% var_of_interest]
df_median <- df_forests %>% dplyr::select(all_of(other_covariates)) %>% summarise_all(median)
df_eval <- crossing(df_median, df_grid)
pred <- predict(cf, newdata=df_eval[,covariate_names], estimate.variance=TRUE)
df_eval$tauhat <- pred$predictions
df_eval$se <- sqrt(pred$variance.estimates)
# Change to factor so the plotted values are evenly spaced (e.g. logicals)
df_eval %>% arrange(var_of_interest) %>%
mutate(var_of_interest = as.factor(as.numeric(df_eval[var_of_interest][[1]])))
}
Causal Forests
Implementation
cf_all <-
lapply(treatment_levels_diff, function(x) het_df(x, outcome_name = "outcome_diff") %>% f_cf(n_trees=N_trees))
names(cf_all) <- treatment_levels_diff
cf_undecided <-
lapply(treatment_levels_diff, function(x)
het_df(x, outcome_name = "outcome_diff", dplyr::filter(df, undecided == 1)) %>%
f_cf(n_trees=N_trees))
names(cf_undecided) <- treatment_levels_diff
cf_hesitant<-
lapply(treatment_levels_diff, function(x)
het_df(x, outcome_name = "outcome_diff", dplyr::filter(df, refusing == 1)) %>%
f_cf(n_trees=N_trees))
names(cf_hesitant) <- treatment_levels_diff
Most common predictors
lapply(cf_all, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Strongest predictions", booktabs = TRUE)
Strongest predictions
financialA_diff
|
financialB_diff
|
negative_diff
|
transaction_diff
|
distance_km
|
age2
|
age2
|
distance_km
|
risk
|
solidarity
|
support.distance
|
age2
|
age2
|
days
|
undecided
|
solidarity
|
trust
|
distance_km
|
covid.media
|
EU.support
|
days
|
trust
|
days
|
undecided
|
left.right
|
undecided
|
distance_km
|
health
|
lapply(cf_undecided, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Most common predictors, undecided ", booktabs = TRUE)
Most common predictors, undecided
financialA_diff
|
financialB_diff
|
negative_diff
|
transaction_diff
|
risk
|
age2
|
age2
|
age2
|
trust
|
days
|
support.distance
|
solidarity
|
distance_km
|
distance_km
|
covid.infection.proximity
|
distance_km
|
covid.income
|
trust
|
days
|
EU.support
|
age2
|
risk
|
health
|
international.solidarity
|
days
|
migration.support
|
distance_km
|
eduyears
|
lapply(cf_hesitant, function(j) names(j$var_imp[1:6])) %>% bind_rows(.id = "treatment") %>% kable(caption = "Most common predictors, refusing ", booktabs = TRUE)
Most common predictors, refusing
financialA_diff
|
financialB_diff
|
negative_diff
|
transaction_diff
|
distance_km
|
distance_km
|
days
|
days
|
age2
|
age2
|
distance_km
|
distance_km
|
left.right
|
solidarity
|
age2
|
age2
|
migration.support
|
days
|
trust.country
|
migration.support
|
trust.media
|
risk
|
left.right
|
solidarity
|
risk
|
EU.support
|
EU.support
|
covid.income
|
what_matters <-
list(All = cf_all, Undecided = cf_undecided, Hesitant = cf_hesitant) %>%
lapply(function(res)
lapply(res, function(j) j$var_imp %>% t %>% data.frame) %>%
bind_rows %>% mutate(treatment = names(cf_all)) %>%
gather(covariate, "value", - treatment)) %>% bind_rows(.id = "group")
what_matters_plot <-
what_matters %>%
mutate(
covariate = factor(covariate, rev(var_list$new_name), rev(var_list$label)),
treatment = factor(treatment, treatment_levels_diff, treatment_labels)) %>%
ggplot(aes(value, covariate, color = treatment)) +
geom_point()+
scale_x_continuous(name="Variable Importance")+
theme_bw() + facet_wrap(~group) + ylab(" ")
what_matters_plot
pdf(fig_3_path, width = 9, height = 9)
what_matters_plot
dev.off()
png
2
Variation across ntiles
# plot
lapply(cf_all, function(j) j$ntile_estimates) %>%
bind_rows(.id = "treatment") %>%
ggplot() +
geom_pointrange(
aes(x = term, y = estimate, ymax = conf.high, ymin = conf.low, color = type),
size = 0.5,
position = position_dodge(width = .5)) +
geom_errorbar(aes(x = term, ymax = conf.high, ymin = conf.low, color = type),
width = 0.4,
size = 0.75,
position = position_dodge(width = .5)) +
theme_bw() +
labs(x = "N-tile", y = "ATE Estimate", title = "ATE within N-tiles") +
facet_wrap(~treatment)
Graph heterogeneous effects
vars <- c("days", "age2", "undecided", "solidarity", "support.distance","distance_km")
all_marginals <-
lapply(vars, function(x)
lapply(cf_all , function(model)
fitted_vals(x, model = model) %>%
mutate(var = x)
) %>% bind_rows(.id = "treatment")) %>%
bind_rows %>%
mutate(var = factor(var, vars, c("Days", "Age (Percentile)", "Undecided", "Solidarity", "Support distancing","Distance to Vaccination Center")),
treatment =
factor(treatment,treatment_levels_diff, treatment_labels),
var_of_interest =
factor(as.numeric(paste(var_of_interest)),
sort(unique(as.numeric(paste(var_of_interest))))))
figure_4 <-
all_marginals %>%
mutate(ymin_val = tauhat-1.96*se) %>%
mutate(ymax_val = tauhat+1.96*se) %>%
ggplot() +
geom_line(aes_string(x="var_of_interest", y="tauhat", group = 1), color="red") +
geom_errorbar(aes_string(x="var_of_interest",ymin="ymin_val", ymax="ymax_val", width=.2),color="blue") +
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, colour = "#B55555") +
ylab("Predicted Treatment Effect") +
theme_bw() +
theme(axis.ticks = element_blank()) +
facet_grid(treatment ~ var, scales = "free_x") +
xlab("quantiles")
figure_4
pdf(fig_4_path, width = 8, height = 6)
figure_4
dev.off()
png
2
Best linear projections
Using causal forests functions:
blps <-
lapply(
list(all = cf_all, undecided = cf_undecided, refusing = cf_hesitant),
function(set) lapply(set, function(model)
lapply(1:length(covariate_names), function(i)
best_linear_projection(model$cf, model$X[,i]) %>%
tidy %>%
dplyr::filter(term!="(Intercept)") %>%
dplyr::mutate(var = covariate_names[i])) %>%
bind_rows()) %>%
bind_rows(.id = "treatment")) %>% bind_rows(.id = "set") %>%
dplyr::mutate(treatment = factor(treatment, treatment_levels_diff, treatment_labels))
# importance
top <- 3
most_common <- c(lapply(cf_all, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist(),
lapply(cf_undecided, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist(),
lapply(cf_hesitant, function(j) names(j$var_imp[1:top])) %>% bind_rows(.id = "treatment") %>% unlist()) %>% unique()
# add in promised measures from PAP
most_common <- unique(c(most_common, "AfD", "risk", "trust"))
blps_plot <-
blps %>% dplyr::filter(var %in% most_common) %>%
mutate(var = factor(var, rev(var_list$new_name), rev(var_list$label))) %>%
ggplot(aes(estimate, var, color = set)) +
geom_point(position=position_dodge(width = .5)) +
geom_errorbar(aes(xmin = estimate - 1.96*std.error, xmax = estimate + 1.96*std.error), width = .25, position=position_dodge(width = .5)) +
facet_grid(~treatment) +
geom_vline(xintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw() +
ylab("")
blps_plot
pdf(blps_plot_path, height = 6, width = 9)
blps_plot
dev.off()
png
2
Using lm_robust
:
lm_interactions <-
lapply(
list(all = df, undecided = dplyr::filter(df, undecided==1), refusing = dplyr::filter(df, refusing==1)),
function(dff) lapply(treatment_levels_diff, function(X)
lapply(1:length(covariate_names), function(i)
lm_robust(as.formula(paste("outcome_diff~", X, "*", covariate_names[i])), data = dff) %>%
tidy %>% slice(4) %>%
mutate(var = covariate_names[i], treatment = X)) %>%
bind_rows()) %>%
bind_rows()) %>% bind_rows(.id = "set") %>%
mutate(treatment = factor(treatment, treatment_levels_diff, treatment_labels))
lm_plot <-
lm_interactions %>% dplyr::filter(var %in% most_common) %>%
dplyr::filter(!(var == "undecided" & set == "undecided")) %>%
dplyr::filter(!(var == "refusing" & set == "refusing")) %>%
mutate(var = factor(var, rev(var_list$new_name), rev(var_list$label))) %>%
ggplot(aes(estimate, var, color = set)) +
geom_point(position=position_dodge(width = .5)) +
geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = .25, position=position_dodge(width = .5)) +
facet_grid(~treatment) +
geom_vline(xintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
theme_bw() +
ylab("")
lm_plot
pdf(lm_plot_path, height = 6, width = 9)
lm_plot
dev.off()
png
2
Marginal effects for undecided
v2 <- c("age2", "solidarity", "support.distance", "trust", "trust.media")
v2labs <- c("Age (Percentile)", "Solidarity", "Support distancing","General trust", "Trust media")
figure_8 <-
lapply(v2, function(x)
lapply(cf_undecided , function(model)
fitted_vals(x, model = model) %>%
mutate(var = x)
) %>% bind_rows(.id = "treatment")) %>%
bind_rows %>%
mutate(var = factor(var, v2, v2labs),
treatment = factor(treatment,
paste0(treatment_levels_diff),
treatment_labels),
var_of_interest =
factor(as.numeric(paste(var_of_interest)),
sort(unique(as.numeric(paste(var_of_interest)))))) %>%
mutate(ymin_val = tauhat-1.96*se) %>%
mutate(ymax_val = tauhat+1.96*se) %>%
ggplot() +
geom_line(aes_string(x="var_of_interest", y="tauhat", group = 1), color="red") +
geom_errorbar(aes_string(x="var_of_interest",ymin="ymin_val", ymax="ymax_val", width=.2),color="blue") +
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, colour = "#B55555") +
ylab("Predicted Treatment Effect") +
theme_bw() +
theme(axis.ticks = element_blank()) +
facet_grid(treatment ~ var, scales = "free_x") +
xlab("quantiles")
figure_8
pdf(undecided_cf_path, height = 8, width = 8)
figure_8
dev.off()
png
2
Interaction plots
Predictions by age
M <- lm_robust(outcome_diff~age2*negative_diff*transaction_diff, data = df)
# Different for old
linearHypothesis(M, "negative_diff + age2:negative_diff = transaction_diff + age2:transaction_diff")
Linear hypothesis test
Hypothesis:
negative_diff - transaction_diff + age2:negative_diff - age2:transaction_diff = 0
Model 1: restricted model
Model 2: outcome_diff ~ age2 * negative_diff * transaction_diff
Res.Df Df Chisq Pr(>Chisq)
1 20493
2 20492 1 84.622 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Different for young
linearHypothesis(M, "negative_diff = transaction_diff ")
Linear hypothesis test
Hypothesis:
negative_diff - transaction_diff = 0
Model 1: restricted model
Model 2: outcome_diff ~ age2 * negative_diff * transaction_diff
Res.Df Df Chisq Pr(>Chisq)
1 20493
2 20492 1 60.115 8.945e-15 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M0 <- lm(outcome_diff~age2*negative_diff*transaction_diff*financialB_diff, data = df)
M1 <- lm(outcome_diff~age*negative_diff*transaction_diff*financialB_diff, data = df)
M2 <- lm(outcome_diff~age*negative_diff*transaction_diff*financialB_diff, data = dplyr::filter(df, status == "Undecided"))
dff <- list(
Freedoms = interplot(m = M2, var1 = "negative_diff", var2 = "age", plot = FALSE),
'Local Doctors' = interplot(m = M2, var1 = "transaction_diff", var2 = "age", plot = FALSE),
'Financial 50' = interplot(m = M2, var1 = "financialB_diff", var2 = "age", plot = FALSE)) %>% bind_rows(.id = "Treatment")
age_plot <- dff %>%
ggplot(aes(x=age, y=coef, colour=Treatment)) + #geom_point() +
geom_line() +
geom_ribbon(aes(ymin=lb, ymax=ub, fill=Treatment), alpha=0.1) +
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
ylab("Effect of treatment on acceptance") +
xlab("Age of respondent") +
theme_bw()
age_plot
Predictions by distance
M <- lm_robust(outcome_diff~distance_km*negative_diff*transaction_diff, data = df)
# Different for old
linearHypothesis(M, "negative_diff + distance_km:negative_diff = transaction_diff + distance_km:transaction_diff")
Linear hypothesis test
Hypothesis:
negative_diff - transaction_diff + distance_km:negative_diff - distance_km:transaction_diff = 0
Model 1: restricted model
Model 2: outcome_diff ~ distance_km * negative_diff * transaction_diff
Res.Df Df Chisq Pr(>Chisq)
1 20493
2 20492 1 7.811 0.005193 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Different for young
linearHypothesis(M, "negative_diff = transaction_diff ")
Linear hypothesis test
Hypothesis:
negative_diff - transaction_diff = 0
Model 1: restricted model
Model 2: outcome_diff ~ distance_km * negative_diff * transaction_diff
Res.Df Df Chisq Pr(>Chisq)
1 20493
2 20492 1 1.6787 0.1951
M1 <- lm(outcome_diff~distance_km*negative_diff*transaction_diff*financialB_diff, data = df)
M2 <- lm(outcome_diff~distance_km*negative_diff*transaction_diff*financialB_diff, data = dplyr::filter(df, status == "Undecided"))
dff <- list(
Freedoms = interplot(m = M1, var1 = "negative_diff", var2 = "distance_km", plot = FALSE),
'Local Doctors' = interplot(m = M1, var1 = "transaction_diff", var2 = "distance_km", plot = FALSE),
'Financial 50' = interplot(m = M1, var1 = "financialB_diff", var2 = "distance_km", plot = FALSE)) %>% bind_rows(.id = "Treatment")
distance_plot <- dff %>%
ggplot(aes(x=distance_km, y=coef, colour=Treatment)) + #geom_point() +
geom_line() +
geom_ribbon(aes(ymin=lb, ymax=ub, fill=Treatment), alpha=0.1) +
geom_hline(yintercept=0, linetype="longdash", lwd=0.35, size=0.75, colour = "#B55555") +
ylab("effect of treatment on acceptance") +
xlab("Distance to Vaccination Center") +
theme_bw()
distance_plot
Export Fig 2
Fig 2 in the paper combines three panels. Exported here.
fig_2_ABC <- ggarrange(lm_plot + theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm"))
,
ggarrange(age_plot + ylim(-.03, .13) +
theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm")),
distance_plot + ylab("") + ylim(-.03, .13) +
theme(plot.margin=unit(c(1,1,1.5,1.2)*.6,"cm")),
labels = c("B", "C"),
ncol = 2, common.legend = TRUE, legend="bottom"),
nrow = 2, labels = "A")
# figure_13
pdf(fig_2_ABC_path, width = 9, height = 9)
fig_2_ABC
dev.off()
png
2
fig_2_ABC