1 Set up

This code takes cleaned and aggregated data as input. Cleaning and aggregation is done in 1_cleaning.Rmd.

1.1 Helper functions

# The helper renormalizes weights so that each study gets the 
# same total weight even if they are missing data

study_weighting <- function(data)
  data %>% 
    dplyr::group_by(country) %>% 
    dplyr::mutate(weight = weight/sum(weight)) %>% 
    dplyr::ungroup() 

lm_helper <- function(data,...) {
  data %>% 
    study_weighting() %>% 
    estimatr::lm_robust(data = .,...) %>% 
    {bind_cols( tidy(.), n = nobs(.) )}
}

# Subgroup analysis : Function to apply analysis function over groups

grp_analysis <- function(df, y, x)
  
  df %>%
    dplyr::filter(if_all(c(all_of(x), all_of(y), cluster, weight), ~ !is.na(.))) %>%
    dplyr::nest_by(group, get(x)) %>%
    dplyr::summarize(
      lm_helper(data = data, 
                formula = as.formula(paste0(y, "~ 1")), cluster = cluster,
                weight = weight, se_type = "stata"), .groups = "drop") %>% 
    dplyr::rename(!!x := "get(x)")
  


# Reasons analysis: Function to apply analysis function over groups

reasons_together <- function(df, reason, num = "Yes")
  
  df %>%
    dplyr::filter(take_vaccine %in% num, 
                  if_all(c(all_of(reason), cluster, weight), ~ !is.na(.))) %>%
    dplyr::nest_by(group) %>%
    dplyr::summarize(
      lm_helper(data = data, 
                formula = as.formula(paste0(reason, "~ 1")), 
                cluster = cluster,
                weight = weight, se_type = "stata"), .groups = "drop")


reasons_together_subgroup <- function(df, reason, num = "Yes", 
                                      dem_group = NA, dem_subgroup = NA){
  
  if (dem_group == "gender")
    df <- filter(df, gender %in% dem_subgroup)
  
  df %>%
    dplyr::filter(take_vaccine %in% num,
                  !is.na(get(reason))) %>%
    dplyr::nest_by(group) %>%
    dplyr::summarize(
      lm_helper(data = data, 
                formula = as.formula(paste0(reason, "~ 1")), cluster = cluster,
                weight = weight, se_type = "stata"), .groups = "drop")
}

# Age analysis for reasons

age_analysis <- function(df, reason, num = "Yes", filter_by=NA){
  df %>%
    dplyr::filter({{filter_by}}==1)  %>%
    dplyr::filter(take_vaccine %in% num, 
                  if_all(c(all_of(reason), cluster, weight), ~ !is.na(.))) %>%
    dplyr::nest_by(group) %>%
    dplyr::summarize(
      lm_helper(data = data, 
                formula = as.formula(paste0(reason, "~ 1")), 
                cluster = cluster,
                weight = weight, se_type = "stata"), .groups = "drop")
}

1.2 Final cleaning and harmonization of weights

# Call data created in 1_cleaning.Rmd
df <- readr::read_csv("3_rep_data/combined.csv", guess_max = 30000)

# If no cluster information given for a study then individuals are clusters 
# Ensure cluster ids are distinct across studies
df <- 
  df %>% 
  dplyr::group_by(study) %>% 
  dplyr::mutate(
    cluster = ifelse(is.na(cluster), paste(1:n()), cluster),
    cluster = paste0(gsub(" ", "_", tolower(country)), "_", cluster))


# Weights sum to 1 in each study and recode age and education into bins
df <- 
  df %>% 
  dplyr::group_by(study) %>% 
  dplyr::mutate(
         weight_replace = mean(weight, rm.na = TRUE),
         weight = if_else(is.na(weight), if_else(is.na(weight_replace), 1, weight_replace), weight),
         weight = weight/sum(weight)) %>% 
  dplyr::ungroup() %>%
  dplyr::mutate(
    age_groups = as.character(cut(x = age, breaks = c(-Inf, 18, 30, 45, 60, +Inf), right = F)),
    age_groups_binary = ifelse(age >= 55, "55+", NA),
    age_groups_binary = ifelse(age < 55, "<55", age_groups_binary),
    age_less24=ifelse(age<=24, 1, 0),
    age_25_54=ifelse(age>=25 & age<=54, 1, 0),
    age_55_more=ifelse(age>=55, 1, 0),
    age_groups_three=ifelse(age<=24, "<25", NA),
    age_groups_three=ifelse(age>=25 & age<=54, "25-54", age_groups_three),
    age_groups_three=ifelse(age>=55, "55+", age_groups_three),
    educ_binary = if_else(educ == "More than secondary", "> Secondary", "Up to Secondary")) 


# We create a new dataframe with countries and with "All" (only LMICs). Countries are clusters in "All" analysis
# USA and Russia excluded from "All" set

df2 <- 
  dplyr::bind_rows(
    mutate(df, group = country),
    mutate(filter(df, country != "USA" & country != "Russia"), group = "All")) %>% 
  mutate(
    cluster = if_else(group == "All", 
                      gsub(pattern = " ", replacement = "_", x = tolower(country)), 
                      cluster)) 

1.3 Data checks

Checks on data structure. Note n, missingness, presence of data on weights or clusters.

1.3.1 Data structure

df %>% group_by(country) %>%
  summarize(n = n(), 
            sd_wt = sd(weight, na.rm = TRUE)/mean(weight, na.rm = TRUE), 
            cl_size = n()/length(unique(cluster)), 
            take_1 = mean((take_vaccine == "Yes")[take_vaccine == "Yes" | take_vaccine == "No" ], na.rm = TRUE),
            take_2 = mean((take_vaccine == "Yes"), na.rm = TRUE),
            take_3 = mean(take_vaccine_num, na.rm = TRUE),
            take_dk = mean(take_vaccine == "DK"),
            .take = mean(is.na(take_vaccine_num)), .age = mean(is.na(age)),
            .gender = mean(is.na(gender)), .educ = mean(is.na(educ))) %>%
  kable(digits = 2, 
        caption = "Observations, missingness patterns, data structure. Column .var is the share missing for variable var.", booktabs = TRUE, linesep = "", format.args = list(big.mark = ",", 
  scientific = FALSE))
Observations, missingness patterns, data structure. Column .var is the share missing for variable var.
country n sd_wt cl_size take_1 take_2 take_3 take_dk .take .age .gender .educ
Burkina Faso 977 0.08 1.00 0.69 0.67 0.67 . 0 0.88 0.00 0.00
Colombia 1,012 0.16 1.00 0.79 0.79 0.75 . 0 0.32 0.00 0.00
India 1,680 0.62 11.83 0.83 0.83 0.84 0.00 0 0.00 0.00 0.80
Mozambique 862 0.00 5.29 0.91 0.91 0.89 . 0 0.00 0.00 0.04
Nepal 1,389 0.51 15.43 0.97 0.97 0.97 0.00 0 0.05 0.05 1.00
Nigeria 1,868 0.00 1.00 0.78 0.78 0.76 . 0 0.00 0.00 1.00
Pakistan 1 1,633 1.17 15.41 0.86 0.73 0.72 . 0 0.00 0.00 0.01
Pakistan 2 1,492 0.00 1.00 0.84 0.67 0.66 . 0 1.00 1.00 0.00
Russia 22,125 1.86 1.00 0.37 0.27 0.27 0.27 0 0.00 0.00 0.00
Rwanda 1,355 0.04 1.00 0.94 0.94 0.85 . 0 0.00 0.00 0.00
Sierra Leone 1 1,070 0.06 1.00 0.78 0.78 0.78 0.00 0 0.00 0.00 0.03
Sierra Leone 2 2,110 0.00 11.05 0.88 0.88 0.88 . 0 0.01 0.00 0.00
Uganda 1 3,362 0.00 6.75 0.91 0.91 0.86 . 0 0.05 0.00 0.19
Uganda 2 1,366 0.00 4.41 0.79 0.77 0.77 . 0 0.00 0.00 0.00
USA 1,959 0.77 1.00 0.74 0.74 0.67 . 0 0.00 0.00 0.00

1.3.2 Age distribution

df %>% 
  select(country, age) %>%
  gather(category, value, -country) %>%
  mutate(value = as.numeric(value)) %>%
  ggplot(aes(value)) + geom_density() + facet_wrap(~country, ncol = 3)

1.3.3 Gender, education distribution

df %>% 
  select(country, gender, educ) %>%
  gather(category, value, -country) %>%
    ggplot(aes(value)) +  geom_bar() + facet_grid(country ~ category, scales = "free")

2 Tables and Figures

2.1 Table 1: Vaccine data from WGM, WHO

# Call data from WGM
dfwgm <- read.csv("3_rep_data/table_wgm.csv")

# Call data from WHO
df_vacc_coveragebis <- read.csv("3_rep_data/vacc_cov.csv")

# Put together and order labels

table_1b <- dfwgm %>%
  left_join(df_vacc_coveragebis) %>%
  mutate(country = as.factor(country),
         country = forcats::fct_relevel(country, "Russia", "USA", after = Inf)) %>% 
  arrange(country) %>%
  select(country, Effectiveness, Safety, Important, BCG, DTP1, MCV1, Coverage)

# To Latex

tab_1b <- 
knitr::kable(
  table_1b,
  caption =  "Vaccination beliefs and coverage for the countries in our sample",
  col.names = c("",
                "Effective","Safe","Important for children to have",
                "Tuberculosis (BCG)", "Diphtheria, Tetanus and Pertussis (DTP1)",
                "Measles (MCV1)",
                "% of parents with any child that was ever vaccinated"),
  format = "latex", booktabs = T, linesep = "", align = c("l", rep("c", 7)), label = "otherv") %>% 
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"), 
                            full_width = FALSE, font_size = base_font_size - 2)  %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::add_header_above(c(" " = 1, 
                                 "% Respondents agreeing Vaccines are..." = 3,
                                 "Vaccine coverage in 2019 (% of infants)" = 3,
                                 " " = 1), bold = TRUE) %>%
  kableExtra::column_spec(1:5, width = "9em") %>% 
  kableExtra::column_spec(6:7, width = "5em") %>%
  kableExtra::column_spec(8, width = "9em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-4 and 8 use data from the Wellcome Global Monitor 2018. Column 8 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 2-4 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 5-7 use data from the World Health Organization on vaccine incidence. Columns 5-7 report the percentage of infants per country receiving the vaccine indicated in each column.", 
    threeparttable = T) %>%
  kableExtra::landscape()

knitr::kable(
  table_1b,
  caption =  "Vaccination beliefs and coverage for the countries in our sample",
  col.names = c("",
                "Effective","Safe","Important for children to have",
                "Tuberculosis (BCG)", "Diphtheria, Tetanus and Pertussis (DTP1)",
                "Measles (MCV1)",
                "% of parents with any child that was ever vaccinated"),
  format = "html", booktabs = T, linesep = "", align = c("l", rep("c", 7)), label = "otherv") %>% 
  kableExtra::kable_styling(full_width = FALSE)  %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::add_header_above(c(" " = 1, 
                                 "% Respondents agreeing Vaccines are..." = 3,
                                 "Vaccine coverage in 2019 (% of infants)" = 3,
                                 " " = 1), bold = TRUE) %>%
  kableExtra::column_spec(1:5, width = "9em") %>% 
  kableExtra::column_spec(6:7, width = "5em") %>%
  kableExtra::column_spec(8, width = "9em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-5 use data from the Wellcome Global Monitor 2018. Column 2 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 3-5 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 6-8 use data from the World Health Organization on vaccine incidence. Columns 6-8 report the percentage of infants per country receiving the vaccine indicated in each column.", 
    threeparttable = T)
Vaccination beliefs and coverage for the countries in our sample
% Respondents agreeing Vaccines are…
Vaccine coverage in 2019 (% of infants)
Effective Safe Important for children to have Tuberculosis (BCG) Diphtheria, Tetanus and Pertussis (DTP1) Measles (MCV1) % of parents with any child that was ever vaccinated
Burkina Faso 87 72 95 98 95 88 97
Colombia 83 84 99 89 92 95 95
India 96 97 98 92 94 95 92
Mozambique 87 93 98 94 93 87 95
Nepal 89 93 99 96 96 92 95
Nigeria 82 92 96 67 65 54 95
Pakistan 91 92 95 88 86 75 94
Rwanda 99 97 99 98 99 96 100
Sierra Leone 95 95 99 86 95 93 97
Uganda 82 87 98 88 99 87 98
Russia 67 48 80 96 97 98 96
USA 85 73 87 . 97 90 95
Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-5 use data from the Wellcome Global Monitor 2018. Column 2 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 3-5 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 6-8 use data from the World Health Organization on vaccine incidence. Columns 6-8 report the percentage of infants per country receiving the vaccine indicated in each column.

2.2 Table with summary of samples

tab_sampling <- 
  readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select("Study" = "country", "Date"="date",
                "Geographic scope", "Sampling methodology", "Survey modality", "Weights") %>%
  knitr::kable(
    caption =  "Summary of studies sampling",
    format = "latex", booktabs = T, linesep = "", label = "sampling") %>% 
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"),
                            font_size = base_font_size - 2) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1:2, width = "8em") %>%
  kableExtra::column_spec(3, width = "12em")  %>%
  kableExtra::column_spec(4, width = "30em") %>% 
  kableExtra::landscape()

readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select("Study" = "country", "Date"="date",
                "Geographic scope", "Sampling methodology", "Survey modality", "Weights") %>%
  knitr::kable(caption =  "Summary of studies' sampling", linesep = "", format = "html") %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "8em") %>%
  kableExtra::column_spec(2, width = "12em")  %>%
  kableExtra::column_spec(3, width = "30em")
Summary of studies’ sampling
Study Date Geographic scope Sampling methodology Survey modality Weights
Burkina Faso October to December 2020 National Random digit dialing (RDD) Phone Yes
Colombia August 2020 National Random digit dialing (RDD) Phone Yes
India June 2020 to January 2021 Subnational, Slums in 2 cities Representative sample of slum dwellers living in vicinity of a community toilet and located in Uttar Pradesh Phone Yes
Mozambique October to November 2020 Subnational, 2 cities
  1. Random sample in urban and periurban markets stratified by gender and type of establishment in Maputo; 2) Random sample representative of communities in the Cabo Delgado, stratified on urban, semiurban, and rural areas
Phone No
Nepal December 2020 Subnational, 2 districts Random sample of poor households from randomly selected villages in Kanchanpur Phone Yes
Nigeria November to December 2020 Subnational, 1 state
  1. Random sample of individuals in Kaduna; 2) Sample of phone numbers from a phone list of Kaduna state residents
Phone No
Pakistan 1 July to September 2020 Subnational, 2 districts Random sample of individuals in administrative police units in two districts of Punjab Phone Yes
Pakistan 2 September to October 2020 Subnational, 1 province Random digit dialing (RDD) on a random sample of all numerically possible mobile phone numbers in the region of Punjab Phone No
Russia November to December 2020 Subnational, 61 regions Sample recruited from the Russian online survey company OMI (Online Market Intelligence). Sampling targeted at having a minimum of respondents per region, as well as representation of age, gender and education groups. Online Yes
Rwanda October to November 2020 National Random digit dialing (RDD) Phone Yes
Sierra Leone 1 October 2020 National Random digit dialing (RDD) Phone Yes
Sierra Leone 2 October 2020 to January 2021 National A random sample of households in 195 rural towns across all 14 districts of Sierra Leone Phone No
Uganda 1 September to December 2020 Subnational, 13 districts Sample of women in households from semi-rural and rural villages across 13 districts in Uganda, selected according to the likelihood of having children Phone No
Uganda 2 November to December 2020 Subnational, 1 district Random sample of households in Kampala Phone No
USA December 2020 National Nation-wide sample of adult internet users recruited through the market research firm Lucid Online Yes

2.3 Main Results: Acceptance Rates (disaggregated by group)

2.3.1 Figure

# Prep levels

main_results <- 
  df2 %>% 
  dplyr::filter(dplyr::if_all(c(take_vaccine_num, cluster, weight), ~ !is.na(.))) %>% 
  dplyr::nest_by(group) %>%
  dplyr::summarize(
    lm_helper(data = data, 
              formula = take_vaccine_num ~ 1, cluster = cluster,
              weight = weight, se_type = "stata"),
    .groups = "drop")

# Gender
acc_by_gender <- grp_analysis(df2, y = "take_vaccine_num", x = "gender")

# Education (all original categories and binary recoding)
acc_by_educ        <- grp_analysis(df2, y = "take_vaccine_num", x = "educ")
acc_by_educ_binary <- grp_analysis(df2, y = "take_vaccine_num", x = "educ_binary")

# Age (all original categories and binary recoding)
acc_by_age <- grp_analysis(df2, y = "take_vaccine_num", x = "age_groups_three") %>%
  dplyr::filter(statistic!=Inf) %>%
  dplyr::filter(conf.low>0)
acc_by_age_binary <- grp_analysis(df2, y = "take_vaccine_num", x = "age_groups_binary")


# Put them together in a single df. Make estimates "percentages" and round
ans <- 
  dplyr::bind_rows(
    main_results %>% mutate(cat = "All", var = "All"),
    acc_by_gender %>% rename(cat = gender) %>% mutate(var = "By gender"),
    acc_by_educ_binary %>% rename(cat = educ_binary) %>% mutate(var = "By education"),
    acc_by_age_binary %>% rename(cat = age_groups_binary) %>% mutate(var = "By age")) %>%
  dplyr::mutate(across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1)))

# Join with a tags df, which includes details on the study (national or subnational)
tags <- 
  readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select(group = country, tag = "Geographic scope") %>%
  dplyr::left_join(filter(ans, cat == "All"), by = "group") %>%
  dplyr::mutate(tag = paste0(group, " (", tag, ", ", n, ")")) %>%
  dplyr::select(group, tag)

# Prepare df to plot. Important but ugly relevel of factors, happening all over the code

ans %<>% 
  dplyr::left_join(tags) %>%
  dplyr::mutate(tag = ifelse(group == "All", "All LMICs", tag)) %>%
  # group_by(var) %>% 
  # arrange(cat) %>%
  dplyr::mutate(
    var = factor(var, levels = c("All", "By gender", "By education", "By age")),
    cat = factor(
      cat, ordered = TRUE,
      levels = rev(c(c("Female", "Male", "Up to Secondary", 
                       "> Secondary", "<55", "55+", "All"))),
      labels = rev(c("Female", "Male", "Up to Secondary", 
                     "More than Secondary", "$< 55$", "$55 +$", "All"))),
    tag = gsub(pattern = " \\(", "\\\n\\(", tag))

special_cases <- 
  sort(unique(ans$tag)[grep(unique(ans$tag), pattern = "All LMICs|Russia|USA")])

ans %<>% 
  dplyr::mutate(
    tag = 
      factor(x = tag, ordered = TRUE,
             levels = rev(c(sort(unique(tag)[!(unique(tag) %in% special_cases)]), special_cases))))

#Colour blind palette (available palletes are smaller so we expand it)
safe_colorblind_palette <- c("#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", 
                             "#44AA99", "#999933", "#882255", "#661100", "#6699CC", "#888888", 
                             "#88CCEE")


#Plot
fig_1 <- 
  ans %>% 
  ggplot(data = ., aes(x = tag, y = estimate, color = cat)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), 
                size = .5, width = .2, position = position_dodge(0.6)) + 
  geom_point(position = position_dodge(0.6)) +
  facet_grid(. ~ var, scales = "free_x", space = "free") + 
  coord_flip() +
  guides(color = guide_legend(reverse = TRUE, nrow = 2)) +
  geom_vline(xintercept = 3.5, color = "darkgrey") +
  geom_vline(xintercept = 2.5, color = "darkgrey") +
  scale_colour_manual(values = safe_colorblind_palette) +
  labs(title = "If a COVID-19 vaccine becomes available in [country], would you take it?", 
       color = "Subgroups", x = "") +
  theme_bw(base_size = (base_font_size - 2)) + ylim(c(0,100)) +
  theme(legend.position = "bottom",
        plot.caption = element_text(hjust = 0), #Default is hjust=1
        plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
        plot.caption.position =  "plot",
        axis.text.y = element_text(hjust = 0))

fig_1