1 Set up

This code takes cleaned and aggregated data as input. Cleaning and aggregation are done in 1_cleaning.Rmd.

1.1 Helper functions

Code used to prepare data (prepare survey weights), run analyses and robustness checks.

# Rescale survey weights so that they sum to one within each value of
# `country`. Each study therefore gets the same total weight, even if some
# of its observations are missing data.

study_weighting <- function(data) {
  by_country <- dplyr::group_by(data, country)
  rescaled <- dplyr::mutate(by_country, weight = weight / sum(weight))
  dplyr::ungroup(rescaled)
}

# Fit estimatr::lm_robust() on the re-weighted data and return the tidied
# coefficient table with an extra column `n` (number of observations).
# `...` is forwarded unchanged to lm_robust() (formula, cluster, weight, ...).
lm_helper <- function(data, ...) {
  reweighted <- study_weighting(data)
  fit <- estimatr::lm_robust(data = reweighted, ...)
  bind_cols(tidy(fit), n = nobs(fit))
}

# Leave-m-out helper.
#   data        data frame to re-estimate on
#   sample_var  name (string) of the column that identifies samples
#   loo_n       number of samples left out at a time
#   loo_fun     function applied to each reduced data set; the default is the
#               weighted intercept-only model for take_vaccine_num
# Every combination of `loo_n` distinct values of `sample_var` is dropped in
# turn and the results of `loo_fun` are stacked by plyr::adply().

loo_helper <- 
  function(data, 
           sample_var, 
           loo_n = 1,
           loo_fun = 
             function(dat) lm_helper(data = dat, 
                                     formula = take_vaccine_num ~ 1, cluster = cluster,
                                     weight = weight, se_type = "stata")
  ) {

  sample_ids <- data[[sample_var]]

  # One column per combination of samples to leave out
  left_out <- combn(unique(sample_ids), loo_n)

  plyr::adply(
    .data = left_out,
    .margins = 2,
    .fun = function(dropped) loo_fun(data[!(sample_ids %in% dropped), ])
  )
}

# Subgroup analysis: fit the weighted intercept-only model for outcome `y`
# (column name as string) separately for every combination of `group` and the
# values of `x` (column name as string). Rows with a missing value in x, y,
# cluster or weight are dropped first.

grp_analysis <- function(df, y, x) {
  complete_rows <- dplyr::filter(
    df,
    if_all(c(all_of(x), all_of(y), cluster, weight), ~ !is.na(.))
  )

  by_subgroup <- dplyr::nest_by(complete_rows, group, get(x))

  estimates <- dplyr::summarize(
    by_subgroup,
    lm_helper(
      data = data,
      formula = as.formula(paste0(y, "~ 1")),
      cluster = cluster,
      weight = weight,
      se_type = "stata"
    ),
    .groups = "drop"
  )

  # nest_by() labels the computed grouping column literally "get(x)";
  # give it back the name of the subgroup variable.
  dplyr::rename(estimates, !!x := "get(x)")
}
  


# Reasons analysis: among respondents whose `take_vaccine` answer is in `num`,
# fit the weighted intercept-only model for the column named by `reason`
# (string), separately for each `group`. Rows with a missing reason, cluster
# or weight are dropped first.

reasons_together <- function(df, reason, num = "Yes") {
  eligible <- dplyr::filter(
    df,
    take_vaccine %in% num,
    if_all(c(all_of(reason), cluster, weight), ~ !is.na(.))
  )

  by_group <- dplyr::nest_by(eligible, group)

  dplyr::summarize(
    by_group,
    lm_helper(
      data = data,
      formula = as.formula(paste0(reason, "~ 1")),
      cluster = cluster,
      weight = weight,
      se_type = "stata"
    ),
    .groups = "drop"
  )
}


# Reasons analysis within a demographic subgroup.
#   df            data with columns take_vaccine, group, cluster, weight and
#                 the column named by `reason`
#   reason        name (string) of the outcome column
#   num           value(s) of take_vaccine to keep
#   dem_group     if "gender", the data are first restricted to rows whose
#                 gender is in `dem_subgroup`; any other value (including the
#                 default NA) applies no demographic restriction
#   dem_subgroup  gender value(s) to keep when dem_group == "gender"
# Returns one tidied lm_helper() result per `group`.
reasons_together_subgroup <- function(df, reason, num = "Yes", 
                                      dem_group = NA, dem_subgroup = NA){
  
  # isTRUE() guards the default dem_group = NA: `if (NA == "gender")` stops
  # with "missing value where TRUE/FALSE needed".
  if (isTRUE(dem_group == "gender")) {
    df <- dplyr::filter(df, gender %in% dem_subgroup)
  }
  
  df %>%
    dplyr::filter(take_vaccine %in% num,
                  !is.na(get(reason))) %>%
    dplyr::nest_by(group) %>%
    dplyr::summarize(
      lm_helper(data = data, 
                formula = as.formula(paste0(reason, "~ 1")), cluster = cluster,
                weight = weight, se_type = "stata"), .groups = "drop")
}

# Age analysis for reasons: keep rows where the indicator column passed
# (unquoted) as `filter_by` equals 1 and whose `take_vaccine` answer is in
# `num`, drop rows with a missing reason, cluster or weight, then fit the
# weighted intercept-only model for `reason` (string) within each `group`.

age_analysis <- function(df, reason, num = "Yes", filter_by=NA){
  eligible <- dplyr::filter(
    df,
    {{filter_by}} == 1,
    take_vaccine %in% num,
    if_all(c(all_of(reason), cluster, weight), ~ !is.na(.))
  )

  by_group <- dplyr::nest_by(eligible, group)

  dplyr::summarize(
    by_group,
    lm_helper(
      data = data,
      formula = as.formula(paste0(reason, "~ 1")),
      cluster = cluster,
      weight = weight,
      se_type = "stata"
    ),
    .groups = "drop"
  )
}

1.2 Final cleaning and harmonization of weights

Groups variables into discrete categories and prepares survey weights.

# Call data created in 1_cleaning.Rmd
df <- readr::read_csv("3_rep_data/combined.csv", guess_max = 30000)

# If no cluster information given for a study then individuals are clusters 
# (each respondent gets their within-study row number as cluster id).
# Ensure cluster ids are distinct across studies by prefixing them with the
# lower-cased, underscore-separated `country` label.
df <- 
  df %>% 
  dplyr::group_by(study) %>% 
  dplyr::mutate(
    # seq_len() instead of 1:n() so an empty group cannot yield c(1, 0)
    cluster = ifelse(is.na(cluster), paste(seq_len(n())), cluster),
    cluster = paste0(gsub(" ", "_", tolower(country)), "_", cluster)) %>% 
  # do not leave the data grouped by study for later chunks
  dplyr::ungroup()


# Weights sum to 1 in each study and recode age and education into bins.
# Missing weights are replaced by the mean observed weight of the study; if a
# study has no observed weight at all, every respondent gets weight 1.
df <- 
  df %>% 
  dplyr::group_by(study) %>% 
  dplyr::mutate(
         # The argument is `na.rm`; the previous spelling `rm.na` was silently
         # swallowed by mean()'s `...`, so a single missing weight made the
         # study mean NA and all missing weights were imputed with 1.
         weight_replace = mean(weight, na.rm = TRUE),
         weight = if_else(is.na(weight), if_else(is.na(weight_replace), 1, weight_replace), weight),
         weight = weight/sum(weight)) %>% 
  dplyr::ungroup() %>%
  dplyr::mutate(
    # Left-closed age bins: [-Inf,18), [18,30), [30,45), [45,60), [60,Inf)
    age_groups = as.character(cut(x = age, breaks = c(-Inf, 18, 30, 45, 60, +Inf), right = FALSE)),
    age_groups_binary = ifelse(age >= 55, "55+", NA),
    age_groups_binary = ifelse(age < 55, "<55", age_groups_binary),
    # 0/1 indicators for the three age bands
    age_less24 = ifelse(age <= 24, 1, 0),
    age_25_54 = ifelse(age >= 25 & age <= 54, 1, 0),
    age_55_more = ifelse(age >= 55, 1, 0),
    age_groups_three = ifelse(age <= 24, "<25", NA),
    age_groups_three = ifelse(age >= 25 & age <= 54, "25-54", age_groups_three),
    age_groups_three = ifelse(age >= 55, "55+", age_groups_three),
    educ_binary = if_else(educ == "More than secondary", "> Secondary", "Up to Secondary")) 


# Stack two copies of the data: one where `group` is the study's own `country`
# label and one pooled copy labelled "All" that leaves out USA and Russia
# (LMICs only). In the pooled "All" rows the cluster id is replaced by the
# lower-cased, underscore-separated country label, so countries are clusters.

df2 <- 
  dplyr::bind_rows(
    mutate(df, group = country),
    mutate(
      filter(df, country != "USA", country != "Russia"),
      group = "All"
    )
  ) %>% 
  mutate(
    cluster = if_else(
      group == "All",
      gsub(pattern = " ", replacement = "_", x = tolower(country)),
      cluster
    )
  )

1.3 Data checks

Checks on data structure. Note n, missingness, presence of data on weights or clusters.

1.3.1 Data structure

# Per-country overview: sample size, coefficient of variation of the weights,
# average cluster size, three variants of the acceptance share, share of "DK"
# answers and the share of missing values for the key variables.
df %>%
  group_by(country) %>%
  summarize(
    n = n(),
    sd_wt = sd(weight, na.rm = TRUE) / mean(weight, na.rm = TRUE),
    cl_size = n() / length(unique(cluster)),
    # share of "Yes" among respondents answering "Yes" or "No"
    take_1 = mean(
      (take_vaccine == "Yes")[take_vaccine == "Yes" | take_vaccine == "No"],
      na.rm = TRUE
    ),
    # share of "Yes" among all non-missing answers
    take_2 = mean((take_vaccine == "Yes"), na.rm = TRUE),
    take_3 = mean(take_vaccine_num, na.rm = TRUE),
    take_dk = mean(take_vaccine == "DK"),
    .take = mean(is.na(take_vaccine_num)),
    .age = mean(is.na(age)),
    .gender = mean(is.na(gender)),
    .educ = mean(is.na(educ))
  ) %>%
  kable(
    digits = 2,
    caption = "Observations, missingness patterns, data structure. Column .var is the share missing for variable var.",
    booktabs = TRUE,
    linesep = "",
    format.args = list(big.mark = ",", scientific = FALSE)
  )

Observations, missingness patterns, data structure. Column .var is the share missing for variable var.
country	n	sd_wt	cl_size	take_1	take_2	take_3	take_dk	.age	.gender	.educ
Burkina Faso	977	0.08	1.00	0.69	0.67	0.67	.	0.88	0.00	0.00
Colombia	1,012	0.16	1.00	0.79	0.79	0.75	.	0.32	0.00	0.00
India	1,680	0.62	11.83	0.83	0.83	0.84	0.00	0.00	0.00	0.80
Mozambique	862	0.00	5.29	0.91	0.91	0.89	.	0.00	0.00	0.04
Nepal	1,389	0.51	15.43	0.97	0.97	0.97	0.00	0.05	0.05	1.00
Nigeria	1,868	0.00	1.00	0.78	0.78	0.76	.	0.00	0.00	1.00
Pakistan 1	1,633	1.17	15.41	0.86	0.73	0.72	.	0.00	0.00	0.01
Pakistan 2	1,492	0.00	1.00	0.84	0.67	0.66	.	1.00	1.00	0.00
Russia	22,125	1.86	1.00	0.37	0.27	0.27	0.27	0.00	0.00	0.00
Rwanda	1,355	0.04	1.00	0.94	0.94	0.85	.	0.00	0.00	0.00
Sierra Leone 1	1,070	0.06	1.00	0.78	0.78	0.78	0.00	0.00	0.00	0.03
Sierra Leone 2	2,110	0.00	11.05	0.88	0.88	0.88	.	0.01	0.00	0.00
Uganda 1	3,362	0.00	6.75	0.91	0.91	0.86	.	0.05	0.00	0.19
Uganda 2	1,366	0.00	4.41	0.79	0.77	0.77	.	0.00	0.00	0.00
USA	1,959	0.77	1.00	0.74	0.74	0.67	.	0.00	0.00	0.00

1.3.2 Age distribution

# Density of respondent age, one panel per country.
# pivot_longer() replaces the superseded gather(); the long format keeps the
# x variable named `value`, so the axis label is unchanged.
df %>% 
  select(country, age) %>%
  tidyr::pivot_longer(cols = -country, names_to = "category", values_to = "value") %>%
  mutate(value = as.numeric(value)) %>%
  ggplot(aes(value)) + geom_density() + facet_wrap(~country, ncol = 3)

1.3.3 Gender, education distribution

# Bar charts of gender and education categories, one row of panels per country.
# pivot_longer() replaces the superseded gather(); values are converted to
# character so both variables can share one column, as gather() did implicitly.
df %>% 
  select(country, gender, educ) %>%
  tidyr::pivot_longer(cols = -country, names_to = "category", values_to = "value",
                      values_transform = list(value = as.character)) %>%
    ggplot(aes(value)) +  geom_bar() + facet_grid(country ~ category, scales = "free")

2 Tables and Figures

2.1 Table 1: Vaccine data from WGM, WHO

# Wellcome Global Monitor (WGM) beliefs data
dfwgm <- read.csv("3_rep_data/table_wgm.csv")

# WHO vaccine coverage data
df_vacc_coveragebis <- read.csv("3_rep_data/vacc_cov.csv")

# Merge both sources (joined on their shared columns), move Russia and USA to
# the end of the country ordering and keep the table columns in display order.

table_1b <- 
  left_join(dfwgm, df_vacc_coveragebis) %>%
  mutate(
    country = as.factor(country),
    country = forcats::fct_relevel(country, "Russia", "USA", after = Inf)
  ) %>% 
  arrange(country) %>%
  select(
    country,
    Effectiveness, Safety, Important,
    BCG, DTP1, MCV1,
    Coverage
  )

# LaTeX version of Table 1 (landscape, scaled down, with grouped header and
# an explanatory footnote).

tab_1b <- 
  knitr::kable(
    table_1b,
    caption =  "Vaccination beliefs and coverage for the countries in our sample",
    col.names = c(
      "",
      "Effective",
      "Safe",
      "Important for children to have",
      "Tuberculosis (BCG)",
      "Diphtheria, Tetanus and Pertussis (DTP1)",
      "Measles (MCV1)",
      "% of parents with any child that was ever vaccinated"
    ),
    format = "latex",
    booktabs = TRUE,
    linesep = "",
    align = c("l", rep("c", 7)),
    label = "otherv"
  ) %>% 
  kableExtra::kable_styling(
    latex_options = c("scale_down", "hold_position"),
    full_width = FALSE,
    font_size = base_font_size - 2
  ) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  # Spanning header: 1 label column, 3 belief columns, 3 coverage columns, 1 parents column
  kableExtra::add_header_above(
    c(" " = 1,
      "% Respondents agreeing Vaccines are..." = 3,
      "Vaccine coverage in 2019 (% of infants)" = 3,
      " " = 1),
    bold = TRUE
  ) %>%
  kableExtra::column_spec(1:5, width = "9em") %>% 
  kableExtra::column_spec(6:7, width = "5em") %>%
  kableExtra::column_spec(8, width = "9em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-4 and 8 use data from the Wellcome Global Monitor 2018. Column 8 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 2-4 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 5-7 use data from the World Health Organization on vaccine incidence. Columns 5-7 report the percentage of infants per country receiving the vaccine indicated in each column.", 
    threeparttable = TRUE
  ) %>%
  kableExtra::landscape()

# HTML version of Table 1, printed in the rendered report.
# The footnote's column numbers follow the column order of `table_1b`:
# column 1 is the country, columns 2-4 are the WGM belief items, columns 5-7
# the WHO coverage figures and column 8 the WGM parents item. The previous
# footnote text referred to a different column order.
knitr::kable(
  table_1b,
  caption =  "Vaccination beliefs and coverage for the countries in our sample",
  col.names = c("",
                "Effective","Safe","Important for children to have",
                "Tuberculosis (BCG)", "Diphtheria, Tetanus and Pertussis (DTP1)",
                "Measles (MCV1)",
                "% of parents with any child that was ever vaccinated"),
  format = "html", booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)), label = "otherv") %>% 
  kableExtra::kable_styling(full_width = FALSE)  %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::add_header_above(c(" " = 1, 
                                 "% Respondents agreeing Vaccines are..." = 3,
                                 "Vaccine coverage in 2019 (% of infants)" = 3,
                                 " " = 1), bold = TRUE) %>%
  kableExtra::column_spec(1:5, width = "9em") %>% 
  kableExtra::column_spec(6:7, width = "5em") %>%
  kableExtra::column_spec(8, width = "9em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-4 and 8 use data from the Wellcome Global Monitor 2018. Column 8 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 2-4 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 5-7 use data from the World Health Organization on vaccine incidence. Columns 5-7 report the percentage of infants per country receiving the vaccine indicated in each column.", 
    threeparttable = TRUE)

Vaccination beliefs and coverage for the countries in our sample
	% Respondents agreeing Vaccines are…			Vaccine coverage in 2019 (% of infants)
	Effective	Safe	Important for children to have	Tuberculosis (BCG)	Diphtheria, Tetanus and Pertussis (DTP1)	Measles (MCV1)	% of parents with any child that was ever vaccinated
Burkina Faso	87	72	95	98	95	88	97
Colombia	83	84	99	89	92	95	95
India	96	97	98	92	94	95	92
Mozambique	87	93	98	94	93	87	95
Nepal	89	93	99	96	96	92	95
Nigeria	82	92	96	67	65	54	95
Pakistan	91	92	95	88	86	75	94
Rwanda	99	97	99	98	99	96	100
Sierra Leone	95	95	99	86	95	93	97
Uganda	82	87	98	88	99	87	98
Russia	67	48	80	96	97	98	96
USA	85	73	87	.	97	90	95
Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-4 and 8 use data from the Wellcome Global Monitor 2018. Column 8 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 2-4 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 5-7 use data from the World Health Organization on vaccine incidence. Columns 5-7 report the percentage of infants per country receiving the vaccine indicated in each column.

2.2 Table with summary of samples

# LaTeX version of the sampling summary table, built from the "sample" sheet
# of the studies workbook (landscape, scaled down, fixed column widths).
tab_sampling <- 
  readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select(
    "Study" = "country",
    "Date" = "date",
    "Geographic scope",
    "Sampling methodology",
    "Survey modality",
    "Weights"
  ) %>%
  knitr::kable(
    caption =  "Summary of studies sampling",
    format = "latex",
    booktabs = TRUE,
    linesep = "",
    label = "sampling"
  ) %>% 
  kableExtra::kable_styling(
    latex_options = c("scale_down", "hold_position"),
    font_size = base_font_size - 2
  ) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1:2, width = "8em") %>%
  kableExtra::column_spec(3, width = "12em")  %>%
  kableExtra::column_spec(4, width = "30em") %>% 
  kableExtra::landscape()

# HTML version of the sampling summary table, printed in the rendered report.
# NOTE(review): the column widths are set on columns 1-3 here but on columns
# 1-4 in the LaTeX version; confirm the difference is intended.
readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select(
    "Study" = "country",
    "Date" = "date",
    "Geographic scope",
    "Sampling methodology",
    "Survey modality",
    "Weights"
  ) %>%
  knitr::kable(
    caption =  "Summary of studies' sampling",
    linesep = "",
    format = "html"
  ) %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "8em") %>%
  kableExtra::column_spec(2, width = "12em")  %>%
  kableExtra::column_spec(3, width = "30em")

Summary of studies’ sampling
Study	Date	Geographic scope	Sampling methodology	Survey modality	Weights
Burkina Faso	October to December 2020	National	Random digit dialing (RDD)	Phone	Yes
Colombia	August 2020	National	Random digit dialing (RDD)	Phone	Yes
India	June 2020 to January 2021	Subnational, Slums in 2 cities	Representative sample of slum dwellers living in vicinity of a community toilet and located in Uttar Pradesh	Phone	Yes
Mozambique	October to November 2020	Subnational, 2 cities	Random sample in urban and periurban markets stratified by gender and type of establishment in Maputo; 2) Random sample representative of communities in the Cabo Delgado, stratified on urban, semiurban, and rural areas	Phone	No
Nepal	December 2020	Subnational, 2 districts	Random sample of poor households from randomly selected villages in Kanchanpur	Phone	Yes
Nigeria	November to December 2020	Subnational, 1 state	Random sample of individuals in Kaduna; 2) Sample of phone numbers from a phone list of Kaduna state residents	Phone	No
Pakistan 1	July to September 2020	Subnational, 2 districts	Random sample of individuals in administrative police units in two districts of Punjab	Phone	Yes
Pakistan 2	September to October 2020	Subnational, 1 province	Random digit dialing (RDD) on a random sample of all numerically possible mobile phone numbers in the region of Punjab	Phone	No
Russia	November to December 2020	Subnational, 61 regions	Sample recruited from the Russian online survey company OMI (Online Market Intelligence). Sampling targeted at having a minimum of respondents per region, as well as representation of age, gender and education groups.	Online	Yes
Rwanda	October to November 2020	National	Random digit dialing (RDD)	Phone	Yes
Sierra Leone 1	October 2020	National	Random digit dialing (RDD)	Phone	Yes
Sierra Leone 2	October 2020 to January 2021	National	A random sample of households in 195 rural towns across all 14 districts of Sierra Leone	Phone	No
Uganda 1	September to December 2020	Subnational, 13 districts	Sample of women in households from semi-rural and rural villages across 13 districts in Uganda, selected according to the likelihood of having children	Phone	No
Uganda 2	November to December 2020	Subnational, 1 district	Random sample of households in Kampala	Phone	No
USA	December 2020	National	Nation-wide sample of adult internet users recruited through the market research firm Lucid	Online	Yes

2.3 Main Results: Acceptance Rates (disaggregated by group)

2.3.1 Figure

# Overall acceptance per group (each study plus the pooled "All" group):
# weighted intercept-only model on rows with complete outcome, cluster and weight.

main_results <- 
  df2 %>% 
  dplyr::filter(
    !is.na(take_vaccine_num),
    !is.na(cluster),
    !is.na(weight)
  ) %>% 
  dplyr::nest_by(group) %>%
  dplyr::summarize(
    lm_helper(
      data = data,
      formula = take_vaccine_num ~ 1,
      cluster = cluster,
      weight = weight,
      se_type = "stata"
    ),
    .groups = "drop"
  )

# Acceptance by gender
acc_by_gender <- grp_analysis(df2, y = "take_vaccine_num", x = "gender")

# Acceptance by education (binary recoding)
acc_by_educ_binary <- grp_analysis(df2, y = "take_vaccine_num", x = "educ_binary")

# Acceptance by age (three bands and binary recoding). For the three-band
# version, rows with an infinite test statistic or a confidence interval whose
# lower bound is not above zero are dropped.
acc_by_age <- 
  grp_analysis(df2, y = "take_vaccine_num", x = "age_groups_three") %>%
  dplyr::filter(statistic != Inf) %>%
  dplyr::filter(conf.low > 0)
acc_by_age_binary <- grp_analysis(df2, y = "take_vaccine_num", x = "age_groups_binary")


# Stack all estimates in a single df; `cat` holds the subgroup label and `var`
# the breakdown it belongs to. Estimates and CI bounds become rounded percentages.
ans <- 
  dplyr::bind_rows(
    mutate(main_results, cat = "All", var = "All"),
    mutate(rename(acc_by_gender, cat = gender), var = "By gender"),
    mutate(rename(acc_by_educ_binary, cat = educ_binary), var = "By education"),
    mutate(rename(acc_by_age, cat = age_groups_three), var = "By age")
  ) %>%
  dplyr::mutate(
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1))
  )

# Build the axis label for every study: "<study> (<geographic scope>, <n>)",
# where n is the sample size of the study's overall ("All") estimate.
tags <- 
  readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select(group = country, tag = "Geographic scope") %>%
  dplyr::left_join(
    filter(ans, cat == "All"),
    by = "group"
  ) %>%
  dplyr::mutate(tag = paste0(group, " (", tag, ", ", n, ")")) %>%
  dplyr::select(group, tag)

# Prepare df to plot: attach the study labels, turn `var` and `cat` into
# factors with a fixed display order, and break the label before "(" so the
# details go on a second line.

ans <- 
  ans %>% 
  dplyr::left_join(tags) %>%
  dplyr::mutate(tag = ifelse(group == "All", "All LMICs", tag)) %>%
  dplyr::mutate(
    var = factor(var, levels = c("All", "By gender", "By education", "By age")),
    cat = factor(
      cat,
      ordered = TRUE,
      levels = rev(c("Female", "Male", "Up to Secondary", 
                     "> Secondary", "<25", "25-54", "55+", "All")),
      labels = rev(c("Female", "Male", "Up to Secondary", 
                     "More than Secondary", "$< 25$", "$25-54$", "$55 +$", "All"))
    ),
    tag = gsub(pattern = " \\(", "\\\n\\(", tag)
  )


# Labels that are listed after the individual LMIC studies in the level order
special_cases <- 
  sort(grep(pattern = "All LMICs|Russia|USA", x = unique(ans$tag), value = TRUE))

# Order the labels: LMIC studies alphabetically, then the special cases;
# the order is reversed because the plot flips its coordinates.
ans <- 
  ans %>% 
  dplyr::mutate(
    tag = factor(
      x = tag,
      ordered = TRUE,
      levels = rev(c(sort(setdiff(unique(tag), special_cases)), special_cases))
    )
  )

# Copy kept for the leave-m-out figure
ans_loo <- ans

# Colour-blind-safe palette (available palettes are smaller, so it is expanded)
safe_colorblind_palette <- c(
  "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99",
  "#999933", "#882255", "#661100", "#6699CC", "#888888", "#88CCEE"
)


# Figure: acceptance estimates with confidence intervals per study (rows after
# the flip), faceted by breakdown and coloured by subgroup.
fig_1_ages <- 
  ggplot(data = ans, aes(x = tag, y = estimate, color = cat)) + 
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    size = .5,
    width = .2,
    position = position_dodge(0.6)
  ) + 
  geom_point(position = position_dodge(0.6)) +
  facet_grid(. ~ var, scales = "free_x", space = "free") + 
  coord_flip() +
  guides(color = guide_legend(reverse = TRUE, nrow = 2)) +
  # Separator lines between label positions on the (flipped) study axis
  geom_vline(xintercept = 3.5, color = "darkgrey") +
  geom_vline(xintercept = 2.5, color = "darkgrey") +
  scale_colour_manual(values = safe_colorblind_palette) +
  labs(
    title = "If a COVID-19 vaccine becomes available in [country], would you take it?",
    color = "Subgroups",
    x = ""
  ) +
  theme_bw(base_size = (base_font_size - 2)) +
  ylim(c(0,100)) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0), # default is hjust = 1
    plot.title.position = "plot",           # applies to the subtitle too
    plot.caption.position =  "plot",
    axis.text.y = element_text(hjust = 0)
  )

fig_1_ages

2.3.2 Invariance to aggregation at country level

Weighting so that each country (rather than each study) has equal weight has only a marginal impact on estimated LMIC average acceptance.

# LMIC respondents with an observed outcome, with two alternative units:
#   cluster_1 / weight_1: every study is a unit and its weights sum to 1
#   cluster_2 / weight_2: every country is a unit (studies from the same
#                         country are merged) and its weights sum to 1
df_country <- 
  df %>% 
  filter(
    !is.na(take_vaccine_num),
    !(country %in% c("USA", "Russia"))
  ) %>%
  mutate(
    cluster_1 = country,
    cluster_2 = recode(
      country,
      "Pakistan 1" = "Pakistan",
      "Pakistan 2" = "Pakistan",
      "Sierra Leone 1" = "Sierra Leone",
      "Sierra Leone 2" = "Sierra Leone",
      "Uganda 1" = "Uganda",
      "Uganda 2" = "Uganda"
    )
  ) %>%
  group_by(country) %>%
  mutate(weight_1 = weight / sum(weight)) %>%
  ungroup() %>%
  group_by(cluster_2) %>%
  mutate(weight_2 = weight_1 / sum(weight_1)) %>%
  ungroup() 
# Inspect weights: total weight of each study under both schemes
df_country %>%
  group_by(country) %>%
  summarize(w1 = sum(weight_1), w2 = sum(weight_2)) %>%
  kable()

country	w1	w2
Burkina Faso	1	1.0
Colombia	1	1.0
India	1	1.0
Mozambique	1	1.0
Nepal	1	1.0
Nigeria	1	1.0
Pakistan 1	1	0.5
Pakistan 2	1	0.5
Rwanda	1	1.0
Sierra Leone 1	1	0.5
Sierra Leone 2	1	0.5
Uganda 1	1	0.5
Uganda 2	1	0.5

# Pooled LMIC acceptance estimated twice: with studies as the unit of
# weighting and clustering, and with countries as the unit.
unit_check <- 
  list(
    study = lm_robust(
      data = df_country,
      formula = take_vaccine_num ~ 1,
      cluster = cluster_1,
      weight = weight_1,
      se_type = "stata"
    ),
    country = lm_robust(
      data = df_country,
      formula = take_vaccine_num ~ 1,
      cluster = cluster_2,
      weight = weight_2,
      se_type = "stata"
    )
  ) %>%
  lapply(tidy) %>%
  bind_rows(.id = "Unit")
kable(unit_check, digits = 2)

Unit	term	estimate	std.error	statistic	p.value	conf.low	conf.high	df	outcome
study	(Intercept)	0.80	0.02	32.85	0	0.75	0.86	12	take_vaccine_num
country	(Intercept)	0.81	0.03	28.95	0	0.74	0.87	9	take_vaccine_num

2.3.3 Robustness keeping only national samples

# Keep only the studies with national samples (their own rows and their rows
# in the pooled "All" group).
nationals <- 
  df2 %>%
  filter(
    country %in% c("Burkina Faso", "Colombia", "Rwanda",
                   "Sierra Leone 1", "Sierra Leone 2")
  )

# Overall acceptance per group on the national samples
main_results_n <- 
  nationals %>% 
  dplyr::filter(
    !is.na(take_vaccine_num),
    !is.na(cluster),
    !is.na(weight)
  ) %>% 
  dplyr::nest_by(group) %>%
  dplyr::summarize(
    lm_helper(
      data = data,
      formula = take_vaccine_num ~ 1,
      cluster = cluster,
      weight = weight,
      se_type = "stata"
    ),
    .groups = "drop"
  )

# Acceptance by gender
acc_by_gender_n <- grp_analysis(nationals, y = "take_vaccine_num", x = "gender")

# Acceptance by education (binary recoding)
acc_by_educ_binary_n <- grp_analysis(nationals, y = "take_vaccine_num", x = "educ_binary")

# Acceptance by age (three bands), dropping rows with an infinite test
# statistic or a confidence interval whose lower bound is not above zero
acc_by_age_n <- 
  grp_analysis(nationals, y = "take_vaccine_num", x = "age_groups_three") %>%
  dplyr::filter(statistic != Inf) %>%
  dplyr::filter(conf.low > 0)

# Stack the estimates, convert to rounded percentages and keep only the pooled
# rows, relabelled as the national-samples average.
ans_n <- 
  dplyr::bind_rows(
    mutate(main_results_n, cat = "All", var = "All"),
    mutate(rename(acc_by_gender_n, cat = gender), var = "By gender"),
    mutate(rename(acc_by_educ_binary_n, cat = educ_binary), var = "By education"),
    mutate(rename(acc_by_age_n, cat = age_groups_three), var = "By age")
  ) %>%
  dplyr::mutate(
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1))
  ) %>%
  filter(group == "All") %>%
  mutate(
    group = "All LMICs (National samples)",
    tag = "All LMICs (National samples)"
  )  


# Prepare df to plot: give the national-samples rows the same factor coding
# as `ans`, break the label before "(" and append the main estimates.

ans_n %<>% 
  dplyr::mutate(
    var = factor(var, levels = c("All", "By gender", "By education", "By age")),
    cat = factor(
      cat, ordered = TRUE,
      levels = rev(c("Female", "Male", "Up to Secondary", 
                     "> Secondary", "<25", "25-54", "55+", "All")),
      labels = rev(c("Female", "Male", "Up to Secondary", 
                     "More than Secondary", "$< 25$", "$25-54$", "$55 +$", "All"))),
    tag = gsub(pattern = " \\(", "\\\n\\(", tag)) %>%
  bind_rows(ans)

# Labels listed after the individual LMIC studies. "All LMICs" already matches
# both the pooled label and the national-samples label. The former extra
# alternative "All LMICs (National samples)" had unescaped parentheses (a
# regex group), so it could never match the literal label and was redundant.
special_cases <- 
  sort(unique(ans_n$tag)[grep(unique(ans_n$tag), pattern = "All LMICs|Russia|USA")])

# Order the labels: LMIC studies alphabetically, then the special cases;
# the order is reversed because the plot flips its coordinates.
ans_n %<>% 
  dplyr::mutate(
    tag = 
      factor(x = tag, ordered = TRUE,
             levels = rev(c(sort(unique(tag)[!(unique(tag) %in% special_cases)]), special_cases))))

# Overall estimate for the national samples, printed below the figure
ans_national <- ans_n %>% filter(group == "All LMICs (National samples)" & cat=="All") 



# Same figure as the main acceptance plot, with the national-samples average
# added. Note that this overwrites the earlier `fig_1_ages` object.
fig_1_ages <- 
  ggplot(data = ans_n, aes(x = tag, y = estimate, color = cat)) + 
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    size = .5,
    width = .2,
    position = position_dodge(0.6)
  ) + 
  geom_point(position = position_dodge(0.6)) +
  facet_grid(. ~ var, scales = "free_x", space = "free") + 
  coord_flip() +
  guides(color = guide_legend(reverse = TRUE, nrow = 2)) +
  # Separator lines between label positions on the (flipped) study axis
  geom_vline(xintercept = 4.5, color = "darkgrey") +
  geom_vline(xintercept = 3.5, color = "darkgrey") +
  geom_vline(xintercept = 2.5, color = "darkgrey") +
  scale_colour_manual(values = safe_colorblind_palette) +
  labs(
    title = "If a COVID-19 vaccine becomes available in [country], would you take it?",
    color = "Subgroups",
    x = ""
  ) +
  theme_bw(base_size = (base_font_size - 2)) +
  ylim(c(0,100)) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0), # default is hjust = 1
    plot.title.position = "plot",           # applies to the subtitle too
    plot.caption.position =  "plot",
    axis.text.y = element_text(hjust = 0)
  )

fig_1_ages

kable(ans_national, digits = 2)

group	term	estimate	std.error	statistic	p.value	conf.low	conf.high	df	outcome	n	cat	var	tag
All LMICs (National samples)	(Intercept)	78.4	0.04	20.72	0	67.9	89	4	take_vaccine_num	6524	All	All	All LMICs (National samples)

2.3.4 Robustness check using leave-m-out approach

The main estimates for average LMIC acceptance do not vary strongly if we exclude one or two samples at a time and re-estimate the average each time.

# Step 1: leave-one-out and leave-two-out estimates of the overall LMIC
# average. For m = 1 and m = 2, every combination of m values of `country` is
# dropped in turn (USA and Russia are excluded beforehand) and the default
# loo_helper() model is re-estimated. `m` labels the scheme; `tag` and `var`
# are both "All" for these overall estimates.
loo_estimates <- 
  plyr::ldply(
    .data = list(1,2), 
    .fun = function(x) {
      df %>% 
        dplyr::filter(dplyr::if_all(c(take_vaccine_num, cluster, weight), ~ !is.na(.))) %>% 
        dplyr::filter(!(country %in% c("USA", "Russia"))) %>% 
        loo_helper(., "country", loo_n = x) %>% 
        dplyr::mutate(m = paste0("Leaving ", x, " out"), tag = "All", var = "All")
    })

# Step 2: the same exercise for the subgroup breakdowns. For every pair of
# m (1 or 2) and subgroup variable, grp_analysis() is re-run on the pooled
# "All" rows of df2 with each combination of m values of `country` removed.
# The results are appended below the overall estimates from step 1, so this
# assignment must run after the one above.
loo_estimates <- 
  plyr::mdply(
    .data = expand_grid(x = c(1,2), var = c("gender", "educ_binary", "age_groups_three")), 
    .fun = function(x, var) {
      df2 %>% 
        dplyr::filter(dplyr::if_all(c(take_vaccine_num, cluster, weight), ~ !is.na(.))) %>% 
        dplyr::filter(group == "All") %>% 
        loo_helper(., "country", 
                   loo_n = x, 
                   loo_fun = function(dat) grp_analysis(dat, y = "take_vaccine_num", x = var)) %>% 
        dplyr::mutate(m = paste0("Leaving ", x, " out"), var = var)
    }) %>% 
  # `tag` takes the subgroup label from whichever subgroup column is filled;
  # `var` is mapped from the column name to its display label
  dplyr::mutate(tag = dplyr::coalesce(gender, educ_binary, age_groups_three),
                var = plyr::mapvalues(var, 
                                      from = c("gender", "educ_binary", "age_groups_three"),
                                      to = c("By gender", "By education", "By age"))) %>%
  # `.` is the subgroup result; the step-1 estimates come first
  dplyr::bind_rows(loo_estimates, .)

# Give the leave-m-out estimates the same factor coding as the main figure
# and keep only the columns needed for plotting.
loo_estimates <- 
  loo_estimates %>% 
  dplyr::mutate(
    var = factor(var, levels = c("All", "By gender", "By education", "By age")),
    tag = factor(
      tag,
      ordered = TRUE,
      levels = rev(c("Female", "Male", "Up to Secondary", 
                     "> Secondary", "<25", "25-54", "55+", "All")),
      labels = rev(c("Female", "Male", "Up to Secondary", 
                     "More than Secondary", "$< 25$", "$25-54$", "$55 +$", "All"))
    )
  ) %>% 
  dplyr::select(var, m, tag, estimate)

# Colour-blind-safe palette (defined again here)
safe_colorblind_palette <- c(
  "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99",
  "#999933", "#882255", "#661100", "#6699CC", "#888888", "#88CCEE"
)

# Reference estimates drawn as vertical lines: pooled LMIC average, USA, Russia
ans_loo <- 
  ans_loo %>% 
  dplyr::filter(group %in% c("All", "USA", "Russia")) %>% 
  dplyr::mutate(
    group = plyr::mapvalues(group, from = "All", to = "All LMIC"),
    tag = cat
  )

# Histograms of the leave-m-out estimates (in percent), one panel per
# breakdown/subgroup and leave-out scheme, with the full-sample estimates
# drawn as vertical reference lines.
fig_hist_loo <- 
  loo_estimates %>%
  dplyr::mutate(estimate = estimate*100) %>% 
  ggplot(aes(estimate, color = tag, fill = tag)) + 
  # after_stat(density) replaces the deprecated `..density..` notation
  geom_histogram(aes(y = after_stat(density)), bins = 200, 
                 position = "dodge", alpha = .3, size = .3) +
  geom_vline(data = ans_loo, 
             aes(xintercept = estimate, color = cat, linetype = group), size = .9) +
  facet_grid_paginate(var + tag ~ m)  +
  scale_color_manual(
    name = "Subgroups", 
    values = safe_colorblind_palette) + 
  scale_fill_manual(
    name = "Subgroups", 
    values = safe_colorblind_palette) + 
  scale_linetype_manual(
    name = "Sample Average", 
    values = c("solid", "11", "dashed")) + 
  scale_x_continuous(n.breaks = 8, name = "estimate") +
  scale_y_continuous(labels = scales::percent_format(scale = 1, suffix = "", accuracy = 0.1)) +
  labs(title = "",
       x = "") +
  guides(color = guide_legend(reverse = TRUE, nrow = 2, keyheight = 1),
         fill = guide_legend(reverse = TRUE, nrow = 2, keyheight = 1),
         linetype = guide_legend(keyheight = 2)) +
  theme_bw(base_size = (base_font_size - 2)) +
  theme(legend.position = "bottom")

fig_hist_loo

2.3.5 Differences in means

# Analysis of differences in means, LMICs only.
# Uganda 1 is dropped from the gender and age models because it does not have
# reference categories for gender or age.
# We use df (and not df2) as data, since it does not include the "All" group.

# Population estimate (clustering on country): one model per covariate with
# country fixed effects and study-normalised weights.

differences_means_gen_age <- 
  lapply(
    c("gender", "age_groups_binary", "age_groups_three"),
    function(covariate) {
      model_data <- 
        df %>% 
        dplyr::filter(country != "USA" & country != "Russia" & country != "Uganda 1") %>%
        dplyr::filter(if_all(c(all_of(covariate), all_of("take_vaccine_num")), ~ !is.na(.))) %>%
        study_weighting()

      fit <- estimatr::lm_robust(
        as.formula(paste("take_vaccine_num ~", covariate)),
        fixed_effects = ~country,
        weight = weight,
        cluster = country,
        se_type = "stata",
        data = model_data
      )

      dplyr::select(tidy(fit), estimate, std.error, p.value, df, term)
    }
  ) %>% 
  dplyr::bind_rows(.)

# Education model: same specification, but Uganda 1 is kept
differences_means_educ <- 
  df %>% 
  dplyr::filter(country != "USA" & country != "Russia") %>% 
  dplyr::filter(if_all(c(all_of("educ_binary"), all_of("take_vaccine_num")), ~ !is.na(.))) %>%
  study_weighting() %>%
  estimatr::lm_robust(
    take_vaccine_num ~ educ_binary,
    fixed_effects = ~country,
    weight = weight,
    cluster = country,
    se_type = "stata",
    data = .
  ) %>% 
  tidy() %>% 
  dplyr::select(estimate, std.error, p.value, df, term)


# Pooled differences expressed in percentage points (estimate * 100),
# rounded to one decimal place.

# Coefficient on genderMale
dif_gender <- 
  differences_means_gen_age %>% 
  dplyr::filter(term == "genderMale") %>% 
  dplyr::pull(estimate) %>% 
  {round(. * 100, 1)}

# Coefficient on the 55+ level of the binary age variable
dif_age <- 
  differences_means_gen_age %>% 
  dplyr::filter(term == "age_groups_binary55+") %>% 
  dplyr::pull(estimate) %>% 
  {round(. * 100, 1)}

# Coefficients on the 25-54 and 55+ levels of the three-category age variable
# (a vector with one entry per matching term, in table order)
dif_age_three <- 
  differences_means_gen_age %>%
  dplyr::filter(term %in% c("age_groups_three25-54", "age_groups_three55+")) %>%
  dplyr::pull(estimate) %>% 
  {round(. * 100, 1)}
 
# Coefficient on the "Up to Secondary" level of the binary education variable
dif_educ <- 
  differences_means_educ %>% 
  dplyr::filter(term == "educ_binaryUp to Secondary") %>% 
  dplyr::pull(estimate) %>% 
  {round(. * 100, 1)}

# Stack the gender/age and education coefficient tables, give the columns
# display names, and label the three coefficients that are reported in the
# main differences-in-means table. Every other row (intercepts, three-category
# age terms) gets Variable == "" and keeps its raw term name.
# NOTE(review): the column called "Baseline category" is filled from the
# coefficient's term (e.g. "genderMale" -> "Male"), i.e. the contrasted level,
# while Variable carries the other level in parentheses -- confirm that the
# column headings read as intended.
diffmeans <- 
  rbind(differences_means_gen_age, differences_means_educ) %>%
  dplyr::rename(
    Estimate = estimate,
    Std.error = std.error,
    `P-value` = p.value,
    `Degrees of freedom` = df,
    `Baseline category` = term) %>%
  dplyr::mutate(
    # Variable is derived from the raw term names, so it must be computed
    # before the term names are replaced with display labels below.
    Variable = dplyr::case_when(
      is.na(`Baseline category`) ~ NA_character_,
      `Baseline category` == "genderMale" ~ "Gender (Female)",
      `Baseline category` == "age_groups_binary55+" ~ "Age",
      `Baseline category` == "educ_binaryUp to Secondary" ~ 
        "Education (Secondary +)",
      TRUE ~ ""),
    `Baseline category` = dplyr::case_when(
      `Baseline category` == "genderMale" ~ "Male",
      `Baseline category` == "age_groups_binary55+" ~ "55+",
      `Baseline category` == "educ_binaryUp to Secondary" ~ "Up to secondary",
      TRUE ~ `Baseline category`)) 

# LaTeX version of the differences-in-means table; only rows that received a
# display label (Variable != "") are shown.
dmeans <- 
  diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    format = "latex",
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    align = "c",
    booktabs = TRUE,
    linesep = "") %>% 
  kableExtra::kable_styling(
    latex_options = "hold_position",
    font_size = base_font_size - 2,
    full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 9 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. p-values come from a two-sided t-test from a linear regression.",
    threeparttable = TRUE) 

# Same table rendered in the notebook's default output format
diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    booktabs = TRUE,
    linesep = "") %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.",
    threeparttable = TRUE)

Differences in means
Estimate	Std.error	P-value	Degrees of freedom	Baseline category	Variable
0.04	0.01	0.00	10	Male	Gender (Female)
-0.01	0.01	0.59	10	55+	Age
0.02	0.03	0.38	10	Up to secondary	Education (Secondary +)
Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.

2.3.6 Differences in means: Age with three categories

# Swap the binary age row for the two three-category age rows: the row
# labelled "Age" is removed, and the 25-54 and 55+ coefficients (unlabelled
# until now) get a Variable label and "<25" in the "Baseline category" column.
# This overwrites diffmeans, so the chunk is not meant to be run twice.
diffmeans <- 
  diffmeans %>%
  dplyr::filter(Variable != "Age") %>%
  dplyr::mutate(
    # Variable is derived from the raw term names, so it is computed before
    # those names are overwritten below.
    Variable = dplyr::case_when(
      `Baseline category` == "age_groups_three25-54" ~ "Age (25-54)",
      `Baseline category` == "age_groups_three55+" ~ "Age (55+)",
      TRUE ~ Variable),
    `Baseline category` = dplyr::case_when(
      `Baseline category` %in% 
        c("age_groups_three25-54", "age_groups_three55+") ~ "<25",
      TRUE ~ `Baseline category`))

# LaTeX version of the differences-in-means table with three age categories;
# only rows that carry a display label (Variable != "") are shown. This
# replaces the dmeans object built for the binary-age version.
dmeans <- 
  diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    format = "latex",
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    align = "c",
    booktabs = TRUE,
    linesep = "") %>% 
  kableExtra::kable_styling(
    latex_options = "hold_position",
    font_size = base_font_size - 2,
    full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 9 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. p-values come from a two-sided t-test from a linear regression.",
    threeparttable = TRUE) 

# Same table rendered in the notebook's default output format
diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    booktabs = TRUE,
    linesep = "") %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.",
    threeparttable = TRUE)

Differences in means
Estimate	Std.error	P-value	Degrees of freedom	Baseline category	Variable
0.04	0.01	0.00	10	Male	Gender (Female)
-0.02	0.02	0.43	10	<25	Age (25-54)
-0.02	0.02	0.36	10	<25	Age (55+)
0.02	0.03	0.38	10	Up to secondary	Education (Secondary +)
Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.

2.3.7 Differences in means: by study

# Within-study subgroup differences. For every value of df$country and each
# of gender, three-category age and binary education, regress take_vaccine_num
# on the covariate using the study's own weights and clusters. A covariate is
# skipped for a study when table() of it has fewer than two entries.
# The result has one row per non-intercept coefficient, with the study name,
# the model's number of observations (n) and a 5% significance flag.
country_differences <-
  unique(df$country) %>%
  lapply(function(current_country) {
    country_data <- dplyr::filter(df, country == current_country)
    
    covariate_rows <- lapply(
      c("gender", "age_groups_three", "educ_binary"), 
      function(covariate) {
        # Nothing to compare when fewer than two categories are tabulated
        if (length(table(country_data[[covariate]])) < 2) {
          return(NULL)
        }
        
        fit <- estimatr::lm_robust(
          as.formula(paste("take_vaccine_num ~", covariate)),
          data = country_data,
          weight = weight,
          cluster = cluster,
          se_type = "stata") 
        
        fit %>% 
          tidy() %>%
          # `df` here is the degrees-of-freedom column of the tidied model
          dplyr::select(estimate, std.error, p.value, df, term) %>%
          dplyr::mutate(n = fit$nobs)
      })
    
    covariate_rows %>%
      dplyr::bind_rows() %>% 
      dplyr::mutate(country = current_country)
  }) %>% 
  dplyr::bind_rows() %>% 
  dplyr::arrange(term, country) %>% 
  dplyr::relocate(country, term) %>%
  dplyr::filter(term != "(Intercept)") %>% 
  dplyr::mutate(significant = p.value <= .05) %>%
  # Replace raw coefficient names with display labels
  dplyr::mutate(
    term = dplyr::case_when(
      term == "age_groups_three25-54" ~ "25-54",
      term == "age_groups_three55+" ~ "55+",
      term == "educ_binaryUp to Secondary" ~ "Up to secondary",
      term == "genderMale" ~ "Male",
      TRUE ~ term))

# Per-term tally across studies other than Russia and the USA: how many
# within-study differences are positive, positive/negative and significant,
# or not significant, plus the number of studies contributing (n).
# The first column name keeps its trailing space ("positive ").
country_differences_summary <- 
  country_differences %>% 
  dplyr::filter(!(country %in% c("Russia", "USA"))) %>% 
  dplyr::group_by(term) %>% 
  dplyr::summarize(
    "positive " = sum(estimate > 0),
    "positive and significant" = sum(significant & estimate > 0),
    "negative and significant" = sum(significant & estimate < 0),
    "not significant" = sum(!significant),
    n = dplyr::n()) 

# Display version of country_differences: adds the reference level (baseline)
# and the covariate family (group) for each term, rounds the estimate,
# standard error and p-value to two decimals, and drops the significance flag.
t_country_differences <- 
  country_differences %>%
  dplyr::mutate(
    # Any term other than "Male" / "Up to secondary" is treated as an age term
    baseline = dplyr::case_when(
      term == "Male" ~ "Female",
      term == "Up to secondary" ~ "Secondary +",
      TRUE ~ "<25"),
    group = dplyr::case_when(
      baseline == "<25" ~ "Age",
      baseline == "Secondary +" ~ "Education",
      baseline == "Female" ~ "Gender"),
    estimate = round(estimate, 2),
    std.error = round(std.error, 2),
    p.value = round(p.value, 2)) %>%
  dplyr::select(country, group, baseline, term, everything(), -significant)

# LaTeX version of the within-study differences table. The nine column
# headers follow the column order of t_country_differences.
t_country <- 
  t_country_differences %>%
  kable(
    format = "latex", 
    caption = "Differences between groups within studies", 
    label = "countrydiff",
    digits = 2, 
    col.names = c("Country", "Variable", "Baseline category", "Group", 
                  "Estimate", "Std. Error", "P-value", 
                  "Degrees of freedom", "N Obs"),
    format.args = list(big.mark = ",", scientific = FALSE),
    booktabs = TRUE, 
    linesep = "") %>% 
  kableExtra::kable_styling(
    latex_options = "scale_down", 
    font_size = base_font_size - 2, 
    full_width = FALSE) %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "6em") %>% 
  kableExtra::column_spec(2:3, width = "6em") %>% 
  kableExtra::column_spec(4, width = "9em")  %>% 
  kableExtra::column_spec(5:8, width = "6em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 11 shows differences of means between groups within single studies. Estimates are calculated through OLS and represent the difference in the average acceptance rate between the subgroup in column Group and that in column Baseline category.",
    threeparttable = TRUE)


# Plain version of the same table for the rendered notebook
knitr::kable(
  t_country_differences, 
  caption = "Differences between groups within studies ",
  digits = 2)

Differences between groups within studies
country	group	baseline	term	estimate	std.error	p.value	df	n
Burkina Faso	Age	<25	25-54	-0.13	0.10	0.21	119	120
Colombia	Age	<25	25-54	-0.01	0.04	0.79	689	690
India	Age	<25	25-54	0.08	0.03	0.01	141	1680
Mozambique	Age	<25	25-54	-0.12	0.01	0.00	162	860
Nepal	Age	<25	25-54	-0.01	0.01	0.32	89	1324
Nigeria	Age	<25	25-54	0.09	0.03	0.01	1867	1868
Pakistan 1	Age	<25	25-54	-0.11	0.04	0.00	105	1633
Russia	Age	<25	25-54	-0.06	0.02	0.01	22124	22125
Rwanda	Age	<25	25-54	-0.04	0.02	0.03	1354	1355
Sierra Leone 1	Age	<25	25-54	0.00	0.03	0.94	1069	1070
Sierra Leone 2	Age	<25	25-54	0.05	0.04	0.30	190	2087
Uganda 1	Age	<25	25-54	0.00	0.02	0.83	497	3198
Uganda 2	Age	<25	25-54	0.00	0.03	0.89	309	1366
USA	Age	<25	25-54	0.14	0.04	0.00	1958	1959
Burkina Faso	Age	<25	55+	-0.15	0.24	0.53	119	120
Colombia	Age	<25	55+	-0.02	0.06	0.79	689	690
India	Age	<25	55+	0.05	0.04	0.19	141	1680
Mozambique	Age	<25	55+	-0.08	0.02	0.00	162	860
Nepal	Age	<25	55+	-0.04	0.02	0.06	89	1324
Nigeria	Age	<25	55+	0.06	0.05	0.28	1867	1868
Pakistan 1	Age	<25	55+	-0.06	0.07	0.45	105	1633
Russia	Age	<25	55+	0.07	0.03	0.03	22124	22125
Rwanda	Age	<25	55+	-0.15	0.07	0.04	1354	1355
Sierra Leone 1	Age	<25	55+	-0.04	0.07	0.56	1069	1070
Sierra Leone 2	Age	<25	55+	0.07	0.05	0.12	190	2087
Uganda 2	Age	<25	55+	-0.03	0.04	0.47	309	1366
USA	Age	<25	55+	0.18	0.04	0.00	1958	1959
Burkina Faso	Education	Secondary +	Up to secondary	0.09	0.03	0.00	976	977
Colombia	Education	Secondary +	Up to secondary	-0.05	0.03	0.10	1010	1011
India	Education	Secondary +	Up to secondary	-0.02	0.04	0.59	100	340
Mozambique	Education	Secondary +	Up to secondary	0.04	0.03	0.17	160	828
Pakistan 1	Education	Secondary +	Up to secondary	-0.10	0.04	0.01	105	1621
Pakistan 2	Education	Secondary +	Up to secondary	-0.07	0.03	0.00	1491	1492
Russia	Education	Secondary +	Up to secondary	-0.01	0.01	0.31	22124	22125
Rwanda	Education	Secondary +	Up to secondary	0.16	0.03	0.00	1354	1355
Sierra Leone 1	Education	Secondary +	Up to secondary	0.06	0.03	0.03	1037	1038
Sierra Leone 2	Education	Secondary +	Up to secondary	-0.01	0.02	0.63	190	2110
Uganda 1	Education	Secondary +	Up to secondary	0.05	0.03	0.12	494	2739
Uganda 2	Education	Secondary +	Up to secondary	0.11	0.03	0.00	309	1366
USA	Education	Secondary +	Up to secondary	-0.21	0.03	0.00	1958	1959
Burkina Faso	Gender	Female	Male	0.06	0.03	0.06	976	977
Colombia	Gender	Female	Male	0.04	0.03	0.18	1011	1012
India	Gender	Female	Male	0.02	0.02	0.22	141	1680
Mozambique	Gender	Female	Male	0.05	0.02	0.02	162	862
Nepal	Gender	Female	Male	0.00	0.01	0.98	89	1324
Nigeria	Gender	Female	Male	0.02	0.02	0.30	1867	1868
Pakistan 1	Gender	Female	Male	0.08	0.02	0.00	105	1629
Russia	Gender	Female	Male	0.16	0.01	0.00	22124	22125
Rwanda	Gender	Female	Male	0.09	0.02	0.00	1354	1355
Sierra Leone 1	Gender	Female	Male	0.06	0.03	0.03	1069	1070
Sierra Leone 2	Gender	Female	Male	-0.01	0.02	0.56	190	2110
Uganda 2	Gender	Female	Male	0.03	0.02	0.17	309	1366
USA	Gender	Female	Male	0.17	0.03	0.00	1958	1959

# Print the per-term tally of within-study differences
country_differences_summary %>%
  knitr::kable(
    caption = "Differences between groups within studies (Summary)",
    digits = 2)

Differences between groups within studies (Summary)
term	positive	positive and significant	negative and significant	not significant	n
25-54	6	2	3	7	12
55+	3	0	2	9	11
Male	9	4	0	7	11
Up to secondary	6	4	2	5	11

2.3.8 Metaplus analysis

# Metaplus (variance between countries, included in text)
# Fits metaplus::metaplus() to the study-level rows of main_results (USA,
# Russia and the pooled "All" row are left out): yi takes the estimates and
# sei their standard errors. The same filter is applied to both arguments so
# the two vectors stay aligned.
mp <- metaplus::metaplus(
  yi  = main_results %>% filter(!(group %in% c("USA", "Russia", "All"))) %>% pull(estimate),
  sei = main_results %>% filter(!(group %in% c("USA", "Russia", "All"))) %>% pull(std.error))
# Row 2, column 1 of the results table. The printed table labels this row
# tau2, so despite its name mp_tau holds the tau2 estimate.
mp_tau <- mp$results[2,1]
# Square root of the tau2 estimate divided by the row-1 estimate (muhat in the
# printed table)
mp_ratio <- sqrt(mp$results[2,1])/mp$results[1,1]

# Print the full results table
mp$results %>% kable(caption = "Metaplus  results")

Metaplus results
	Est.	95% ci.lb	95% ci.ub	pvalue
muhat	0.8033208	0.7532172	0.8530296	0
tau2	0.0070689	.	.	.

2.3.9 Generate quantities used in text

# Quantities quoted in the text, all taken from the overall rows of `ans`
# (cat == "All").

# Mean of LMICs: the pooled row (group == "All")
ans_mean <- 
  ans %>% 
  dplyr::filter(cat == "All" & group == "All") %>% 
  dplyr::pull(estimate)

# Lower and upper confidence bounds of the pooled estimate
ans_mean_low <- 
  ans %>% 
  dplyr::filter(cat == "All" & group == "All") %>% 
  dplyr::pull(conf.low)

ans_mean_high <-  
  ans %>% 
  dplyr::filter(cat == "All" & group == "All") %>% 
  dplyr::pull(conf.high)

# Study-level LMIC rows only (no USA, Russia or pooled "All" row)
ans_stats <- 
  ans %>% 
  dplyr::filter(cat == "All" & !(group %in% c("USA", "Russia", "All")))

# Median and interquartile range of the LMIC study estimates
ans_median <- ans_stats %>% dplyr::pull(estimate) %>% median()

ans_iqr <- ans_stats %>% dplyr::pull(estimate) %>% IQR()

# Smallest and largest acceptability among LMICs
# NOTE(review): unlike ans_stats, the filters for ans_min, ans_max and
# bottom_all do not exclude the pooled "All" row -- confirm this is intended.
ans_min <- 
  ans %>% 
  dplyr::filter(cat == "All", group != "Russia", group != "USA") %>% 
  dplyr::pull(estimate) %>% 
  min()

ans_max <- 
  ans %>% 
  dplyr::filter(cat == "All", group != "Russia", group != "USA") %>% 
  dplyr::pull(estimate) %>% 
  max()

# Overall rows ranked from highest to lowest estimate (all groups)
top_all <- 
  ans %>%
  dplyr::filter(cat == "All") %>% 
  dplyr::arrange(dplyr::desc(estimate))

# Overall rows ranked from lowest to highest estimate, without Russia and USA
bottom_all <- 
  ans %>%
  dplyr::filter(cat == "All", group != "Russia", group != "USA") %>% 
  dplyr::arrange(estimate)

# Overall acceptability rows for the USA and Russia
usa_ans <- 
  ans %>%
  dplyr::filter(cat == "All", group == "USA") 

rus_ans <- 
  ans %>%
  dplyr::filter(cat == "All", group == "Russia")

2.3.10 Table version of Figure 1

# Here we are making "percentages" from the estimates and putting them together with confidence intervals
# Also we are going from long to wide

# The eight tables below share one formatting recipe, so it lives in three
# small helpers instead of being repeated for every table.

# Turn proportions into percentage strings with one decimal place.
# format() is applied to the whole vector at once, as in the original code.
format_pct_1dp <- function(x) {
  format(round(100 * x, 1), nsmall = 1)
}

# Overall (not disaggregated) results.
#   results: data with columns group, estimate, conf.low, conf.high
# Returns group, estimate__main (formatted percentage) and conf_int__main
# (formatted "(low, high)" interval).
overall_pct_table <- function(results) {
  results %>%
    dplyr::mutate(
      across(c(estimate, conf.low, conf.high), format_pct_1dp,
             .names = "{.col}__main"),
      conf_int__main = paste0("(", conf.low__main, ", ", 
                              conf.high__main, ")")) %>%
    dplyr::select(group, estimate__main, conf_int__main)
}

# Results disaggregated by one subgroup variable, reshaped from long to wide.
#   results:  data with columns group, estimate, conf.low, conf.high and the
#             subgroup column
#   subgroup: name of the subgroup column, as a string
# Returns one row per group with columns estimate__<level> and
# conf_int__<level> for every level of the subgroup variable.
subgroup_pct_table <- function(results, subgroup) {
  results %>%
    dplyr::mutate(
      across(c(estimate, conf.low, conf.high), format_pct_1dp),
      conf_int = paste0("(", conf.low, ", ", conf.high, ")")) %>%
    dplyr::select(group, all_of(subgroup), estimate, conf_int) %>% 
    tidyr::pivot_wider(names_from = all_of(subgroup), 
                       values_from = c(estimate, conf_int), 
                       names_sep = "__")
}

# Tables built from the main results
main_table   <- overall_pct_table(main_results)
gender_table <- subgroup_pct_table(acc_by_gender, "gender")
educ_table   <- subgroup_pct_table(acc_by_educ_binary, "educ_binary")
age_table    <- subgroup_pct_table(acc_by_age, "age_groups_three")

# Same tables built from the *_n result objects
main_table_n   <- overall_pct_table(main_results_n)
gender_table_n <- subgroup_pct_table(acc_by_gender_n, "gender")
educ_table_n   <- subgroup_pct_table(acc_by_educ_binary_n, "educ_binary")
age_table_n    <- subgroup_pct_table(acc_by_age_n, "age_groups_three")

# Pooled row of the *_n tables, relabelled for display. The tables share only
# the `group` column, so it is named explicitly as the join key rather than
# left to the natural-join default (which also prints a message per join).
all_tables_n <- 
  main_table_n %>% 
  dplyr::left_join(gender_table_n, by = "group") %>%
  dplyr::left_join(educ_table_n, by = "group") %>% 
  dplyr::left_join(age_table_n, by = "group") %>%
  dplyr::filter(group == "All") %>%
  dplyr::mutate(group = "All LMICs (National)")

# Full table: one estimate row followed by one confidence-interval row per
# group, with the pooled rows, Russia and the USA moved to the bottom.
all_tables <- 
  main_table %>% 
  dplyr::left_join(gender_table, by = "group") %>%
  dplyr::left_join(educ_table, by = "group") %>% 
  dplyr::left_join(age_table, by = "group") %>%
  dplyr::bind_rows(all_tables_n) %>%
  dplyr::select(group, everything()) %>%
  dplyr::mutate(
    group = as.factor(group),
    group = forcats::fct_relevel(group, "All", "All LMICs (National)", 
                                 "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Split "estimate__X" / "conf_int__X" columns into a `type` column and one
  # value column per X, giving two rows per group
  tidyr::pivot_longer(cols = c(starts_with("estimate__"), 
                               starts_with("conf_int__")),
                      names_to = c("type", ".value"),
                      names_pattern = "(.*)__(.*)") %>% 
  # Show the group label on the estimate row only
  dplyr::mutate(group = ifelse(type == "conf_int", "", as.character(group)),
                group = ifelse(group == "All", "All LMICs", group)) %>% 
  dplyr::select(-type) %>% 
  dplyr::rename("Country" = "group", 
                "Average acceptability" = "main")



# LaTeX version of the table behind Figure 1.
# The data is supplied through the pipe only. The original call also passed
# `all_tables` a second time as a positional argument, which R matched to one
# of kable()'s formatting parameters instead of the data.
tab_fig1 <- 
  all_tables %>%
  knitr::kable(
    caption = "If a COVID-19 vaccine becomes available in [country], would you take it? Disaggregated by subgroups", 
    format = "latex",  
    booktabs = TRUE, 
    linesep = "", 
    format.args = list(big.mark = ",", scientific = FALSE), 
    escape = FALSE, 
    align = "lcccccccc", 
    label = "maintabledis")  %>% 
  kableExtra::kable_styling(
    latex_options = c("scale_down", "hold_position"),
    font_size = base_font_size - 2, 
    full_width = FALSE) %>%
  kableExtra::add_header_above(
    c("", "", "Gender" = 2, "Education" = 2, "Age" = 3), 
    bold = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 4 shows percentage of respondents willing to take the COVID-19 vaccine as plotted in Figure 1. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE) 


# Same table rendered in the notebook's default output format
all_tables %>%
  knitr::kable(
    caption = "If a COVID-19 vaccine becomes available in [country], would you take it? Disaggregated by subgroups", 
    align = "lcccccccc", 
    booktabs = TRUE) %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::add_header_above(
    c("", "", "Gender" = 2, "Education" = 2, "Age" = 3), 
    bold = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 4 shows percentage of respondents willing to take the COVID-19 vaccine as plotted in Figure 1. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE)

If a COVID-19 vaccine becomes available in [country], would you take it? Disaggregated by subgroups
		Gender		Education		Age
Country	Average acceptability	Female	Male	> Secondary	Up to Secondary	<25	25-54	55+
Burkina Faso	66.5	62.1	68.4	60.8	70.1	76.0	63.2	.
	(63.5, 69.5)	(56.3, 67.9)	(65.0, 71.9)	(55.9, 65.8)	(66.4, 73.8)	(58.0, 94.0)	(53.0, 73.4)	.
Colombia	74.9	73.5	77.3	78.1	73.4	75.4	74.2	73.8
	(72.2, 77.6)	(70.1, 77.0)	(73.0, 81.7)	(73.6, 82.5)	(70.1, 76.8)	(67.5, 83.4)	(70.2, 78.3)	(65.0, 82.6)
India	84.3	82.4	84.7	87.8	85.9	77.6	85.4	83.1
	(82.3, 86.3)	(79.0, 85.8)	(82.5, 87.0)	(81.1, 94.6)	(82.5, 89.2)	(71.6, 83.6)	(83.4, 87.3)	(77.6, 88.5)
Mozambique	89.1	86.2	91.3	86.1	89.7	.	88.3	91.7
	(86.5, 91.7)	(82.5, 90.0)	(88.4, 94.1)	(81.8, 90.4)	(86.5, 92.8)	.	(85.3, 91.2)	(88.1, 95.4)
Nepal	96.6	96.4	96.4	.	.	97.8	96.6	93.8
	(95.5, 97.6)	(94.6, 98.2)	(95.1, 97.7)	.	.	(95.7, 99.8)	(95.2, 97.9)	(90.4, 97.2)
Nigeria	76.2	74.9	77.0	.	.	69.0	77.6	74.7
	(74.3, 78.2)	(71.7, 78.1)	(74.6, 79.4)	.	.	(63.3, 74.7)	(75.5, 79.7)	(65.8, 83.6)
Pakistan 1	76.1	72.2	80.1	83.6	74.0	86.3	75.6	80.8
	(70.0, 82.3)	(65.6, 78.8)	(73.8, 86.4)	(76.6, 90.5)	(67.4, 80.5)	(78.4, 94.1)	(69.3, 81.8)	(64.9, 96.7)
Pakistan 2	66.5	.	.	71.4	64.2	.	.	.
	(64.1, 68.9)	.	.	(67.3, 75.5)	(61.2, 67.1)	.	.	.
Rwanda	84.9	79.4	88.0	71.4	87.7	88.1	83.8	73.3
	(82.9, 86.8)	(75.8, 83.0)	(85.8, 90.2)	(65.5, 77.2)	(85.8, 89.7)	(85.0, 91.1)	(81.3, 86.3)	(59.2, 87.3)
Sierra Leone 1	78.0	74.1	80.1	74.4	80.2	78.0	78.3	74.0
	(75.5, 80.5)	(69.5, 78.7)	(77.2, 83.1)	(70.1, 78.7)	(77.0, 83.3)	(72.4, 83.6)	(75.4, 81.1)	(61.4, 86.6)
Sierra Leone 2	87.9	88.6	87.7	88.8	87.8	82.9	87.6	90.0
	(86.2, 89.6)	(85.7, 91.5)	(85.9, 89.5)	(85.0, 92.5)	(86.0, 89.6)	(73.9, 91.9)	(85.6, 89.5)	(87.4, 92.6)
Uganda 1	85.8	85.8	.	80.1	84.8	85.5	85.9	.
	(84.4, 87.2)	(84.4, 87.2)	.	(74.4, 85.9)	(83.2, 86.5)	(82.7, 88.4)	(84.3, 87.4)	.
Uganda 2	76.5	74.9	78.0	68.6	79.8	76.5	77.0	73.7
	(74.3, 78.7)	(71.5, 78.3)	(75.2, 80.9)	(64.3, 72.9)	(77.3, 82.2)	(71.0, 82.1)	(74.4, 79.6)	(67.3, 80.0)
All LMICs	80.3	79.2	82.6	77.4	79.8	82.8	81.1	79.1
	(74.9, 85.6)	(73.4, 85.0)	(77.4, 87.9)	(71.4, 83.4)	(74.1, 85.4)	(76.9, 88.7)	(75.6, 86.6)	(72.5, 85.7)
All LMICs (National)	78.4	75.5	80.3	74.7	79.8	80.1	77.4	74.4
	(67.9, 89.0)	(63.6, 87.5)	(70.2, 90.4)	(62.1, 87.3)	(69.8, 89.9)	(73.4, 86.7)	(65.7, 89.2)	(61.7, 87.2)
Russia	30.4	22.6	38.5	31.0	29.6	33.5	27.6	40.0
	(29.1, 31.7)	(20.9, 24.2)	(36.5, 40.5)	(29.6, 32.5)	(27.3, 32.0)	(29.2, 37.7)	(26.2, 28.9)	(35.9, 44.0)
USA	64.6	56.1	73.4	72.3	51.5	51.0	64.9	69.4
	(61.8, 67.3)	(52.1, 60.1)	(69.8, 76.9)	(69.5, 75.0)	(46.0, 57.0)	(43.5, 58.6)	(61.1, 68.7)	(64.8, 73.9)
Table 4 shows percentage of respondents willing to take the COVID-19 vaccine as plotted in Figure 1. A 95% confidence interval is shown between parentheses

2.4 Reasons to take or not to take

2.4.1 Table: Reasons to take the vaccine.

# People also gave idiosyncratic reasons for taking the vaccine, which were recoded. Here we keep only the core reasons, which are common to almost all studies.
# Names of the three core reason columns (selecting them from df2 also checks
# that they exist)
yes_vars <- names(
  dplyr::select(df2, yes_vaccine_1, yes_vaccine_2, yes_vaccine_3)
)

## Generate data for analysis of yes reasons
# One reasons_together() result per reason, stacked, with the estimate and
# confidence bounds converted to whole percentages
yes_vacc1 <- 
  yes_vars %>%
  lapply(function(reason_var) {
    reasons_together(df = df2, reason = reason_var, num = "Yes")
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(
    across(c(conf.low, conf.high, estimate), 
           function(x) round(x * 100, digits = 0)))

# Get percentage per yes reason category and make wide table:
# two rows per group (estimate, then confidence interval), one column per
# reason, plus the number of observations
yes_vacc2 <- 
  yes_vacc1 %>%
  dplyr::mutate(
    estimate = format(estimate, nsmall = 0),
    conf_int = paste0("(", conf.low, ", ", conf.high, ")")) %>%
  dplyr::select(group, estimate, conf_int, outcome, n) %>%
  # Wide: estimate__<reason>, conf_int__<reason>, n__<reason>
  tidyr::pivot_wider(
    names_from = outcome, 
    values_from = c(estimate, conf_int, n), 
    names_sep = "__") %>%
  # Long again on estimate/conf_int only, leaving the n__ columns in place
  tidyr::pivot_longer(
    cols = c(starts_with("estimate__"), starts_with("conf_int__")),
    names_to = c("type", ".value"),
    names_pattern = "(.*)__(.*)") %>%
  # Collapse the per-reason n__ columns into a single n (blank for "All")
  dplyr::rowwise() %>% 
  dplyr::mutate(
    n = ifelse(group == "All", 
               NA, 
               unique(na.omit(c_across(starts_with("n__")))))) %>% 
  dplyr::ungroup() %>% 
  # Pooled row, Russia and USA go last
  dplyr::mutate(
    group = forcats::fct_relevel(as.factor(group), 
                                 "All", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Group label and n are shown on the estimate row only
  dplyr::mutate(
    across(c(group, n), ~ ifelse(type == "conf_int", "", as.character(.x))),
    group = ifelse(group == "All", "All LMICs", group)) %>% 
  dplyr::select(group, n, starts_with("yes_vaccine_"))

# Column headers for the core-reasons table
cnames <- c("Study", "N", "Self", "Family", "Community")


# Table to Latex
tab_reasons_y <- 
  yes_vacc2 %>%
  knitr::kable(
    format = "latex", 
    col.names = cnames,
    caption = "Reasons to take the vaccine", 
    label = "yes", 
    align = "lcccc",
    format.args = list(big.mark = ",", scientific = FALSE), 
    escape = FALSE, 
    booktabs = TRUE, 
    linesep = "") %>%
  kableExtra::kable_styling(
    full_width = FALSE, 
    font_size = base_font_size - 2) %>%
  kableExtra::add_header_above(c(" " = 2, "Protection" = 3), bold = TRUE) %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "8em") %>% 
  kableExtra::column_spec(2:4, width = "4em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 3 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage corresponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.",
    threeparttable = TRUE) 

# Same table rendered in the notebook's default output format
yes_vacc2 %>%
  knitr::kable(
    col.names = cnames,
    caption = "\\label{yes}Reasons to take the vaccine", 
    format.args = list(big.mark = ",", scientific = FALSE),
    booktabs = TRUE, 
    linesep = "") %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::add_header_above(c(" " = 2, "Protection" = 3), bold = TRUE) %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "8em") %>% 
  kableExtra::column_spec(2:5, width = "8em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.",
    threeparttable = TRUE)

Reasons to take the vaccine
		Protection
Study	N	Self	Family	Community
Burkina Faso	651	76	42	7
		(73, 79)	(38, 46)	(5, 9)
Colombia	756	91	23	12
		(88, 93)	(20, 26)	(10, 14)
Mozambique	768	83	32	4
		(80, 86)	(27, 38)	(2, 5)
Nepal	1341	96	34	20
		(95, 98)	(32, 37)	(17, 22)
Nigeria	1424	89	35	21
		(88, 91)	(33, 38)	(19, 23)
Rwanda	1152	98	26	11
		(97, 99)	(23, 28)	(9, 13)
Sierra Leone 1	836	94	37	21
		(92, 96)	(34, 40)	(18, 23)
Sierra Leone 2	1855	91	62	21
		(88, 93)	(57, 66)	(16, 27)
Uganda 1	2885	96	36	9
		(95, 97)	(34, 38)	(8, 10)
Uganda 2	1045	96	28	11
		(95, 97)	(25, 31)	(9, 12)
All LMICs	.	91	36	14
		(86, 96)	(28, 43)	(9, 18)
Russia	5887	76	69	41
		(74, 78)	(67, 71)	(38, 43)
USA	1313	94	92	89
		(92, 95)	(90, 94)	(87, 91)
Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.

2.4.2 Calculate numbers used in text for acceptance

# Numbers quoted in the text, read from yes_vacc1 as it stands at this point
# (three core reasons; yes_vaccine_1 = self, yes_vaccine_2 = family, following
# the column headers of the reasons table).

# All LMICs estimate of self protection
yes_all <- 
  yes_vacc1 %>% 
  dplyr::filter(group == "All", outcome == "yes_vaccine_1")

# Study-level LMIC rows for self protection, ranked both ways
yes_ <- 
  yes_vacc1 %>% 
  dplyr::filter(group != "All", 
                group != "USA", 
                group != "Russia", 
                outcome == "yes_vaccine_1") 

yes_top <- dplyr::arrange(yes_, dplyr::desc(estimate))
yes_bottom <- dplyr::arrange(yes_, estimate)

# All LMICs estimate of protection of family
yes_all_2 <- 
  yes_vacc1 %>% 
  dplyr::filter(group == "All", outcome == "yes_vaccine_2")

# Study-level LMIC rows for protection of family, ranked both ways
yes_2 <- 
  yes_vacc1 %>% 
  dplyr::filter(group != "All", 
                group != "USA", 
                group != "Russia", 
                outcome == "yes_vaccine_2") 

yes2_top <- dplyr::arrange(yes_2, dplyr::desc(estimate))
yes2_bottom <- dplyr::arrange(yes_2, estimate)

# Estimate of self protection for Russia and the US
yes_usa <- 
  yes_vacc1 %>% 
  dplyr::filter(group == "USA", outcome == "yes_vaccine_1") 

yes_rus <- 
  yes_vacc1 %>% 
  dplyr::filter(group == "Russia", outcome == "yes_vaccine_1") 

# Estimate of protection of family for Russia and the US
yes2_usa <- 
  yes_vacc1 %>% 
  dplyr::filter(group == "USA", outcome == "yes_vaccine_2")

yes2_rus <- 
  yes_vacc1 %>% 
  dplyr::filter(group == "Russia", outcome == "yes_vaccine_2")

# Names of all six reason columns (selecting them from df2 also checks that
# they exist). This replaces the three-reason yes_vars defined earlier.
yes_vars <- names(
  dplyr::select(df2, 
                yes_vaccine_1, yes_vaccine_2, 
                yes_vaccine_3, yes_vaccine_4, 
                yes_vaccine_5, yes_vaccine_666)
)

## Generate data for analysis of yes reasons
# One reasons_together() result per reason, stacked, with the estimate and
# confidence bounds converted to whole percentages
yes_vacc1 <- 
  yes_vars %>%
  lapply(function(reason_var) {
    reasons_together(df = df2, reason = reason_var, num = "Yes")
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(
    across(c(conf.low, conf.high, estimate), 
           function(x) round(x * 100, digits = 0)))


# Get percentage per yes reason category and make wide table:
# two rows per group (estimate, then confidence interval), one column per
# reason, plus the number of observations
yes_vacc2 <- 
  yes_vacc1 %>%
  dplyr::mutate(
    estimate = format(estimate, nsmall = 0),
    conf_int = paste0("(", conf.low, ", ", conf.high, ")")) %>%
  dplyr::select(group, estimate, conf_int, outcome, n) %>%
  # Wide: estimate__<reason>, conf_int__<reason>, n__<reason>
  tidyr::pivot_wider(
    names_from = outcome, 
    values_from = c(estimate, conf_int, n), 
    names_sep = "__") %>%
  # Long again on estimate/conf_int only, leaving the n__ columns in place
  tidyr::pivot_longer(
    cols = c(starts_with("estimate__"), starts_with("conf_int__")),
    names_to = c("type", ".value"),
    names_pattern = "(.*)__(.*)") %>%
  # Collapse the per-reason n__ columns into a single n (blank for "All")
  dplyr::rowwise() %>% 
  dplyr::mutate(
    n = ifelse(group == "All", 
               NA, 
               unique(na.omit(c_across(starts_with("n__")))))) %>% 
  dplyr::ungroup() %>% 
  # Pooled row, Russia and USA go last
  dplyr::mutate(
    group = forcats::fct_relevel(as.factor(group), 
                                 "All", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Group label and n are shown on the estimate row only
  dplyr::mutate(
    across(c(group, n), ~ ifelse(type == "conf_int", "", as.character(.x))),
    group = ifelse(group == "All", "All LMICs", group)) %>% 
  dplyr::select(group, n, starts_with("yes_vaccine_"))

# Column headers for the all-categories reasons table
cnames <- c("Study", "N", 
            "Self", "Family", "Community", 
            "Health workers", "Government", 
            "Other")

# Table to Latex
tab_reasons_y_all <- 
  yes_vacc2  %>% 
  knitr::kable(
    format = "latex", 
    col.names = cnames,
    caption = "Reasons to take the vaccine- all categories", 
    label = "yesall", 
    align = c("l", rep("c", 7)),
    format.args = list(big.mark = ",", scientific = FALSE), 
    escape = FALSE, 
    booktabs = TRUE, 
    linesep = "") %>%
  kableExtra::kable_styling(
    latex_options = c("scale_down", "hold_position"),
    font_size = base_font_size - 2, 
    full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::add_header_above(
    c(" " = 2, "Protection" = 3, "If recommended by" = 2, " " = 1), 
    bold = TRUE) %>% 
  kableExtra::column_spec(1:8, width = "7em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 5 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE) 

# HTML rendering of the "reasons to take the vaccine" table, printed in the
# knitted document. Uses TRUE rather than the reassignable shorthand T.
yes_vacc2 %>%
  knitr::kable(col.names = cnames,
               caption = "Reasons to take the vaccine. All categories", 
               booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)),
               format.args = list(big.mark = ",", scientific = FALSE)) %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  # spanning header: 2 label columns, 3 "protection" reasons,
  # 2 "recommended by" reasons, 1 "other"
  kableExtra::add_header_above(c(" " = 2, "Protection" = 3, "If recommended by" = 2, " " = 1), 
                               bold = TRUE) %>% 
  kableExtra::column_spec(1:8, width = "7em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 5 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correspond only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE)

Reasons to take the vaccine. All categories
		Protection			If recommended by
Study	N	Self	Family	Community	Health workers	Government	Other
Burkina Faso	651	76	42	7	6	19	2
		(73, 79)	(38, 46)	(5, 9)	(4, 8)	(16, 22)	(1, 3)
Colombia	756	91	23	12	1	2	6
		(88, 93)	(20, 26)	(10, 14)	(0, 2)	(1, 3)	(4, 7)
Mozambique	768	83	32	4	.	7	3
		(80, 86)	(27, 38)	(2, 5)	.	(5, 8)	(2, 4)
Nepal	1341	96	34	20	2	3	7
		(95, 98)	(32, 37)	(17, 22)	(1, 2)	(2, 4)	(5, 9)
Nigeria	1424	89	35	21	.	6	4
		(88, 91)	(33, 38)	(19, 23)	.	(4, 7)	(3, 5)
Rwanda	1152	98	26	11	1	5	1
		(97, 99)	(23, 28)	(9, 13)	(0, 1)	(4, 6)	(1, 2)
Sierra Leone 1	836	94	37	21	12	23	7
		(92, 96)	(34, 40)	(18, 23)	(10, 14)	(20, 25)	(5, 9)
Sierra Leone 2	1855	91	62	21	59	.	16
		(88, 93)	(57, 66)	(16, 27)	(54, 63)	.	(11, 21)
Uganda 1	2885	96	36	9	.	10	6
		(95, 97)	(34, 38)	(8, 10)	.	(9, 12)	(5, 7)
Uganda 2	1045	96	28	11	1	15	2
		(95, 97)	(25, 31)	(9, 12)	(1, 2)	(13, 17)	(1, 3)
All LMICs	.	91	36	14	12	10	5
		(86, 96)	(28, 43)	(9, 18)	(-8, 31)	(4, 16)	(2, 8)
Russia	5887	76	69	41	11	6	18
		(74, 78)	(67, 71)	(38, 43)	(10, 13)	(5, 7)	(16, 20)
USA	1313	94	92	89	.	67	.
		(92, 95)	(90, 94)	(87, 91)	.	(64, 70)	.
Table 5 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correspond only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses

2.5 Reasons to take by age

# Only the first three "yes" reasons are broken down by age
yes_vars <- names(dplyr::select(df2, yes_vaccine_1, yes_vaccine_2, yes_vaccine_3))

## Estimates of each reason within one age band at a time. The band is handed
## to age_analysis() as a bare column name through `filter_by`; each result is
## tagged with the label later used as the table column header.
yes_vacc_age_1 <- 
  lapply(yes_vars, age_analysis, df = df2, num = "Yes", filter_by = age_less24) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(age = "<25")

yes_vacc_age_2 <- 
  lapply(yes_vars, age_analysis, df = df2, num = "Yes", filter_by = age_25_54) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(age = "25-54")

yes_vacc_age_3 <- 
  lapply(yes_vars, age_analysis, df = df2, num = "Yes", filter_by = age_55_more) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(age = "55+")

# Stack the three age bands, convert to whole-number percentages and reshape so
# that each row is one (study, statistic__reason) pair and each age band is a
# column.
yes_vacc_age <- 
  rbind(yes_vacc_age_1, yes_vacc_age_2, yes_vacc_age_3) %>%
  dplyr::mutate(
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 0)),
    estimate = format(estimate, nsmall = 0),
    conf_int = paste0("(", conf.low, ", ", conf.high, ")"),
    n = as.character(n)
  ) %>%
  dplyr::select(group, estimate, conf_int, outcome, age, n) %>%
  # columns named <statistic>__<outcome>__<age band>
  tidyr::pivot_wider(
    names_from = c(outcome, age), 
    values_from = c(estimate, conf_int, n), 
    names_sep = "__"
  ) %>%
  # the greedy first group keeps "<statistic>__<outcome>" in `type`;
  # the trailing age band becomes the value column
  tidyr::pivot_longer(
    cols = c(starts_with("estimate__"), starts_with("conf_int__"), starts_with("n__")),
    names_to = c("type", ".value"),
    names_pattern = "(.*)__(.*)"
  )

# Split the long table by reason and collapse `type` to the bare statistic
# name ("estimate", "conf_int" or "n") so the three pieces can be joined
# side by side on (group, type).
y1 <- 
  yes_vacc_age %>%
  dplyr::filter(grepl("yes_vaccine_1", type)) %>%
  dplyr::mutate(
    type = dplyr::case_when(
      grepl("estimate", type) ~ "estimate",
      grepl("conf_int", type) ~ "conf_int",
      grepl("n__", type) ~ "n",
      TRUE ~ type
    )
  )

y2 <- 
  yes_vacc_age %>%
  dplyr::filter(grepl("yes_vaccine_2", type)) %>%
  dplyr::mutate(
    type = dplyr::case_when(
      grepl("estimate", type) ~ "estimate",
      grepl("conf_int", type) ~ "conf_int",
      grepl("n__", type) ~ "n",
      TRUE ~ type
    )
  )

y3 <- 
  yes_vacc_age %>%
  dplyr::filter(grepl("yes_vaccine_3", type)) %>%
  dplyr::mutate(
    type = dplyr::case_when(
      grepl("estimate", type) ~ "estimate",
      grepl("conf_int", type) ~ "conf_int",
      grepl("n__", type) ~ "n",
      TRUE ~ type
    )
  )

# Put the three reasons side by side (three age columns each), order studies
# with the pooled row and the benchmark countries last, and relabel the first
# column so the confidence-interval and n rows are self-describing.
yes_vacc_age <- 
  y1 %>% 
  dplyr::left_join(y2, by = c("group", "type")) %>%
  dplyr::left_join(y3, by = c("group", "type")) %>% 
  dplyr::mutate(
    group = forcats::fct_relevel(as.factor(group), "All", "Russia", "USA", after = Inf)
  ) %>% 
  dplyr::arrange(group) %>% 
  dplyr::mutate(
    group = ifelse(type == "conf_int", "Conf. interval", as.character(group)),
    group = ifelse(type == "n", "n", as.character(group)),
    group = ifelse(group == "All", "All LMICs", group)
  ) %>% 
  dplyr::select(-type)

# Column headers for the age breakdown: the study label followed by the three
# age bands repeated once per reason (Self, Family, Community).
cnames <- c("Study", "<25", "25-54", "55+", "<25", "25-54", "55+", "<25", "25-54", "55+")


# LaTeX version of the table (stored, not printed). Logical flags are spelled
# out as TRUE/FALSE because T and F are ordinary, reassignable variables.
tab_reasons_y_age <- 
  yes_vacc_age %>%
  knitr::kable(col.names = cnames,
               caption = "Reasons to take the vaccine- by age groups", format = "latex", booktabs = TRUE, 
               linesep = "", label = "yes1", 
               format.args = list(big.mark = ",", scientific = FALSE), 
               escape = FALSE, align = c("l", rep("c", 9))) %>%
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::add_header_above(c(" " = 1, "Self" = 3, "Family" = 3, "Community" = 3), bold = TRUE) %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  # each study occupies three rows (estimate, interval, n); rule after each
  kableExtra::row_spec(seq(from = 3, to = 36, by = 3), hline_after = TRUE) %>%
  kableExtra::column_spec(1, width = "7em") %>% 
  kableExtra::column_spec(2:10, width = "5em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 6 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine by age groups. The number of observations and percentage correspond only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE) 


# HTML rendering of the age-breakdown table, printed in the knitted document.
# Uses TRUE rather than the reassignable shorthand T.
yes_vacc_age %>%
  knitr::kable(col.names = cnames,
               caption = "\\label{yes1}Reasons to take the vaccine", 
               booktabs = TRUE, linesep = "", 
               format.args = list(big.mark = ",", scientific = FALSE)) %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::add_header_above(c(" " = 1, "Self" = 3, "Family" = 3, "Community" = 3), bold = TRUE) %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "8em") %>% 
  kableExtra::column_spec(2:10, width = "4em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correspond only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.",
    threeparttable = TRUE)

Reasons to take the vaccine
	Self			Family			Community
Study	<25	25-54	55+	<25	25-54	55+	<25	25-54	55+
Burkina Faso	77	59	100	26	64	66	11	2	0
Conf. interval	(56, 99)	(46, 72)	(100, 100)	(4, 48)	(51, 77)	(-80, 211)	(-5, 26)	(-2, 5)	(0, 0)
n	19	57	3	19	57	3	19	57	3
Colombia	91	91	90	26	26	16	12	13	14
Conf. interval	(86, 97)	(88, 94)	(83, 97)	(17, 35)	(21, 31)	(8, 25)	(4, 20)	(9, 16)	(6, 22)
n	90	349	73	90	349	73	90	349	73
Mozambique	62	84	80	50	32	34	12	4	2
Conf. interval	(19, 106)	(81, 87)	(75, 86)	(5, 95)	(26, 38)	(27, 41)	(-17, 42)	(2, 6)	(0, 4)
n	8	571	188	8	571	188	8	571	188
Nepal	97	97	92	31	36	27	15	20	19
Conf. interval	(94, 100)	(96, 98)	(87, 97)	(25, 37)	(33, 39)	(19, 36)	(10, 20)	(17, 23)	(13, 25)
n	225	890	162	225	890	162	225	890	162
Nigeria	91	89	94	31	36	31	22	21	21
Conf. interval	(87, 95)	(87, 91)	(89, 100)	(25, 38)	(33, 39)	(20, 42)	(16, 29)	(18, 23)	(11, 31)
n	178	1175	71	178	1175	71	178	1175	71
Rwanda	98	98	100	22	28	29	10	11	10
Conf. interval	(97, 100)	(97, 99)	(100, 100)	(17, 26)	(24, 31)	(12, 46)	(7, 13)	(9, 14)	(-1, 21)
n	389	732	31	389	732	31	389	732	31
Sierra Leone 1	96	94	94	36	38	27	24	20	22
Conf. interval	(93, 99)	(92, 95)	(86, 102)	(29, 44)	(34, 41)	(12, 42)	(17, 31)	(16, 23)	(8, 36)
n	167	632	37	167	632	37	167	632	37
Sierra Leone 2	87	90	93	52	62	62	29	22	18
Conf. interval	(78, 97)	(88, 92)	(89, 97)	(39, 66)	(58, 67)	(56, 67)	(16, 42)	(16, 28)	(12, 25)
n	63	1376	396	63	1376	396	63	1376	396
Uganda 1	96	96	.	34	36	.	9	9	.
Conf. interval	(95, 98)	(96, 97)	.	(30, 39)	(34, 39)	.	(6, 11)	(8, 11)	.
n	526	2218	.	526	2218	.	526	2218	.
Uganda 2	97	96	97	20	30	28	8	11	13
Conf. interval	(94, 99)	(95, 97)	(94, 100)	(14, 26)	(27, 33)	(21, 36)	(4, 11)	(9, 13)	(7, 19)
n	173	749	123	173	749	123	173	749	123
All LMICs	89	89	93	33	39	36	15	13	13
Conf. interval	(81, 97)	(81, 98)	(89, 98)	(25, 41)	(29, 48)	(23, 48)	(10, 20)	(8, 18)	(7, 19)
n	1838	8749	1084	1838	8749	1084	1838	8749	1084
Russia	67	76	81	74	68	68	46	40	38
Conf. interval	(59, 74)	(73, 78)	(76, 87)	(68, 81)	(66, 71)	(62, 74)	(38, 54)	(38, 43)	(32, 44)
n	552	5108	227	552	5108	227	552	5108	227
USA	92	91	97	89	91	94	90	89	89
Conf. interval	(88, 96)	(89, 94)	(95, 99)	(83, 95)	(88, 93)	(91, 97)	(85, 95)	(86, 92)	(85, 93)
n	153	687	473	153	687	473	153	687	473
Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correspond only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.

2.6 Figure 2: reasons not to take

2.6.1 Figure

# Lookup table that maps outcome variable names to display labels (`tag`)
dictionary <- read_excel("2_input_data/dictionary.xlsx")

# Every "reason not to take the vaccine" indicator present in df2
no_vars <- names(dplyr::select(df2, starts_with("no_vaccine_")))

# Estimate the share giving each reason NOT to take the vaccine (respondents
# answering "No" or "DK"), attach display labels from `dictionary`, and bin the
# approximate number of respondents behind each estimate for point sizing.
no_vacc <- 
  lapply(no_vars, reasons_together, df = df2, num = c("No", "DK")) %>%
  dplyr::bind_rows() %>% 
  dplyr::arrange(outcome) %>% 
  dplyr::mutate(
    # n_sub must be derived while `estimate` is still a proportion: mutate()
    # evaluates its arguments in order, so computing it after the percentage
    # conversion below would inflate the counts by a factor of 100 and push
    # almost every point into the largest size bin.
    n_sub = round(n * estimate, 0),
    # NA marks reasons with no (rounded) respondents; fig_2 drops these rows
    n_sub = ifelse(n_sub == 0, NA_integer_, n_sub),
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1)),
    # reversed level order; groups not listed here become NA
    group = 
      factor(group, 
             levels = rev(c("Burkina Faso", "Colombia", "Mozambique", 
                            "Nepal", "Nigeria", "Pakistan 1", "Rwanda", 
                            "Sierra Leone 1", "Sierra Leone 2", "Uganda 1", 
                            "Uganda 2", "All", "Russia", "USA" )))
  ) %>%
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::mutate(
    # left-closed bins: [0,50), [50,500), [500,Inf)
    size = cut(n_sub, c(0, 50, 500, Inf), right = FALSE, include.lowest = FALSE),
    size = forcats::fct_recode(size, "500+" = "[500,Inf)"),
    tag = as.factor(tag),
    # fixed facet order for the figure
    tag = 
      forcats::fct_relevel(tag,  
                           "Concerned about side effects", 
                           "Concerned about getting coronavirus from the vaccine", 
                           "Not concerned about getting seriously ill", 
                           "Doesn't think vaccines are effective", 
                           "Doesn't think Coronavirus outbreak is as serious as people say", 
                           "Doesn't like needles", 
                           "Allergic to vaccines", 
                           "Won't have time to get vaccinated", 
                           "Mentions a conspiracy theory", 
                           "Other reasons"))

# Figure 2: point estimates with confidence intervals of each reason not to
# take the vaccine, one facet per reason and one row per study. Point size
# encodes the binned number of respondents (`size`).
fig_2 <- 
  no_vacc %>% 
  # rows without a usable respondent count are not plotted
  dplyr::filter(!is.na(n_sub)) %>%
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>% 
  ggplot(aes(group, estimate, color = tag)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), 
                size = .5, width = .2, position = position_dodge(0.6)) + 
  geom_point(shape = 16, position = position_dodge(0.6), aes(size = size)) +
  facet_grid(. ~ tag, space = "free", labeller = label_wrap_gen(width = 15)) +
  scale_size_discrete(range = c(1, 3), name = "Number of observations") +
  # separators around the pooled row; assumes USA, Russia and All occupy the
  # first three axis positions, as set by the factor levels of `group`
  geom_vline(xintercept = 3.5, color = "darkgrey") +
  geom_vline(xintercept = 2.5, color = "darkgrey") + 
  # colour duplicates the facet label, so its legend is suppressed
  # ("none" replaces the deprecated `FALSE`)
  guides(color = "none") +
  scale_colour_manual(values = safe_colorblind_palette) + 
  coord_flip() +
  labs(title = "Why would you not take the COVID-19 vaccine?",
       x = "") +
  # complete theme applied once, before the element-level tweaks below
  theme_bw() + ylim(c(-2, 100)) +
  theme(legend.position = "bottom",
        plot.caption = element_text(hjust = 0), #Default is hjust=1
        plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
        plot.caption.position =  "plot",
        axis.text.y = element_text(hjust = 0))


fig_2

2.6.2 Calculate numbers used in text for refusal

## Numbers quoted in the text on reasons for refusal. Each object is a subset
## of no_vacc for one reason, sorted from the highest to the lowest estimate.

# Side effects: individual LMIC studies (pooled row, USA and Russia removed)
side_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Concerned about side effects", 
                group != "All", 
                group != "USA", 
                group != "Russia") %>%
  dplyr::arrange(desc(estimate))

# Side effects: Russia and the USA
side_rus <- 
  no_vacc %>% 
  dplyr::filter(tag == "Concerned about side effects", group == "Russia") %>% 
  dplyr::arrange(desc(estimate))

side_usa <- 
  no_vacc %>% 
  dplyr::filter(tag == "Concerned about side effects", group == "USA") %>% 
  dplyr::arrange(desc(estimate))

# Allergic to vaccines: every study except the pooled row
allergies_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Allergic to vaccines", group != "All") %>% 
  dplyr::arrange(desc(estimate))

# Dislike of needles: every study except the pooled row
needles_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Doesn't like needles", group != "All") %>% 
  dplyr::arrange(desc(estimate))

# No time to get vaccinated: every study except the pooled row
time_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Won't have time to get vaccinated", group != "All") %>% 
  dplyr::arrange(desc(estimate))

# Fear of catching covid from the vaccine: every study except the pooled row
get_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Concerned about getting coronavirus from the vaccine", 
                group != "All") %>%
  dplyr::arrange(desc(estimate))

# Vaccines not effective: USA and Russia removed (the pooled row is kept)
effective_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Doesn't think vaccines are effective", 
                group != "USA", 
                group != "Russia") %>% 
  dplyr::arrange(desc(estimate))

# Vaccines not effective: Russia and the USA
effective_rus <- 
  no_vacc %>% 
  dplyr::filter(tag == "Doesn't think vaccines are effective", group == "Russia") %>% 
  dplyr::arrange(desc(estimate))

effective_usa <- 
  no_vacc %>% 
  dplyr::filter(tag == "Doesn't think vaccines are effective", group == "USA") %>% 
  dplyr::arrange(desc(estimate))

# Not concerned about getting seriously ill: every study except the pooled row
ill_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Not concerned about getting seriously ill", group != "All") %>% 
  dplyr::arrange(desc(estimate))

# Outbreak not as serious as people say: every study except the pooled row
serious_top <- 
  no_vacc %>% 
  dplyr::filter(tag == "Doesn't think Coronavirus outbreak is as serious as people say", 
                group != "All") %>% 
  dplyr::arrange(desc(estimate))

2.6.3 Table version

# Table version of Figure 2: two rows per study (estimate, then confidence
# interval) and one column per reason, with "Other reasons" moved last.
no_vacc2 <- 
  no_vacc %>%
  dplyr::mutate(
    estimate = format(estimate, nsmall = 0),
    conf_int = paste0(
      "(", format(conf.low, nsmall = 0), ", ", format(conf.high, nsmall = 0), ")"
    )
  ) %>%
  dplyr::select(group, estimate, conf_int, outcome, n) %>%
  # one column per statistic and outcome, e.g. estimate__no_vaccine_1
  tidyr::pivot_wider(
    names_from = outcome, 
    values_from = c(estimate, conf_int, n), 
    names_sep = "__"
  ) %>%
  # stack the estimate and conf_int columns; the n__* columns stay wide
  tidyr::pivot_longer(
    cols = c(starts_with("estimate__"), starts_with("conf_int__")),
    names_to = c("type", ".value"),
    names_pattern = "(.*)__(.*)"
  ) %>% 
  # single N per study taken from the n__* columns; blank for the pooled row
  dplyr::rowwise() %>% 
  dplyr::mutate(
    n = ifelse(group == "All", NA, unique(na.omit(c_across(starts_with("n__")))))
  ) %>% 
  dplyr::ungroup() %>% 
  # undo the reversed level order of no_vacc, then push the pooled row and the
  # benchmark countries to the end
  dplyr::mutate(
    group = forcats::fct_relevel(forcats::fct_rev(group), "All", "Russia", "USA", after = Inf)
  ) %>% 
  dplyr::arrange(group) %>% 
  # study label and N are printed on the estimate row only
  dplyr::mutate(
    across(c(group, n), ~ ifelse(type == "conf_int", "", as.character(.))),
    group = ifelse(group == "All", "All LMICs", group)
  ) %>% 
  dplyr::select(group, n, starts_with("no_vaccine_")) %>% 
  dplyr::relocate("no_vaccine_666", .after = last_col())


# LaTeX version of the "reasons not to take" table (stored, not printed),
# typeset in landscape. Logical flags are spelled out as TRUE because T is an
# ordinary, reassignable variable.
tab_fig2 <- 
  no_vacc2 %>%
  knitr::kable(
    col.names = 
      c("Study", "N", 
        "Concerned about side effects", 
        "Concerned about getting coronavirus from the vaccine", 
        "Not concerned about getting seriously ill", 
        "Doesn't think vaccines are effective", 
        "Doesn't think Coronavirus outbreak is as serious as people say", 
        "Doesn't like needles", 
        "Allergic to vaccines", 
        "Won't have time to get vaccinated", 
        "Mentions a conspiracy theory", 
        "Other reasons"),
    caption = "Reasons not to take the vaccine",
    align = c("l", rep("c", 11)),
    format = "latex", booktabs = TRUE, linesep = "", 
    format.args = list(big.mark = ",", scientific = FALSE), 
    label = "no") %>%
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1:12, width = "7em") %>%
  # kableExtra::column_spec(1, width = "7em") %>%
  kableExtra::footnote(
    general_title = "",
    general = "Table 7 shows percentage of respondents mentioning reasons why they would not take the Covid-19 vaccine. The number of observations and percentage correspond only to people who would NOT take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE)  %>%
  kableExtra::landscape()



# HTML rendering of the "reasons not to take" table, printed in the knitted
# document. Uses TRUE/FALSE rather than the reassignable shorthands T/F.
no_vacc2 %>%
  knitr::kable(
    col.names = c("Study", "N", 
                  "Concerned about side effects", 
                  "Concerned about getting coronavirus from the vaccine", 
                  "Not concerned about getting seriously ill", 
                  "Doesn't think vaccines are effective", 
                  "Doesn't think Coronavirus outbreak is as serious as people say", 
                  "Doesn't like needles", 
                  "Allergic to vaccines", 
                  "Won't have time to get vaccinated", 
                  "Mentions a conspiracy theory", 
                  "Other reasons"),
    caption = "\\label{no}Reasons not to take the vaccine", 
    booktabs = TRUE, linesep = "", 
    format.args = list(big.mark = ",", scientific = FALSE)) %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1:12, width = "7em") %>%
  # kableExtra::column_spec(1, width = "7em") %>%
  kableExtra::footnote(
    general_title = "",
    general = "Table 6 shows percentage of respondents mentioning reasons why they would not take the Covid-19 vaccine. The number of observations and percentage correspond only to people who would NOT take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE)

Reasons not to take the vaccine
Study	N	Concerned about side effects	Concerned about getting coronavirus from the vaccine	Not concerned about getting seriously ill	Doesn’t think vaccines are effective	Doesn’t think Coronavirus outbreak is as serious as people say	Doesn’t like needles	Allergic to vaccines	Won’t have time to get vaccinated	Mentions a conspiracy theory	Other reasons
Burkina Faso	325	40.9	8.0	7.4	19.5	13.5	3.5	1.5	0.9	17.9	8.7
		(35.5, 46.3)	( 5.0, 11.0)	( 4.5, 10.2)	(15.1, 23.8)	( 9.8, 17.2)	( 1.5, 5.6)	( 0.2, 2.8)	(-0.1, 1.9)	(13.7, 22.1)	( 5.6, 11.8)
Colombia	202	31.0	18.1	8.0	10.2	2.3	0.6	0.4	0.5	10.0	31.6
		(24.4, 37.6)	(12.7, 23.4)	( 3.9, 12.0)	( 5.9, 14.5)	( 0.3, 4.3)	(-0.6, 1.8)	(-0.4, 1.3)	(-0.5, 1.5)	( 5.8, 14.2)	(25.1, 38.2)
Mozambique	74	.	.	2.7	29.7	.	.	.	.	.	21.6
		.	.	(-0.7, 6.1)	(18.6, 40.8)	.	.	.	.	.	(12.2, 31.0)
Nepal	48	9.3	7.9	20.4	15.2	15.7	4.4	1.8	.	2.8	12.1
		( 0.3, 18.2)	(-0.4, 16.3)	( 6.7, 34.1)	( 3.2, 27.2)	( 4.0, 27.3)	(-1.9, 10.6)	(-1.9, 5.5)	.	(-1.5, 7.2)	( 0.8, 23.5)
Nigeria	410	21.5	26.1	15.9	9.3	.	.	0.2	.	4.9	26.8
		(17.5, 25.5)	(21.8, 30.4)	(12.3, 19.4)	( 6.4, 12.1)	.	.	(-0.2, 0.7)	.	( 2.8, 7.0)	(22.5, 31.1)
Pakistan 1	441	23.0	21.9	29.4	26.0	22.1	11.5	.	.	13.2	19.6
		(15.1, 30.8)	(14.3, 29.4)	(20.9, 37.9)	(18.0, 34.0)	(12.8, 31.3)	( 5.5, 17.4)	.	.	( 7.1, 19.4)	(10.4, 28.8)
Rwanda	70	38.6	10.1	18.7	21.5	5.8	7.0	5.6	.	21.3	25.8
		(26.9, 50.3)	( 2.8, 17.3)	( 9.3, 28.1)	(11.6, 31.4)	( 0.1, 11.4)	( 0.9, 13.2)	( 0.1, 11.1)	.	(11.5, 31.1)	(15.3, 36.3)
Sierra Leone 1	234	53.5	37.9	14.6	7.5	4.2	3.0	0.9	4.0	20.3	5.7
		(47.1, 59.9)	(31.6, 44.2)	(10.1, 19.2)	( 4.2, 10.9)	( 1.6, 6.8)	( 0.8, 5.2)	(-0.4, 2.2)	( 1.4, 6.5)	(15.1, 25.5)	( 2.8, 8.7)
Sierra Leone 2	254	57.9	.	.	17.3	.	5.1	.	0.0	3.5	24.8
		(50.1, 65.7)	.	.	(11.9, 22.7)	.	( 2.5, 7.8)	.	( 0.0, 0.0)	( 1.3, 5.7)	(19.3, 30.3)
Uganda 1	289	85.1	.	3.8	24.2	1.7	1.7	.	1.0	.	8.0
		(80.7, 89.6)	.	( 1.7, 5.9)	(19.2, 29.2)	( 0.2, 3.2)	( 0.2, 3.2)	.	(-0.1, 2.2)	.	( 4.9, 11.0)
Uganda 2	319	47.3	10.7	5.0	31.0	4.1	1.6	0.3	.	10.3	6.0
		(42.2, 52.5)	( 7.1, 14.2)	( 2.7, 7.3)	(25.9, 36.2)	( 1.9, 6.2)	( 0.2, 2.9)	(-0.3, 0.9)	.	( 7.0, 13.7)	( 3.4, 8.5)
All LMICs	.	40.8	17.6	12.6	19.2	8.7	4.3	1.5	1.3	11.6	17.3
		(25.3, 56.3)	( 8.7, 26.5)	( 6.4, 18.8)	(13.8, 24.7)	( 2.4, 14.9)	( 1.7, 6.8)	(-0.2, 3.3)	(-0.6, 3.2)	( 6.1, 17.0)	(11.0, 23.7)
Russia	16238	36.8	13.9	5.4	29.6	6.4	3.7	10.2	1.0	21.4	5.1
		(35.2, 38.4)	(12.8, 15.1)	( 4.6, 6.1)	(28.1, 31.1)	( 5.6, 7.3)	( 3.1, 4.3)	( 9.2, 11.2)	( 0.7, 1.4)	(20.1, 22.8)	( 4.4, 5.8)
USA	462	79.3	.	39.3	46.8	.	.	.	.	6.0	49.1
		(74.6, 84.0)	.	(33.5, 45.0)	(41.0, 52.6)	.	.	.	.	( 3.4, 8.7)	(43.3, 54.9)
Table 6 shows percentage of respondents mentioning reasons why they would not take the Covid-19 vaccine. The number of observations and percentage correspond only to people who would NOT take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses

2.7 Figure 3: Trust vaccines

2.7.1 Figure

# Collapse the detailed trust_vaccine_* indicators into five broader ones.
# Each trust_recode_k is 1 when any of its component indicators equals 1 and
# 0 otherwise. For the studies listed next to each recode, a missing result
# is then set to 0 instead of being left as NA.

df2 <- 
  df2 %>% 
  dplyr::mutate(
    trust_recode_1 = ifelse(trust_vaccine_1 == 1 | trust_vaccine_2 == 1, 1, 0),
    trust_recode_1 = ifelse(
      country %in% c("Nigeria", "USA") & is.na(trust_recode_1), 
      0, trust_recode_1),
    
    trust_recode_2 = ifelse(trust_vaccine_8 == 1 | trust_vaccine_9 == 1, 1, 0),
    trust_recode_2 = ifelse(
      country %in% "Sierra Leone 2" & is.na(trust_recode_2), 
      0, trust_recode_2),
    
    trust_recode_3 = ifelse(
      trust_vaccine_3 == 1 | trust_vaccine_7 == 1 | trust_vaccine_4 == 1, 
      1, 0),
    trust_recode_3 = ifelse(
      country %in% c("Nigeria", "USA", "Russia") & is.na(trust_recode_3), 
      0, trust_recode_3),
    
    trust_recode_4 = ifelse(trust_vaccine_666 == 1 | trust_vaccine_other == 1, 1, 0),
    trust_recode_4 = ifelse(
      country %in% c("Burkina Faso", "Sierra Leone 2", "Russia") & is.na(trust_recode_4), 
      0, trust_recode_4),
    
    trust_recode_5 = ifelse(
      trust_vaccine_dk == 1 | trust_vaccine_refuse == 1 | trust_vaccine_nr == 1, 
      1, 0),
    trust_recode_5 = ifelse(
      country %in% c("Nigeria", "Sierra Leone 2", "USA") & is.na(trust_recode_5), 
      0, trust_recode_5)
  )

# Outcomes analysed in the trust figures: the five recoded indicators plus
# two original ones
trust_names <- c("trust_recode_1", "trust_recode_2", "trust_recode_3", 
                 "trust_recode_4", "trust_recode_5", "trust_vaccine_5", "trust_vaccine_6")

# Facet order of the studies; groups not listed here become NA below
studies_levels <- 
  c("Burkina Faso", "Colombia", "India", "Mozambique",
    "Nepal", "Nigeria", "Pakistan 1", "Rwanda",
    "Sierra Leone 1", "Sierra Leone 2", "Uganda 2",
    "All", "Russia", "USA" )

# Estimates of each trust outcome for three respondent sets, defined by their
# answer to the vaccine-acceptance question: everyone ("All"), acceptors
# ("Yes") and non-acceptors ("No"/"DK"). The set is recorded in `sub`.
trust_vacc_together <-
  list(
    All = lapply(trust_names, reasons_together, 
                 df = df2, num = c("Yes", "No", "DK")) %>% 
      dplyr::bind_rows(),
    Yes = lapply(trust_names, reasons_together, 
                 df = df2, num = c("Yes")) %>% 
      dplyr::bind_rows(),
    No = lapply(trust_names, reasons_together, 
                df = df2, num = c("No", "DK")) %>% 
      dplyr::bind_rows()) %>% 
  dplyr::bind_rows(.id = "sub") %>%
  # drop estimates whose test statistic is NaN
  dplyr::filter(!is.nan(statistic)) %>%
  dplyr::mutate(
    # n_sub must be derived while `estimate` is still a proportion: mutate()
    # evaluates its arguments in order, so computing it after the percentage
    # conversion below would inflate the counts by a factor of 100.
    n_sub = round(n * estimate, 0),
    n_sub = ifelse(n_sub == 0, NA_integer_, n_sub),
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1)),
    group = factor(group, levels = studies_levels)) %>%
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::mutate(
    # right-closed bins: [0,50], (50,500], (500,Inf]
    size = cut(n_sub, c(0, 50, 500, Inf), include.lowest = TRUE),
    size = forcats::fct_recode(size, "500+" = "(500,Inf]"),
    tag = as.factor(tag),
    # fixed display order of the trusted actors
    tag = forcats::fct_relevel(tag, 
                               "Health workers", 
                               "Government or MoH", 
                               "Family or Friends", 
                               "Famous person, religious leader or traditional healers", 
                               "Newspapers, radio or online groups", 
                               "Other", 
                               "Don't know or Refuse"),
    sub = forcats::fct_relevel(as.factor(sub), "No", "Yes", "All"),
    sub = plyr::mapvalues(sub, from = c("No", "Yes", "All"),
                          to = c("No, Don't know", "Yes", "Any")))

# Figure 3: bar chart of the share naming each actor as most trusted, for all
# respondents regardless of acceptance (sub == "Any"), one panel per study.
fig_hist2 <- 
  trust_vacc_together %>%
  dplyr::filter(sub == "Any") %>% 
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>% 
  ggplot(mapping = aes(x = estimate, y = tag)) + 
  geom_col(position = "dodge", fill = "#DDCC77") + 
  facet_wrap(~ group, ncol = 2, strip.position = "left") +
  coord_flip() +
  scale_fill_manual(
    name = "Answer", 
    values = safe_colorblind_palette[c(1, 3, 2)]
  ) + 
  # wrap the long actor labels and rotate them on the axis
  scale_y_discrete(
    labels = function(x) stringr::str_wrap(x, width = 16), 
    guide = guide_axis(angle = 90)
  ) +
  labs(
    title = "Which of the following people would you trust MOST to help you decide whether you would get a COVID-19 vaccine?",
    y = ""
  ) +
  theme_bw() + 
  theme(
    legend.position = "bottom",
    plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
    axis.text.y = element_text(hjust = 0)
  )

fig_hist2

# Numbers quoted in the text on trusted actors, all computed on respondents
# regardless of acceptance (sub == "Any").
trust <- dplyr::filter(trust_vacc_together, sub == "Any")

# Pooled LMIC estimate of trust in health workers
trust_all <- 
  trust %>% 
  dplyr::filter(group == "All", tag == "Health workers") %>%
  dplyr::arrange(desc(estimate))

# Trust in health workers by individual LMIC study
# (pooled row, USA and Russia removed)
trust_ <- 
  dplyr::filter(trust, 
                group != "All", 
                group != "USA", 
                group != "Russia", 
                tag == "Health workers") 

# ... ranked from highest to lowest
trust_top <- dplyr::arrange(trust_, desc(estimate))

# All trust outcomes for Rwanda, highest first
trust_rwa <- 
  trust %>% 
  dplyr::filter(group == "Rwanda") %>%
  dplyr::arrange(desc(estimate))

# Nepal: famous persons, religious leaders or traditional healers
trust_npl <- 
  trust %>% 
  dplyr::filter(group == "Nepal", 
                tag == "Famous person, religious leader or traditional healers") %>%
  dplyr::arrange(desc(estimate))


# Trust in family and friends: every study except the pooled row
trust_fam <- 
  trust %>% 
  dplyr::filter(group != "All", tag == "Family or Friends") %>%
  dplyr::arrange(desc(estimate))

# Trust in government: every study except the pooled row and Rwanda
trust_gov <- 
  trust %>% 
  dplyr::filter(group != "All", group != "Rwanda", tag == "Government or MoH") %>%
  dplyr::arrange(desc(estimate))

2.8 Figure broken down by acceptance

# Trusted actors broken down by vaccine acceptance: dodged bars per actor,
# filled by respondent set (`sub`), one panel per study.
fig_hist_categories <- 
  trust_vacc_together %>%
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>% 
  ggplot(mapping = aes(x = estimate, y = tag, fill = sub)) + 
  geom_col(position = "dodge") + 
  facet_wrap(~ group, ncol = 2, strip.position = "left") +
  coord_flip() +
  scale_fill_manual(
    name = "Answer", 
    values = safe_colorblind_palette[c(1, 3, 2)]
  ) + 
  # wrap the long actor labels and rotate them on the axis
  scale_y_discrete(
    labels = function(x) stringr::str_wrap(x, width = 16), 
    guide = guide_axis(angle = 90)
  ) +
  labs(
    title = "Which of the following people would you trust MOST to help you decide whether you would get a COVID-19 vaccine?",
    y = ""
  ) +
  theme_bw() + 
  theme(
    legend.position = "bottom",
    plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
    axis.text.y = element_text(hjust = 0)
  )

fig_hist_categories

2.8.1 By gender

# Estimates of each trust outcome by gender. reasons_together_subgroup() is
# called for both genders together ("All"), for men and for women, always with
# every acceptance answer included; the subset is recorded in `sub`.
trust_vacc_gender <-  
  list(
    All = lapply(trust_names, reasons_together_subgroup, 
                 df = df2, 
                 num = c("Yes", "No", "DK"), 
                 dem_group = "gender", 
                 dem_subgroup = c("Female", "Male")) %>% 
      dplyr::bind_rows(),
    Male = lapply(trust_names, reasons_together_subgroup, 
                  df = df2, 
                  num = c("Yes", "No", "DK"), 
                  dem_group = "gender", 
                  dem_subgroup = "Male") %>%
      dplyr::bind_rows(),
    Female = lapply(trust_names, reasons_together_subgroup, 
                    df = df2, 
                    num = c("Yes", "No", "DK"), 
                    dem_group = "gender", 
                    dem_subgroup = "Female") %>% 
      dplyr::bind_rows()) %>% 
  dplyr::bind_rows(.id = "sub") %>%
  # drop estimates whose test statistic is NaN
  dplyr::filter(!is.nan(statistic)) %>%
  dplyr::mutate(
    # n_sub must be derived while `estimate` is still a proportion: mutate()
    # evaluates its arguments in order, so computing it after the percentage
    # conversion below would inflate the counts by a factor of 100.
    n_sub = round(n * estimate, 0),
    n_sub = ifelse(n_sub == 0, NA_integer_, n_sub),
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1)),
    group = factor(group, levels = studies_levels)) %>%
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::mutate(
    # right-closed bins: [0,50], (50,500], (500,Inf]
    size = cut(n_sub, c(0, 50, 500, Inf), include.lowest = TRUE),
    size = forcats::fct_recode(size, "500+" = "(500,Inf]"),
    tag = as.factor(tag),
    # fixed display order of the trusted actors
    tag = forcats::fct_relevel(tag, 
                               "Health workers", 
                               "Government or MoH", 
                               "Family or Friends", 
                               "Famous person, religious leader or traditional healers", 
                               "Newspapers, radio or online groups", 
                               "Other", 
                               "Don't know or Refuse"),
    sub = forcats::fct_relevel(as.factor(sub), "Female", "Male", "All"))

# Plot: dodged bars of the percentage naming each trusted source, one bar per
# gender subgroup (`sub`), faceted by study. The pooled "All" study is
# relabelled "All LMICs" for display.
hist_gender <-
  trust_vacc_gender %>%
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>%
  ggplot(aes(estimate, tag, fill = sub)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  facet_wrap(~group, ncol = 2, strip.position = "left") +
  coord_flip() +
  scale_fill_manual(
    # The fill aesthetic is the gender subgroup (Female / Male / All), so the
    # legend is titled accordingly rather than "Answer".
    name = "Gender",
    values = safe_colorblind_palette[c(1, 3, 2)]) +
  # Wrap long source labels and rotate them so they stay legible.
  scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 16),
                   guide = guide_axis(angle = 90)) +
  labs(title = "Which of the following people would you trust MOST to help you decide whether you would get a COVID-19 vaccine?",
       y = "") +
  theme_bw() +
  theme(legend.position = "bottom",
        plot.title.position = "plot", # NEW parameter. Applies to the subtitle too.
        axis.text.y = element_text(hjust = 0))

hist_gender

2.8.2 Gender differences in means

# Gender differences in the probability of naming each trusted source.
#
# For every outcome in `trust_names`, regress the outcome on gender with
# country fixed effects, survey weights and standard errors clustered by
# country. The pooled "All" rows, USA, Russia and "Uganda 1" are excluded.
# NOTE(review): the summary-statistics table reports Uganda 1 as 100% women,
# which is presumably why it is dropped here -- confirm.
differences_means_gen <-
  lapply(trust_names, function(i) {
    df2 %>%
      dplyr::filter(group != "All" & group != "USA" & group != "Russia" & group != "Uganda 1") %>%
      # Argument names are spelled out (`weights`, `clusters`) instead of
      # relying on partial matching of `weight` / `cluster`.
      estimatr::lm_robust(as.formula(paste(i, "~gender")),
                          fixed_effects = ~country,
                          weights = weight,
                          clusters = country,
                          se_type = "stata",
                          data = .) %>%
      tidy() %>%
      dplyr::select(estimate, std.error, p.value, df, term) %>%
      dplyr::mutate(outcome = i)
  }) %>%
  dplyr::bind_rows() %>%
  # Explicit key: matches the join used for the gender plot data and avoids
  # the implicit natural join (and its message) on all shared columns.
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::select(-outcome)

# Report the estimates with Benjamini-Hochberg adjusted p-values.
differences_means_gen %>%
  dplyr::mutate(adjusted_p = p.adjust(p.value, method = "BH")) %>%
  knitr::kable(digits = 3, caption = "Differences in means trust actors (BH adjustment)")

Differences in means trust actors (BH adjustment)
estimate	std.error	p.value	df	term	tag	adjusted_p
-0.032	0.011	0.026	7	genderMale	Family or Friends	0.115
0.003	0.004	0.444	6	genderMale	Newspapers, radio or online groups	0.673
-0.002	0.003	0.481	7	genderMale	Famous person, religious leader or traditional healers	0.673
0.011	0.009	0.283	6	genderMale	Other	0.660
0.001	0.005	0.895	7	genderMale	Don’t know or Refuse	0.963
0.001	0.017	0.963	7	genderMale	Health workers	0.963
0.023	0.008	0.033	6	genderMale	Government or MoH	0.115

2.8.3 Table version

# Table data: percentage of respondents naming each trusted source, by study
# and by vaccine-intention group. The outer ldply() runs the same pipeline for
# three respondent sets and stacks the results:
#   "Yes" -> take_vaccine == "Yes"
#   "No"  -> take_vaccine in ("No", "DK")
#   "All" -> take_vaccine in ("Yes", "No", "DK")
# The final table has two rows per study and respondent set: one with point
# estimates and one with the confidence interval.
trust_vacc <- 
  plyr::ldply(
    .data = list("Yes", "No", "All"), 
    .fun = function(take_vac) {
      # Look up the take_vaccine answers belonging to this respondent set ...
      list(Yes = "Yes", 
           No = c("No", "DK"), 
           All = c("Yes", "No", "DK")) %>% 
        .[[take_vac]] %>% 
        # ... and pass them as `num` (the dot is used explicitly, so it is not
        # also inserted as the first argument); reasons_together() is applied
        # to every outcome in trust_names and the results are row-bound.
        plyr::ldply(trust_names, reasons_together, df = df2, num = .) %>%
        dplyr::mutate(
          # Percentages with one decimal, formatted as strings.
          across(c(conf.low, conf.high, estimate), 
                 ~ format(round(. * 100, digits = 1), nsmall = 1)),        
          conf_int = paste0("(", conf.low, ", ", conf.high, ")")) %>%
        dplyr::select(group, estimate, conf_int, outcome, n) %>%
        # Reshape to one column per outcome: first spread everything wide
        # (estimate__<outcome>, conf_int__<outcome>, n__<outcome>), then stack
        # the estimate and conf_int columns back into two rows per group,
        # labelled by `type`.
        tidyr::pivot_wider(names_from = outcome, values_from = c(estimate, conf_int, n), 
                           names_sep = "__") %>%
        tidyr::pivot_longer(cols = c(starts_with("estimate__"), starts_with("conf_int__")),
                            names_to = c("type", ".value"),
                            names_pattern = "(.*)__(.*)") %>%
        dplyr::rowwise() %>% 
        dplyr::mutate(
          # Single sample size per row: NA for the pooled "All" group,
          # otherwise taken from the non-missing per-outcome n columns
          # (ifelse() with a length-one test keeps only the first value).
          n = ifelse(group == "All", NA, unique(na.omit(c_across(starts_with("n__")))))) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate("Take vaccine?" = take_vac) %>%
        dplyr::select(group, n, type, "Take vaccine?", starts_with("trust_")) %>%
        # These studies are left out of the table.
        dplyr::filter(!(group %in% c("Mozambique", "Pakistan 1", "Pakistan 2", "Uganda 1", "India")))
    }
  ) %>% 
  dplyr::mutate(
    group = as.factor(group),
    # Put the pooled row, Russia and USA after the individual studies.
    group = forcats::fct_relevel(group, "All", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Blank the identifying cells on confidence-interval rows so each study /
  # respondent set is labelled once, then rename the pooled row for display.
  dplyr::mutate(across(c(group, n, `Take vaccine?`), ~ifelse(type == "conf_int", "", as.character(.))),
                group = ifelse(group == "All", "All LMICs", group)) %>% 
  dplyr::select(-type)

# LaTeX (booktabs, longtable, landscape) version of the "most trusted source"
# table built from `trust_vacc`. Outcome columns are selected in display order
# and given readable headers. `TRUE` is spelled out instead of the
# reassignable shorthand `T`.
tab_trust <-
  trust_vacc %>%
  dplyr::select("group", "n",
                "Take vaccine?", "trust_vaccine_5",
                "trust_vaccine_6", "trust_recode_1",
                "trust_recode_3", "trust_recode_2",
                "trust_recode_4", "trust_recode_5") %>%
  knitr::kable(
    col.names = c("Study", "N", "Take vaccine?", "Health workers",
                  "Government or Ministry of Health",
                  "Family or friends",
                  "Famous person, religious leader or traditional healers",
                  "Newspapers, radio or online groups", "Other",
                  "Don't know or Refuse"),
    caption = "COVID-19 Vaccination Decision-making: most trusted source",
    align = c("l", rep("c", 9)),
    format = "latex", booktabs = TRUE, linesep = "", longtable = TRUE,
    format.args = list(big.mark = ",", scientific = FALSE),
    label = "trust") %>%
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position", "repeat_header"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::column_spec(1, width = "7em") %>%
  # Columns 2-10 are set to 4em, then columns 4-10 are re-specified at 6em.
  kableExtra::column_spec(2:10, width = "4em") %>%
  kableExtra::column_spec(4:10, width = "6em") %>%
  kableExtra::footnote(
    general_title = "",
    general = "Table 8 shows percentage of respondents that mention actors who they would trust the most to help them decide whether to get a COVID-19 vaccine. For all countries the questions was asked regardless if respondent would take a vaccine, would not take it, does not know or does not respond. For India respondents were able to mention more than one actor, for the rest of countries only one actor was allowed. While rows should sum to 100%, rounding makes number slightly above or below. A 95% confidence interval is shown between parentheses.",
    threeparttable = TRUE) %>%
  kableExtra::landscape()



# Version of the "most trusted source" table printed in this document: same
# columns as `tab_trust`, rendered with knitr's default output format (no
# explicit `format` is given). `TRUE` is spelled out instead of `T`.
trust_vacc %>%
  dplyr::select("group", "n",
                "Take vaccine?", "trust_vaccine_5",
                "trust_vaccine_6", "trust_recode_1",
                "trust_recode_3", "trust_recode_2",
                "trust_recode_4", "trust_recode_5") %>%
  knitr::kable(
    col.names = c("Study", "N", "Take vaccine?", "Health workers",
                  "Government or \n Ministry of Health",
                  "Family or friends",
                  "Famous person, \n religious leader or \n traditional healers",
                  "Newspapers, radio \n or online groups", "Other",
                  "Don't know or Refuse"),
    caption = "COVID-19 Vaccination Decision-making: most trusted source",
    format.args = list(big.mark = ",", scientific = FALSE)) %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::column_spec(1, width = "7em") %>%
  # Columns 2-10 are set to 4em, then columns 4-10 are re-specified at 6em.
  kableExtra::column_spec(2:10, width = "4em") %>%
  kableExtra::column_spec(4:10, width = "6em") %>%
  kableExtra::footnote(
    general_title = "",
    general = "Table 7 shows percentage of respondents that mention actors who they would trust the most to help them decide whether to get a COVID-19 vaccine. For all countries the questions was asked regardless if respondent would take a vaccine, would not take it, does not know or does not respond. For India respondents were able to mention more than one actor, for the rest of countries only one actor was allowed. While rows should sum to 100%, rounding makes number slightly above or below. A 95% confidence interval is shown between parentheses.",
    threeparttable = TRUE)

COVID-19 Vaccination Decision-making: most trusted source
Study	N	Take vaccine?	Health workers	Government or Ministry of Health	Family or friends	Famous person, religious leader or traditional healers	Newspapers, radio or online groups	Other	Don’t know or Refuse
Burkina Faso	651	Yes	57.1	15.1	19.6	0.9	2.0	4.8	0.4
			(53.3, 60.9)	(12.4, 17.9)	(16.5, 22.7)	( 0.2, 1.6)	( 0.9, 3.1)	( 3.2, 6.4)	(-0.1, 0.9)
Burkina Faso	325	No	40.7	8.5	16.2	3.7	1.6	25.1	4.2
			(35.3, 46.1)	( 5.5, 11.6)	(12.1, 20.2)	( 1.6, 5.7)	( 0.2, 3.0)	(20.3, 29.8)	( 2.0, 6.4)
Burkina Faso	976	All	51.6	12.9	18.4	1.8	1.9	11.6	1.7
			(48.5, 54.8)	(10.8, 15.0)	(16.0, 20.9)	( 1.0, 2.7)	( 1.0, 2.7)	( 9.6, 13.6)	( 0.9, 2.5)
Colombia	756	Yes	41.4	12.7	36.9	0.9	1.7	.	6.3
			(37.8, 45.0)	(10.3, 15.2)	(33.4, 40.4)	( 0.2, 1.5)	( 0.8, 2.7)	.	( 4.6, 8.1)
Colombia	202	No	31.5	7.6	35.5	5.3	1.4	.	18.8
			(24.9, 38.1)	( 3.8, 11.3)	(28.8, 42.1)	( 2.2, 8.4)	(-0.2, 3.0)	.	(13.2, 24.3)
Colombia	958	All	39.3	11.6	36.6	1.8	1.7	.	8.9
			(36.2, 42.5)	( 9.6, 13.7)	(33.5, 39.7)	( 1.0, 2.6)	( 0.9, 2.5)	.	( 7.1, 10.7)
Nepal	1341	Yes	44.7	0.7	36.2	16.1	0.4	0.5	1.3
			(40.9, 48.6)	( 0.3, 1.1)	(33.5, 39.0)	(13.1, 19.1)	( 0.0, 0.9)	( 0.1, 0.8)	( 0.7, 2.0)
Nepal	48	No	30.2	2.1	18.7	16.8	0.0	1.0	31.2
			(14.6, 45.9)	(-2.1, 6.2)	( 5.6, 31.7)	( 4.0, 29.6)	( 0.0, 0.0)	(-1.1, 3.2)	(13.6, 48.9)
Nepal	1389	All	44.2	0.8	35.6	16.1	0.4	0.5	2.4
			(40.5, 47.9)	( 0.3, 1.2)	(32.9, 38.3)	(13.3, 18.9)	( 0.0, 0.8)	( 0.1, 0.8)	( 1.5, 3.3)
Nigeria	1424	Yes	63.8	21.6	6.3	5.1	.	2.6	0.6
			(61.3, 66.3)	(19.4, 23.7)	( 5.0, 7.5)	( 4.0, 6.3)	.	( 1.8, 3.4)	( 0.2, 1.0)
Nigeria	410	No	37.6	5.6	13.9	17.8	.	8.5	16.6
			(32.9, 42.3)	( 3.4, 7.8)	(10.5, 17.3)	(14.1, 21.5)	.	( 5.8, 11.3)	(13.0, 20.2)
Nigeria	1834	All	58.0	18.0	8.0	8.0	.	3.9	4.2
			(55.7, 60.2)	(16.2, 19.8)	( 6.7, 9.2)	( 6.7, 9.2)	.	( 3.0, 4.8)	( 3.3, 5.1)
Rwanda	1152	Yes	23.8	27.4	15.1	1.0	0.7	32.0	0.1
			(21.3, 26.2)	(24.9, 30.0)	(13.0, 17.2)	( 0.4, 1.5)	( 0.2, 1.2)	(29.3, 34.7)	(-0.1, 0.2)
Rwanda	70	No	10.1	15.6	12.8	2.9	0.0	53.2	5.5
			( 2.8, 17.4)	( 6.9, 24.3)	( 4.8, 20.8)	(-1.1, 6.9)	( 0.0, 0.0)	(41.2, 65.1)	( 0.1, 11.0)
Rwanda	1222	All	23.0	26.7	15.0	1.1	0.6	33.2	0.4
			(20.6, 25.3)	(24.3, 29.2)	(13.0, 17.0)	( 0.5, 1.7)	( 0.2, 1.1)	(30.5, 35.8)	( 0.0, 0.8)
Sierra Leone 1	836	Yes	47.6	36.9	7.3	3.8	0.5	3.1	0.8
			(44.2, 51.0)	(33.6, 40.2)	( 5.5, 9.1)	( 2.5, 5.1)	( 0.0, 1.0)	( 1.9, 4.2)	( 0.2, 1.4)
Sierra Leone 1	234	No	31.1	17.1	12.1	7.7	0.5	29.4	2.2
			(25.1, 37.1)	(12.2, 21.9)	( 7.9, 16.3)	( 4.3, 11.2)	(-0.4, 1.3)	(23.5, 35.3)	( 0.3, 4.1)
Sierra Leone 1	1070	All	44.0	32.5	8.4	4.7	0.5	8.9	1.1
			(41.0, 46.9)	(29.7, 35.4)	( 6.7, 10.0)	( 3.4, 6.0)	( 0.1, 0.9)	( 7.1, 10.6)	( 0.5, 1.8)
Sierra Leone 2	1855	Yes	94.1	.	3.0	0.9	0.1	1.9	0.0
			(92.5, 95.7)	.	( 2.0, 4.0)	( 0.3, 1.5)	(-0.1, 0.2)	( 1.2, 2.7)	( 0.0, 0.0)
Sierra Leone 2	254	No	54.7	.	3.9	7.5	0.0	33.9	0.0
			(46.5, 62.9)	.	( 1.4, 6.5)	( 2.9, 12.0)	( 0.0, 0.0)	(26.3, 41.4)	( 0.0, 0.0)
Sierra Leone 2	2109	All	89.3	.	3.1	1.7	0.0	5.8	0.0
			(87.2, 91.5)	.	( 2.2, 4.1)	( 0.8, 2.6)	( 0.0, 0.1)	( 4.4, 7.2)	( 0.0, 0.0)
Uganda 2	1045	Yes	38.3	36.5	9.8	7.0	5.0	3.5	0.0
			(35.5, 41.1)	(33.5, 39.4)	( 7.9, 11.6)	( 5.4, 8.6)	( 3.6, 6.3)	( 2.5, 4.6)	( 0.0, 0.0)
Uganda 2	319	No	24.5	19.1	8.5	7.8	7.5	32.0	0.6
			(19.9, 29.0)	(14.5, 23.7)	( 5.4, 11.5)	( 4.8, 10.9)	( 4.5, 10.5)	(26.7, 37.3)	(-0.2, 1.5)
Uganda 2	1364	All	35.0	32.4	9.5	7.2	5.6	10.2	0.1
			(32.7, 37.4)	(29.9, 35.0)	( 7.9, 11.1)	( 5.8, 8.6)	( 4.3, 6.8)	( 8.6, 11.8)	(-0.1, 0.3)
All LMICs	.	Yes	51.3	21.6	16.8	4.5	1.5	6.9	1.2
			(33.7, 68.9)	( 9.4, 33.8)	( 5.7, 27.9)	( 0.1, 8.8)	(-0.1, 3.1)	(-3.4, 17.2)	(-0.6, 3.0)
All LMICs	.	No	32.5	10.8	15.2	8.7	1.6	26.1	9.9
			(21.8, 43.3)	( 4.8, 16.8)	( 7.4, 23.0)	( 4.0, 13.4)	(-0.9, 4.1)	(10.2, 42.1)	( 0.6, 19.2)
All LMICs	.	All	48.1	19.3	16.8	5.3	1.5	10.6	2.4
			(31.6, 64.5)	( 8.3, 30.3)	( 6.1, 27.5)	( 1.0, 9.6)	(-0.2, 3.3)	( 0.7, 20.5)	(-0.1, 4.9)
Russia	5887	Yes	47.1	24.4	16.5	2.0	4.1	5.8	.
			(44.6, 49.7)	(22.2, 26.7)	(14.6, 18.5)	( 1.2, 2.8)	( 3.1, 5.1)	( 4.5, 7.0)	.
Russia	16238	No	31.1	6.9	33.1	2.2	5.3	21.3	.
			(29.6, 32.7)	( 6.1, 7.8)	(31.5, 34.7)	( 1.7, 2.8)	( 4.5, 6.0)	(20.0, 22.7)	.
Russia	22125	All	36.0	12.3	28.1	2.2	4.9	16.6	.
			(34.7, 37.3)	(11.3, 13.2)	(26.8, 29.3)	( 1.7, 2.6)	( 4.3, 5.5)	(15.6, 17.6)	.
USA	1313	Yes	38.1	33.0	8.7	1.7	.	18.6	0.0
			(34.8, 41.5)	(29.8, 36.1)	( 6.7, 10.7)	( 0.7, 2.6)	.	(16.1, 21.1)	( 0.0, 0.0)
USA	462	No	25.3	21.3	18.7	4.2	.	30.3	0.2
			(20.4, 30.3)	(16.6, 26.0)	(13.9, 23.4)	( 1.6, 6.9)	.	(25.0, 35.6)	(-0.2, 0.7)
USA	1775	All	34.5	29.7	11.5	2.4	.	21.9	0.1
			(31.7, 37.3)	(27.0, 32.3)	( 9.5, 13.4)	( 1.4, 3.4)	.	(19.5, 24.2)	(-0.1, 0.2)
Table 7 shows percentage of respondents that mention actors who they would trust the most to help them decide whether to get a COVID-19 vaccine. For all countries the questions was asked regardless if respondent would take a vaccine, would not take it, does not know or does not respond. For India respondents were able to mention more than one actor, for the rest of countries only one actor was allowed. While rows should sum to 100%, rounding makes number slightly above or below. A 95% confidence interval is shown between parentheses.

2.9 Summary stats

# Summary statistics table
# Expand the categorical variables (age group, education, gender) into one
# indicator column per level, named "<variable>_<level>", and store the result
# back in df2.
df2 <- fastDummies::dummy_cols(
  df2,
  select_columns = c("age_groups", "educ_binary", "gender")
)

# Mean of one column, estimated as the intercept of an intercept-only
# regression.
#
# .var   Name of the column (string). It is wrapped in backticks, so
#        non-syntactic names such as "age_groups_[18,30)" are allowed.
# .data  Data frame containing the column.
# ...    Further arguments forwarded to estimatr::lm_robust(), e.g. weights.
#
# Returns the coefficient vector of the fitted model, i.e. the (optionally
# weighted) mean.
get_stat <- function(.var, .data, ...) {
  mean_formula <- as.formula(paste0("`", .var, "` ~ 1"))
  # Namespace-qualified for consistency with the other lm_robust() calls.
  fit <- estimatr::lm_robust(formula = mean_formula, data = .data, ...)
  coef(fit)
}

# Unweighted sample composition per study: share of women, of each age band
# and of each education level, computed with get_stat() on the indicator
# columns of df2 and expressed in percent. The study identifier is exposed as
# `country`.
data_sumstat <-
  df2 %>%
  dplyr::nest_by(group) %>%
  dplyr::mutate(
    Female              = get_stat(.var = "gender_Female", .data = data),
    age_18_30           = get_stat(.var = "age_groups_[18,30)", .data = data),
    age_30_45           = get_stat(.var = "age_groups_[30,45)", .data = data),
    age_45_60           = get_stat(.var = "age_groups_[45,60)", .data = data),
    age_60              = get_stat(.var = "age_groups_[60, Inf)", .data = data),
    Less_than_secondary = get_stat(.var = "educ_binary_Up to Secondary", .data = data),
    More_than_secondary = get_stat(.var = "educ_binary_> Secondary", .data = data)
  ) %>%
  # Drop the nested data and rename the study column.
  dplyr::select(-data, country = group) %>%
  dplyr::ungroup() %>%
  # Proportions -> percentages.
  dplyr::mutate(across(where(is.double), ~ . * 100))


# WGM data
# Benchmark sample: adult (Age >= 18) respondents of the Wellcome Global
# Monitor 2018 public file for the countries covered by the studies, with the
# same indicator columns as df2 so that get_stat() can be applied to both.
wgmdata <- local({
  # WP5 country code for each country. Kept in one place so the row filter
  # and the recoding below cannot drift apart.
  wgm_countries <- c(
    "USA" = 1, "Pakistan" = 9, "India" = 31, "Nigeria" = 35, "Uganda" = 41,
    "Mozambique" = 63, "Rwanda" = 65, "Russia" = 76, "Burkina Faso" = 78,
    "Sierra Leone" = 80, "Colombia" = 105, "Nepal" = 157)

  readr::read_csv("2_input_data/wgm_2018_publiccsv.csv") %>%
    dplyr::filter(WP5 %in% wgm_countries,
                  Age >= 18) %>%
    dplyr::mutate(
      country =
        plyr::mapvalues(WP5,
                        from = unname(wgm_countries),
                        to = names(wgm_countries)),
      # Left-closed age bands. The resulting level labels (e.g. "[60, Inf)")
      # become part of the dummy column names that are referenced verbatim
      # when the summary statistics are computed, so keep the breaks as is.
      age_groups = cut(x = Age, breaks = c(-Inf, 18, 30, 45, 60, +Inf), right = FALSE)) %>%
    dplyr::select(country, wgt, gender = Gender, age = Age, educ = Education,
                  age_groups) %>%
    fastDummies::dummy_cols(select_columns = "age_groups") %>%
    dplyr::mutate(
      # NOTE(review): codes taken as given (Gender 2 = female; Education 1/2 =
      # up to secondary, 3 = more than secondary) -- confirm against the WGM
      # codebook.
      gender_Female = if_else(gender == 2, 1, 0),
      `educ_binary_Up to Secondary` = if_else(educ == 1 | educ == 2, 1, 0),
      `educ_binary_> Secondary` = if_else(educ == 3, 1, 0))
})


# Weighted benchmark composition per country from the WGM data: same
# statistics as for the study samples, but estimated with the `wgt` sampling
# weights and expressed in percent.
# The weights are passed under their full argument name (`weights`) so the
# call does not depend on partial argument matching inside lm_robust().
wgmdata_sumstat <-
  wgmdata %>%
  dplyr::nest_by(country) %>%
  dplyr::mutate(
    Female = get_stat("gender_Female", data, weights = wgt),
    age_18_30 = get_stat("age_groups_[18,30)", data, weights = wgt),
    age_30_45 = get_stat("age_groups_[30,45)", data, weights = wgt),
    age_45_60 = get_stat("age_groups_[45,60)", data, weights = wgt),
    age_60    = get_stat("age_groups_[60, Inf)", data, weights = wgt),
    Less_than_secondary = get_stat("educ_binary_Up to Secondary", data, weights = wgt),
    More_than_secondary = get_stat("educ_binary_> Secondary", data, weights = wgt)
    ) %>%
  dplyr::select(-data) %>%
  dplyr::ungroup() %>%
  # Proportions -> percentages.
  dplyr::mutate(across(where(is.double), ~ . * 100))

# Side-by-side layout: for every study row, the sample statistic followed by
# the WGM benchmark (columns suffixed "_wgm") for the same statistic.
# Rows are matched on the exact `country` value, so a study only gets a
# benchmark if its name equals a WGM country name.
# `all = TRUE` was removed: it is an argument of base::merge(), not of
# dplyr::left_join(), which either ignores it or rejects it depending on the
# dplyr version. The join remains a left join.
sum_stat_col <-
  dplyr::left_join(data_sumstat,
                   wgmdata_sumstat,
                   by = "country", suffix = c("", "_wgm")) %>%
  # Interleave sample and benchmark columns, statistic by statistic.
  dplyr::select(
    country,
    starts_with("Female"),
    starts_with("age_18_30"),
    starts_with("age_30_45"),
    starts_with("age_45_60"),
    starts_with("age_60"),
    starts_with("Less_than_secondary"),
    starts_with("More_than_secondary")
  )


# LaTeX landscape table of `sum_stat_col`: one study column followed by seven
# (sample, benchmark) column pairs, grouped under a spanning header.
tab_sum_col <-
  knitr::kable(
    sum_stat_col,
    caption = "Summary statistics for gender, age, education",
    col.names = c("Study", rep(c("COVID-19 Study", "Population"), 7)),
    digits = 1,
    format = "latex",
    align = c("l", rep("c", 14)),
    booktabs = TRUE, linesep = "") %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::add_header_above(
    c(" " = 1,
      "% Women" = 2,
      "% Age in [18,30)" = 2,
      "% Age in [30,45)" = 2,
      "% Age in [45,60)" = 2,
      "% Age 60+" = 2,
      "% Up to Secondary" = 2,
      "% More than Secondary" = 2)) %>%
  # The table has 1 + 14 = 15 columns, so the statistic columns are 2:15
  # (the previous 2:16 addressed a column that does not exist).
  kableExtra::column_spec(2:15, width = "3em") %>%
  kableExtra::column_spec(1, width = "5em") %>%
  kableExtra::footnote(
    general = "This table presents summarys statistics for our data and compares it with estimates from other sources of data. For each category, the left column and the right column correspond respectively to the statistics computed from our sample and from previous surveys. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.",
    threeparttable = TRUE) %>%
  kableExtra::landscape()

# Benchmark rows, labelled so they can be stacked under the study rows.
wgmdata_sumstat_label <-
  wgmdata_sumstat %>%
  dplyr::mutate(country = paste0(country, " (WGM)"))

# Row-wise layout: each study (or set of studies from the same country)
# followed by that country's WGM benchmark, with Russia and USA last.
# The order is given by name rather than by row position after sorting:
# positions depend on how many rows exist and on the collation used by
# arrange() (e.g. a C locale sorts "USA" before "Uganda"), so a positional
# permutation can silently scramble the table. Rows whose label is not listed
# here are kept and placed at the end.
sum_stat_row <- local({
  row_order <- c(
    "All",
    "Burkina Faso", "Burkina Faso (WGM)",
    "Colombia", "Colombia (WGM)",
    "India", "India (WGM)",
    "Mozambique", "Mozambique (WGM)",
    "Nepal", "Nepal (WGM)",
    "Nigeria", "Nigeria (WGM)",
    "Pakistan 1", "Pakistan 2", "Pakistan (WGM)",
    "Rwanda", "Rwanda (WGM)",
    "Sierra Leone 1", "Sierra Leone 2", "Sierra Leone (WGM)",
    "Uganda 1", "Uganda 2", "Uganda (WGM)",
    "Russia", "Russia (WGM)",
    "USA", "USA (WGM)")

  dplyr::bind_rows(data_sumstat, wgmdata_sumstat_label) %>%
    dplyr::arrange(match(country, row_order))
})

# LaTeX version of the row-wise summary statistics table (`sum_stat_row`):
# one row per study or benchmark, one column per statistic.
# `TRUE` is spelled out instead of the reassignable shorthand `T`.
tab_sum_row <-
  knitr::kable(
    sum_stat_row, caption = "Summary statistics for gender, age, education",
    col.names = c("Study", "% Women",
                  "% Age in [18,30)", "% Age in [30,45)", "% Age in [45,60)", "% Age 60+",
                  "% Up to Secondary", "% More than Secondary"),
    format = "latex", booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)), digits = 1) %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::footnote(
    general = "This table presents summarys statistics for our data and compares it with estimates from other sources of data. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.",
    threeparttable = TRUE) %>%
  kableExtra::column_spec(1:8, width = "5em")



# Version of the row-wise summary statistics table printed in this document,
# rendered with knitr's default output format (no explicit `format` is given).
# `TRUE` is spelled out instead of the reassignable shorthand `T`.
knitr::kable(
  sum_stat_row,
  caption = "Summary statistics for gender, age, education",
  col.names = c("Study", "% Women",
                "% Age in [18,30)", "% Age in [30,45)", "% Age in [45,60)", "% Age 60+",
                "% Up to Secondary", "% More than Secondary"),
  booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)), digits = 1) %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::footnote(
    general = "This table presents summarys statistics for our data and compares it with estimates from other sources of data. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.") %>%
  kableExtra::column_spec(1:8, width = "5em")

Summary statistics for gender, age, education
Study	% Women	% Age in [18,30)	% Age in [30,45)	% Age in [45,60)	% Age 60+	% Up to Secondary	% More than Secondary
All	48.9	34.4	43.5	16.9	5.1	77.5	22.5
Burkina Faso	27.9	37.5	50.0	9.2	3.3	61.1	38.9
Burkina Faso (WGM)	48.5	40.3	35.0	14.8	9.9	99.5	0.5
Colombia	63.6	32.2	34.9	25.4	7.5	65.9	34.1
Colombia (WGM)	51.9	29.4	29.5	23.5	17.6	93.7	6.3
India	19.8	19.9	45.7	29.3	5.1	82.4	17.6
India (WGM)	49.3	33.2	33.2	21.1	12.5	95.3	4.7
Mozambique	42.9	7.8	42.9	34.3	15.0	81.8	18.2
Mozambique (WGM)	51.8	41.7	34.1	13.7	10.5	100.0	0.0
Nepal	42.7	28.9	39.7	21.9	7.9	.	.
Nepal (WGM)	51.5	36.8	29.5	18.6	15.1	95.0	5.0
Nigeria	37.1	33.7	51.3	12.0	3.0	.	.
Nigeria (WGM)	48.3	43.3	32.3	13.5	11.0	98.4	1.6
Pakistan 1	49.8	14.6	68.4	16.4	0.6	77.6	22.4
Pakistan 2	.	.	.	.	.	67.9	32.1
Pakistan (WGM)	48.4	41.2	31.5	17.7	9.6	93.4	6.6
Rwanda	36.1	55.6	34.7	8.0	1.7	82.7	17.3
Rwanda (WGM)	53.7	43.2	31.6	16.2	9.0	100.0	0.0
Sierra Leone 1	33.6	44.3	42.2	10.9	2.5	61.0	39.0
Sierra Leone 2	25.7	13.8	39.7	33.6	12.8	87.3	12.7
Sierra Leone (WGM)	51.9	43.9	31.5	14.8	9.8	97.1	2.9
Uganda 1	100.0	61.8	38.2	0.0	0.0	90.3	9.7
Uganda 2	49.0	38.1	36.7	16.8	8.3	70.9	29.1
Uganda (WGM)	52.4	47.2	32.0	13.3	7.5	99.0	1.0
Russia	64.5	22.6	56.5	19.7	1.1	7.0	93.0
Russia (WGM)	56.0	20.4	27.9	27.5	24.2	73.2	26.8
USA	51.4	23.2	28.4	21.3	27.2	21.1	78.9
USA (WGM)	49.8	19.9	23.3	25.8	31.0	65.6	34.4
Note:
This table presents summarys statistics for our data and compares it with estimates from other sources of data. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.

COVID-19 Vaccine Acceptance and Hesitancy in Low and Middle Income Countries, and Implications for Messaging: Replication Code

07 June, 2021