1 Set up

This code takes cleaned and aggregated data as input. Cleaning and aggregation are done in 1_cleaning.Rmd.

1.1 Helper functions

Code used to prepare data (prepare survey weights), run analyses and robustness checks.

# The helper renormalizes weights so that each study gets the 
# same total weight even if they are missing data

# Rescale `weight` so that it sums to one within every value of `country`,
# then return the data ungrouped. Expects columns `country` and `weight`.
study_weighting <- function(data) {
  by_country <- dplyr::group_by(data, country)
  rescaled <- dplyr::mutate(by_country, weight = weight / sum(weight))
  dplyr::ungroup(rescaled)
}

# Fit estimatr::lm_robust() on data whose weights were renormalized by
# study_weighting(); everything in `...` (formula, cluster, weight, se_type,
# ...) is forwarded to lm_robust(). Returns the tidied coefficient table
# with the number of observations appended as column `n`.
lm_helper <- function(data, ...) {
  fit <- estimatr::lm_robust(data = study_weighting(data), ...)
  bind_cols(tidy(fit), n = nobs(fit))
}

# Leave-m-out helper: takes data and sample_var, forms every combination of
# loo_n distinct values of sample_var, drops the rows belonging to each
# combination in turn, and applies loo_fun to the remaining data

# Leave-`loo_n`-out estimation.
#   data       data frame to analyse
#   sample_var name (string) of the column identifying the samples
#   loo_n      how many samples are left out at a time
#   loo_fun    function applied to each reduced data set; by default the
#              weighted, cluster-robust mean of take_vaccine_num
# Returns the plyr::adply() stack of loo_fun() results, one set of rows per
# left-out combination.
loo_helper <- 
  function(data, 
           sample_var, 
           loo_n = 1,
           loo_fun = 
             function(dat) lm_helper(data = dat, 
                                     formula = take_vaccine_num ~ 1, cluster = cluster,
                                     weight = weight, se_type = "stata")
  ) {

  .var <- data[[sample_var]]

  # Each column of this matrix is one combination of samples to leave out
  left_out_sets <- combn(unique(.var), loo_n)

  # Re-estimate on the data without the rows of the left-out samples
  estimate_without <- function(left_out) {
    loo_fun(data[!(.var %in% left_out), ])
  }

  plyr::adply(.data = left_out_sets, .margins = 2, .fun = estimate_without)
}

# Subgroup analysis : Function to apply analysis function over groups

# Estimate the weighted mean of outcome `y` within every combination of
# `group` and the levels of covariate `x`.
#   df  data frame with columns `group`, `cluster`, `weight`, `x` and `y`
#   y   name (string) of the outcome column
#   x   name (string) of the subgroup column
# Returns one lm_helper() row per (group, x level), with the subgroup column
# carrying the name given in `x`.
grp_analysis <- function(df, y, x)
  
  df %>%
    # Keep only rows where subgroup, outcome, cluster and weight are all observed
    dplyr::filter(if_all(c(all_of(x), all_of(y), cluster, weight), ~ !is.na(.))) %>%
    # One nested data frame per (group, level of x); the second key column is
    # literally named "get(x)" at this point
    dplyr::nest_by(group, get(x)) %>%
    dplyr::summarize(
      # Intercept-only regression, i.e. the weighted mean of y
      lm_helper(data = data, 
                formula = as.formula(paste0(y, "~ 1")), cluster = cluster,
                weight = weight, se_type = "stata"), .groups = "drop") %>% 
    # Give the "get(x)" column the name stored in x
    dplyr::rename(!!x := "get(x)")
  


# Reasons analysis: Function to apply analysis function over groups

# Weighted share reporting `reason`, estimated separately for every `group`,
# among respondents whose `take_vaccine` answer is in `num`.
#   df      data frame with columns group, take_vaccine, cluster, weight
#   reason  name (string) of the reason column used as the outcome
#   num     value(s) of take_vaccine that define the sample
reasons_together <- function(df, reason, num = "Yes") {
  df %>%
    # Respondents with the requested answer ...
    dplyr::filter(take_vaccine %in% num) %>%
    # ... for whom reason, cluster and weight are all observed
    dplyr::filter(if_all(c(all_of(reason), cluster, weight), ~ !is.na(.x))) %>%
    dplyr::nest_by(group) %>%
    dplyr::summarize(
      lm_helper(
        data = data,
        formula = as.formula(paste0(reason, "~ 1")),
        cluster = cluster,
        weight = weight,
        se_type = "stata"
      ),
      .groups = "drop"
    )
}


# Weighted share reporting `reason` per `group`, optionally restricted to a
# demographic subgroup.
#   df            data frame with columns group, take_vaccine, cluster,
#                 weight (and gender when dem_group = "gender")
#   reason        name (string) of the reason column used as the outcome
#   num           value(s) of take_vaccine that define the sample
#   dem_group     "gender" to restrict by gender; anything else (including
#                 the default NA) applies no demographic restriction
#   dem_subgroup  gender value(s) to keep when dem_group = "gender"
# Unlike reasons_together(), rows with missing cluster or weight are not
# dropped here.
reasons_together_subgroup <- function(df, reason, num = "Yes", 
                                      dem_group = NA, dem_subgroup = NA){
  
  # isTRUE() guards against the default NA: a bare `if (NA == "gender")`
  # stops with "missing value where TRUE/FALSE needed".
  if (isTRUE(dem_group == "gender")) {
    # Namespace-qualified so that stats::filter() can never be picked up
    df <- dplyr::filter(df, gender %in% dem_subgroup)
  }
  
  df %>%
    dplyr::filter(take_vaccine %in% num,
                  !is.na(.data[[reason]])) %>%
    dplyr::nest_by(group) %>%
    dplyr::summarize(
      lm_helper(data = data, 
                formula = as.formula(paste0(reason, "~ 1")), cluster = cluster,
                weight = weight, se_type = "stata"), .groups = "drop")
}

# Age analysis for reasons

# Weighted share reporting `reason` per `group`, restricted to rows where the
# indicator column passed as `filter_by` equals 1 and `take_vaccine` is in
# `num`.
#   filter_by  unquoted column name of a 0/1 indicator (e.g. age_less24).
#              NOTE(review): with the default NA the condition `NA == 1` is
#              NA for every row, so no rows are kept -- callers presumably
#              always supply this argument; confirm.
age_analysis <- function(df, reason, num = "Yes", filter_by=NA){
  analysis_sample <- dplyr::filter(
    df,
    {{ filter_by }} == 1,
    take_vaccine %in% num,
    if_all(c(all_of(reason), cluster, weight), ~ !is.na(.x))
  )

  analysis_sample %>%
    dplyr::nest_by(group) %>%
    dplyr::summarize(
      lm_helper(
        data = data,
        formula = as.formula(paste0(reason, "~ 1")),
        cluster = cluster,
        weight = weight,
        se_type = "stata"
      ),
      .groups = "drop"
    )
}

1.2 Final cleaning and harmonization of weights

Groups variables into discrete categories and prepares survey weights.

# Call data created in 1_cleaning.Rmd
df <- readr::read_csv("3_rep_data/combined.csv", guess_max = 30000)

# Cluster ids:
#  * respondents without a cluster id become their own cluster (they get
#    their row position within the study as id)
#  * ids are prefixed with the lower-cased, underscore-separated country
#    label so that they cannot collide across studies
df <- dplyr::mutate(
  dplyr::group_by(df, study),
  cluster = ifelse(is.na(cluster), paste(seq_len(n())), cluster),
  cluster = paste0(gsub(" ", "_", tolower(country)), "_", cluster)
)


# Weights sum to 1 in each study and recode age and education into bins
# Fill in missing weights, rescale weights to sum to one within each study,
# and derive the binned age and education variables used in the analyses.
df <- 
  df %>% 
  dplyr::group_by(study) %>% 
  dplyr::mutate(
         # Study mean of the observed weights. The argument must be spelled
         # `na.rm`: a misspelled name is swallowed by mean()'s `...`, leaving
         # the mean NA whenever any weight is missing.
         weight_replace = mean(weight, na.rm = TRUE),
         # Missing weights take the study mean; when a study has no weights
         # at all (mean is NaN, which is.na() also flags) everyone gets 1.
         weight = if_else(is.na(weight), if_else(is.na(weight_replace), 1, weight_replace), weight),
         # Weights sum to 1 in each study
         weight = weight/sum(weight)) %>% 
  dplyr::ungroup() %>%
  dplyr::mutate(
    # Left-closed age bins: [-Inf,18), [18,30), [30,45), [45,60), [60,Inf)
    age_groups = as.character(cut(x = age, breaks = c(-Inf, 18, 30, 45, 60, +Inf), right = FALSE)),
    # Two-way split at 55 (NA when age is missing)
    age_groups_binary = ifelse(age >= 55, "55+", NA),
    age_groups_binary = ifelse(age < 55, "<55", age_groups_binary),
    # 0/1 indicators for the three age bands
    age_less24 = ifelse(age <= 24, 1, 0),
    age_25_54 = ifelse(age >= 25 & age <= 54, 1, 0),
    age_55_more = ifelse(age >= 55, 1, 0),
    # Three-way split: <25, 25-54, 55+
    age_groups_three = ifelse(age <= 24, "<25", NA),
    age_groups_three = ifelse(age >= 25 & age <= 54, "25-54", age_groups_three),
    age_groups_three = ifelse(age >= 55, "55+", age_groups_three),
    # Binary education: "More than secondary" versus everything else
    educ_binary = if_else(educ == "More than secondary", "> Secondary", "Up to Secondary")) 


# We create a new dataframe with countries and with "All" (only LMICs). Countries are clusters in "All" analysis
# USA and Russia excluded from "All" set

# Stack two copies of the data: one where `group` is the study's own country
# label, and one pooled "All" copy without USA and Russia. Within "All" the
# cluster id is replaced by the (lower-cased, underscore-separated) country
# label, so that countries act as clusters in the pooled analysis.
df2 <- 
  dplyr::bind_rows(
    df %>% mutate(group = country),
    df %>% 
      filter(country != "USA", country != "Russia") %>% 
      mutate(group = "All")
  ) %>% 
  mutate(
    cluster = if_else(
      group == "All", 
      gsub(pattern = " ", replacement = "_", x = tolower(country)), 
      cluster
    )
  ) 

1.3 Data checks

Checks on data structure. Note n, missingness, presence of data on weights or clusters.

1.3.1 Data structure

# Per-sample diagnostics table: size, weight dispersion, cluster size,
# alternative acceptance rates and share of missing values.
df %>% group_by(country) %>%
  summarize(n = n(), 
            # Coefficient of variation of the weights (sd divided by mean)
            sd_wt = sd(weight, na.rm = TRUE)/mean(weight, na.rm = TRUE), 
            # Average number of respondents per distinct cluster id
            cl_size = n()/length(unique(cluster)), 
            # Share "Yes" among respondents answering "Yes" or "No"
            take_1 = mean((take_vaccine == "Yes")[take_vaccine == "Yes" | take_vaccine == "No" ], na.rm = TRUE),
            # Share "Yes" among all non-missing answers
            take_2 = mean((take_vaccine == "Yes"), na.rm = TRUE),
            # Mean of the numeric outcome
            take_3 = mean(take_vaccine_num, na.rm = TRUE),
            # Share "DK"; no na.rm, so NA when any take_vaccine is missing
            take_dk = mean(take_vaccine == "DK"),
            # Columns starting with "." are shares of missing values
            .take = mean(is.na(take_vaccine_num)), .age = mean(is.na(age)),
            .gender = mean(is.na(gender)), .educ = mean(is.na(educ))) %>%
  kable(digits = 2, 
        caption = "Observations, missingness patterns, data structure. Column .var is the share missing for variable var.", booktabs = TRUE, linesep = "", format.args = list(big.mark = ",", 
  scientific = FALSE))
Observations, missingness patterns, data structure. Column .var is the share missing for variable var.
country n sd_wt cl_size take_1 take_2 take_3 take_dk .take .age .gender .educ
Burkina Faso 977 0.08 1.00 0.69 0.67 0.67 . 0 0.88 0.00 0.00
Colombia 1,012 0.16 1.00 0.79 0.79 0.75 . 0 0.32 0.00 0.00
India 1,680 0.62 11.83 0.83 0.83 0.84 0.00 0 0.00 0.00 0.80
Mozambique 862 0.00 5.29 0.91 0.91 0.89 . 0 0.00 0.00 0.04
Nepal 1,389 0.51 15.43 0.97 0.97 0.97 0.00 0 0.05 0.05 1.00
Nigeria 1,868 0.00 1.00 0.78 0.78 0.76 . 0 0.00 0.00 1.00
Pakistan 1 1,633 1.17 15.41 0.86 0.73 0.72 . 0 0.00 0.00 0.01
Pakistan 2 1,492 0.00 1.00 0.84 0.67 0.66 . 0 1.00 1.00 0.00
Russia 22,125 1.86 1.00 0.37 0.27 0.27 0.27 0 0.00 0.00 0.00
Rwanda 1,355 0.04 1.00 0.94 0.94 0.85 . 0 0.00 0.00 0.00
Sierra Leone 1 1,070 0.06 1.00 0.78 0.78 0.78 0.00 0 0.00 0.00 0.03
Sierra Leone 2 2,110 0.00 11.05 0.88 0.88 0.88 . 0 0.01 0.00 0.00
Uganda 1 3,362 0.00 6.75 0.91 0.91 0.86 . 0 0.05 0.00 0.19
Uganda 2 1,366 0.00 4.41 0.79 0.77 0.77 . 0 0.00 0.00 0.00
USA 1,959 0.77 1.00 0.74 0.74 0.67 . 0 0.00 0.00 0.00

1.3.2 Age distribution

# Density of respondent age, one panel per sample.
# pivot_longer() replaces the superseded tidyr::gather(); the long layout
# (columns country / category / value) is the same.
df %>% 
  select(country, age) %>%
  tidyr::pivot_longer(cols = -country, names_to = "category", values_to = "value") %>%
  mutate(value = as.numeric(value)) %>%
  ggplot(aes(value)) + geom_density() + facet_wrap(~country, ncol = 3)

1.3.3 Gender, education distribution

# Bar charts of gender and education categories, one row of panels per sample.
# pivot_longer() replaces the superseded tidyr::gather(); both columns are
# coerced to character so that they can share a single `value` column, as
# gather() did implicitly.
df %>% 
  select(country, gender, educ) %>%
  tidyr::pivot_longer(cols = -country, names_to = "category", values_to = "value",
                      values_transform = list(value = as.character)) %>%
    ggplot(aes(value)) +  geom_bar() + facet_grid(country ~ category, scales = "free")

2 Tables and Figures

2.1 Table 1: Vaccine data from WGM, WHO

# Call data from WGM
# Wellcome Global Monitor beliefs
dfwgm <- read.csv("3_rep_data/table_wgm.csv")

# WHO vaccine coverage
df_vacc_coveragebis <- read.csv("3_rep_data/vacc_cov.csv")

# Merge both sources (joined on the columns they share, as no `by` is
# given), list Russia and USA after all other countries, and keep the
# columns in presentation order.
table_1b <- 
  left_join(dfwgm, df_vacc_coveragebis) %>%
  mutate(
    country = forcats::fct_relevel(as.factor(country), "Russia", "USA", after = Inf)
  ) %>% 
  arrange(country) %>%
  select(country, Effectiveness, Safety, Important, BCG, DTP1, MCV1, Coverage)

# To Latex

# LaTeX version of Table 1 (landscape, scaled down, with a threeparttable
# footnote). Relies on `base_font_size`, which is defined outside this chunk.
tab_1b <- 
  table_1b %>%
  knitr::kable(
    caption =  "Vaccination beliefs and coverage for the countries in our sample",
    col.names = c(
      "",
      "Effective",
      "Safe",
      "Important for children to have",
      "Tuberculosis (BCG)",
      "Diphtheria, Tetanus and Pertussis (DTP1)",
      "Measles (MCV1)",
      "% of parents with any child that was ever vaccinated"
    ),
    format = "latex",
    booktabs = TRUE,
    linesep = "",
    align = c("l", rep("c", 7)),
    label = "otherv"
  ) %>% 
  kableExtra::kable_styling(
    latex_options = c("scale_down", "hold_position"), 
    full_width = FALSE,
    font_size = base_font_size - 2
  ) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  # Spanning headers: 1 label column, 3 belief columns, 3 coverage columns,
  # 1 parent-reported column
  kableExtra::add_header_above(
    c(" " = 1, 
      "% Respondents agreeing Vaccines are..." = 3,
      "Vaccine coverage in 2019 (% of infants)" = 3,
      " " = 1),
    bold = TRUE
  ) %>%
  kableExtra::column_spec(1:5, width = "9em") %>% 
  kableExtra::column_spec(6:7, width = "5em") %>%
  kableExtra::column_spec(8, width = "9em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-4 and 8 use data from the Wellcome Global Monitor 2018. Column 8 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 2-4 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 5-7 use data from the World Health Organization on vaccine incidence. Columns 5-7 report the percentage of infants per country receiving the vaccine indicated in each column.", 
    threeparttable = TRUE
  ) %>%
  kableExtra::landscape()

# HTML version of Table 1. Column order follows table_1b: country (1),
# Effective / Safe / Important (2-4, Wellcome Global Monitor), BCG / DTP1 /
# MCV1 (5-7, WHO), parent-reported vaccination (8, Wellcome Global Monitor).
# The footnote refers to the columns by these positions, matching the
# spanning headers below and the LaTeX version of the table.
knitr::kable(
  table_1b,
  caption =  "Vaccination beliefs and coverage for the countries in our sample",
  col.names = c("",
                "Effective","Safe","Important for children to have",
                "Tuberculosis (BCG)", "Diphtheria, Tetanus and Pertussis (DTP1)",
                "Measles (MCV1)",
                "% of parents with any child that was ever vaccinated"),
  format = "html", booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)), label = "otherv") %>% 
  kableExtra::kable_styling(full_width = FALSE)  %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::add_header_above(c(" " = 1, 
                                 "% Respondents agreeing Vaccines are..." = 3,
                                 "Vaccine coverage in 2019 (% of infants)" = 3,
                                 " " = 1), bold = TRUE) %>%
  kableExtra::column_spec(1:5, width = "9em") %>% 
  kableExtra::column_spec(6:7, width = "5em") %>%
  kableExtra::column_spec(8, width = "9em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-4 and 8 use data from the Wellcome Global Monitor 2018. Column 8 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 2-4 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 5-7 use data from the World Health Organization on vaccine incidence. Columns 5-7 report the percentage of infants per country receiving the vaccine indicated in each column.", 
    threeparttable = TRUE)
Vaccination beliefs and coverage for the countries in our sample
% Respondents agreeing Vaccines are…
Vaccine coverage in 2019 (% of infants)
Effective Safe Important for children to have Tuberculosis (BCG) Diphtheria, Tetanus and Pertussis (DTP1) Measles (MCV1) % of parents with any child that was ever vaccinated
Burkina Faso 87 72 95 98 95 88 97
Colombia 83 84 99 89 92 95 95
India 96 97 98 92 94 95 92
Mozambique 87 93 98 94 93 87 95
Nepal 89 93 99 96 96 92 95
Nigeria 82 92 96 67 65 54 95
Pakistan 91 92 95 88 86 75 94
Rwanda 99 97 99 98 99 96 100
Sierra Leone 95 95 99 86 95 93 97
Uganda 82 87 98 88 99 87 98
Russia 67 48 80 96 97 98 96
USA 85 73 87 . 97 90 95
Table 1 presents an overview of vaccination beliefs and incidence across countries in our sample. Columns 2-4 and 8 use data from the Wellcome Global Monitor 2018. Column 8 shows the percentage of respondents who are parents and report having had any of their children ever vaccinated. Columns 2-4 show the percentage of all respondents that either strongly agree or somewhat agree with the statement above each column. All percentages are obtained using national weights. Columns 5-7 use data from the World Health Organization on vaccine incidence. Columns 5-7 report the percentage of infants per country receiving the vaccine indicated in each column.

2.2 Table with summary of samples

# LaTeX version of the sampling summary, read from the "sample" sheet of the
# studies workbook. Relies on `base_font_size`, defined outside this chunk.
tab_sampling <- 
  readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select(
    "Study" = "country",
    "Date" = "date",
    "Geographic scope",
    "Sampling methodology",
    "Survey modality",
    "Weights"
  ) %>%
  knitr::kable(
    caption =  "Summary of studies sampling",
    format = "latex",
    booktabs = TRUE,
    linesep = "",
    label = "sampling"
  ) %>% 
  kableExtra::kable_styling(
    latex_options = c("scale_down", "hold_position"),
    font_size = base_font_size - 2
  ) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  # Study and Date narrow, Geographic scope medium, Sampling methodology wide
  kableExtra::column_spec(1:2, width = "8em") %>%
  kableExtra::column_spec(3, width = "12em")  %>%
  kableExtra::column_spec(4, width = "30em") %>% 
  kableExtra::landscape()

# HTML version of the sampling summary. Column widths follow the LaTeX
# version (tab_sampling): Study and Date 8em, Geographic scope 12em, and the
# long free-text "Sampling methodology" column 30em.
readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select("Study" = "country", "Date"="date",
                "Geographic scope", "Sampling methodology", "Survey modality", "Weights") %>%
  knitr::kable(caption =  "Summary of studies' sampling", linesep = "", format = "html") %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1:2, width = "8em") %>%
  kableExtra::column_spec(3, width = "12em")  %>%
  kableExtra::column_spec(4, width = "30em")
Summary of studies’ sampling
Study Date Geographic scope Sampling methodology Survey modality Weights
Burkina Faso October to December 2020 National Random digit dialing (RDD) Phone Yes
Colombia August 2020 National Random digit dialing (RDD) Phone Yes
India June 2020 to January 2021 Subnational, Slums in 2 cities Representative sample of slum dwellers living in vicinity of a community toilet and located in Uttar Pradesh Phone Yes
Mozambique October to November 2020 Subnational, 2 cities
  1. Random sample in urban and periurban markets stratified by gender and type of establishment in Maputo; 2) Random sample representative of communities in the Cabo Delgado, stratified on urban, semiurban, and rural areas
Phone No
Nepal December 2020 Subnational, 2 districts Random sample of poor households from randomly selected villages in Kanchanpur Phone Yes
Nigeria November to December 2020 Subnational, 1 state
  1. Random sample of individuals in Kaduna; 2) Sample of phone numbers from a phone list of Kaduna state residents
Phone No
Pakistan 1 July to September 2020 Subnational, 2 districts Random sample of individuals in administrative police units in two districts of Punjab Phone Yes
Pakistan 2 September to October 2020 Subnational, 1 province Random digit dialing (RDD) on a random sample of all numerically possible mobile phone numbers in the region of Punjab Phone No
Russia November to December 2020 Subnational, 61 regions Sample recruited from the Russian online survey company OMI (Online Market Intelligence). Sampling targeted at having a minimum of respondents per region, as well as representation of age, gender and education groups. Online Yes
Rwanda October to November 2020 National Random digit dialing (RDD) Phone Yes
Sierra Leone 1 October 2020 National Random digit dialing (RDD) Phone Yes
Sierra Leone 2 October 2020 to January 2021 National A random sample of households in 195 rural towns across all 14 districts of Sierra Leone Phone No
Uganda 1 September to December 2020 Subnational, 13 districts Sample of women in households from semi-rural and rural villages across 13 districts in Uganda, selected according to the likelihood of having children Phone No
Uganda 2 November to December 2020 Subnational, 1 district Random sample of households in Kampala Phone No
USA December 2020 National Nation-wide sample of adult internet users recruited through the market research firm Lucid Online Yes

2.3 Main Results: Acceptance Rates (disaggregated by group)

2.3.1 Figure

# Prep levels

# Overall acceptance per group: weighted mean of take_vaccine_num with
# cluster-robust standard errors, using rows where outcome, cluster and
# weight are all observed.
main_results <- 
  df2 %>% 
  dplyr::filter(!is.na(take_vaccine_num), !is.na(cluster), !is.na(weight)) %>% 
  dplyr::nest_by(group) %>%
  dplyr::summarize(
    lm_helper(
      data = data, 
      formula = take_vaccine_num ~ 1,
      cluster = cluster,
      weight = weight,
      se_type = "stata"
    ),
    .groups = "drop"
  )

# Acceptance by gender
acc_by_gender <- grp_analysis(df2, y = "take_vaccine_num", x = "gender")

# Acceptance by education (binary recoding)
acc_by_educ_binary <- grp_analysis(df2, y = "take_vaccine_num", x = "educ_binary")

# Acceptance by age (three bands). Cells with an infinite test statistic or
# a non-positive lower confidence bound are discarded.
acc_by_age <- 
  grp_analysis(df2, y = "take_vaccine_num", x = "age_groups_three") %>%
  dplyr::filter(statistic != Inf, conf.low > 0)

# Acceptance by age (binary split at 55)
acc_by_age_binary <- grp_analysis(df2, y = "take_vaccine_num", x = "age_groups_binary")


# Put them together in a single df. Make estimates "percentages" and round
# Stack overall and subgroup estimates. `cat` holds the subgroup label and
# `var` the dimension; estimates and confidence bounds become percentages
# rounded to one decimal.
ans <- 
  dplyr::bind_rows(
    mutate(main_results, cat = "All", var = "All"),
    mutate(rename(acc_by_gender, cat = gender), var = "By gender"),
    mutate(rename(acc_by_educ_binary, cat = educ_binary), var = "By education"),
    mutate(rename(acc_by_age, cat = age_groups_three), var = "By age")
  ) %>%
  dplyr::mutate(
    across(c(conf.low, conf.high, estimate), ~ round(.x * 100, digits = 1))
  )

# Axis labels of the form "<study> (<geographic scope>, <n>)", where n is
# taken from the overall ("All" category) estimate of each group
tags <- 
  readxl::read_excel("2_input_data/studies_info.xlsx", sheet = "sample") %>%
  dplyr::select(group = country, tag = "Geographic scope") %>%
  dplyr::left_join(
    filter(ans, cat == "All"),
    by = "group"
  ) %>%
  dplyr::mutate(tag = paste0(group, " (", tag, ", ", n, ")")) %>%
  dplyr::select(group, tag)

# Prepare df to plot. Important but ugly relevel of factors, happening all over the code

# Attach the axis labels and turn var / cat / tag into factors so that
# panels, legend entries and axis rows appear in a fixed order.
ans %<>% 
  # No `by` given: joins on the columns ans and tags have in common
  dplyr::left_join(tags) %>%
  # The pooled group has no entry in tags; label it explicitly
  dplyr::mutate(tag = ifelse(group == "All", "All LMICs", tag)) %>%
  # group_by(var) %>% 
  # arrange(cat) %>%
  dplyr::mutate(
    # Panel order
    var = factor(var, levels = c("All", "By gender", "By education", "By age")),
    # Subgroup order (reversed) with display labels; the age labels are
    # written with $...$ delimiters
    cat = factor(
      cat, ordered = TRUE,
      levels = rev(c("Female", "Male", "Up to Secondary", 
                       "> Secondary", "<25", "25-54", "55+", "All")),
      labels = rev(c("Female", "Male", "Up to Secondary", 
                     "More than Secondary", "$< 25$", "$25-54$", "$55 +$", "All"))),
    # Put the parenthesised part of the label on its own line
    tag = gsub(pattern = " \\(", "\\\n\\(", tag))


# Labels of the pooled LMIC estimate, Russia and USA, which are listed
# separately from the individual LMIC studies
special_cases <- 
  sort(unique(ans$tag)[grep(unique(ans$tag), pattern = "All LMICs|Russia|USA")])

# Level order before rev(): remaining studies alphabetically, then the
# special cases
ans %<>% 
  dplyr::mutate(
    tag = 
      factor(x = tag, ordered = TRUE,
             levels = rev(c(sort(unique(tag)[!(unique(tag) %in% special_cases)]), special_cases))))

# Copy kept for the leave-m-out figure
ans_loo <- ans

#Colour blind palette (available palettes are smaller so we expand it)
# Twelve-colour palette used for the subgroup colours
safe_colorblind_palette <- c("#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", 
                             "#44AA99", "#999933", "#882255", "#661100", "#6699CC", "#888888", 
                             "#88CCEE")


#Plot
# Point estimates with confidence intervals per study (rows, after
# coord_flip) and subgroup (colour), one panel per dimension in `var`.
# Relies on `base_font_size`, defined outside this chunk.
fig_1_ages <- 
  ans %>% 
  ggplot(data = ., aes(x = tag, y = estimate, color = cat)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), 
                size = .5, width = .2, position = position_dodge(0.6)) + 
  geom_point(position = position_dodge(0.6)) +
  facet_grid(. ~ var, scales = "free_x", space = "free") + 
  # Studies run down the vertical axis
  coord_flip() +
  guides(color = guide_legend(reverse = TRUE, nrow = 2)) +
  # Separator lines between tag positions 2/3 and 3/4 -- presumably setting
  # apart the special cases placed at the start of the tag levels
  geom_vline(xintercept = 3.5, color = "darkgrey") +
  geom_vline(xintercept = 2.5, color = "darkgrey") +
  scale_colour_manual(values = safe_colorblind_palette) +
  labs(title = "If a COVID-19 vaccine becomes available in [country], would you take it?", 
       color = "Subgroups", x = "") +
  # Estimates are percentages, so the axis is fixed to 0-100
  theme_bw(base_size = (base_font_size - 2)) + ylim(c(0,100)) +
  theme(legend.position = "bottom",
        plot.caption = element_text(hjust = 0), #Default is hjust=1
        plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
        plot.caption.position =  "plot",
        axis.text.y = element_text(hjust = 0))

fig_1_ages

2.3.2 Invariance to aggregation at country level

Weighting so that each country (rather than each study) has equal weight has only a marginal impact on estimated LMIC average acceptance.

# LMIC respondents with an observed outcome, carrying two alternative
# clustering / weighting schemes:
#   cluster_1 / weight_1: every study is one unit (weights sum to 1 per study)
#   cluster_2 / weight_2: studies from the same country are merged into one
#                         unit (weights sum to 1 per country)
df_country <- 
  df %>% 
  filter(
    !is.na(take_vaccine_num),
    !(country %in% c("USA", "Russia"))
  ) %>%
  mutate(
    cluster_1 = country,
    cluster_2 = recode(
      country, 
      "Pakistan 1" = "Pakistan",
      "Pakistan 2" = "Pakistan",
      "Sierra Leone 1" = "Sierra Leone",
      "Sierra Leone 2" = "Sierra Leone",
      "Uganda 1" = "Uganda",
      "Uganda 2" = "Uganda"
    )
  ) %>%
  group_by(country) %>% 
  mutate(weight_1 = weight / sum(weight)) %>% 
  # Regrouping replaces the previous grouping by study
  group_by(cluster_2) %>% 
  mutate(weight_2 = weight_1 / sum(weight_1)) %>% 
  ungroup() 

# Inspect weights: total weight of each study under both schemes
df_country %>% 
  group_by(country) %>% 
  summarize(w1 = sum(weight_1), w2 = sum(weight_2)) %>% 
  kable()
country w1 w2
Burkina Faso 1 1.0
Colombia 1 1.0
India 1 1.0
Mozambique 1 1.0
Nepal 1 1.0
Nigeria 1 1.0
Pakistan 1 1 0.5
Pakistan 2 1 0.5
Rwanda 1 1.0
Sierra Leone 1 1 0.5
Sierra Leone 2 1 0.5
Uganda 1 1 0.5
Uganda 2 1 0.5
# Pooled LMIC acceptance under the two weighting schemes: studies as units
# (cluster_1 / weight_1) versus countries as units (cluster_2 / weight_2).
unit_check <- 
  bind_rows(
    lapply(
      list(
        study = lm_robust(
          data = df_country, 
          formula = take_vaccine_num ~ 1,
          cluster = cluster_1,
          weight = weight_1,
          se_type = "stata"
        ),
        country = lm_robust(
          data = df_country, 
          formula = take_vaccine_num ~ 1,
          cluster = cluster_2,
          weight = weight_2,
          se_type = "stata"
        )
      ),
      tidy
    ),
    .id = "Unit"
  )

kable(unit_check, digits = 2)
Unit term estimate std.error statistic p.value conf.low conf.high df outcome
study (Intercept) 0.80 0.02 32.85 0 0.75 0.86 12 take_vaccine_num
country (Intercept) 0.81 0.03 28.95 0 0.74 0.87 9 take_vaccine_num

2.3.3 Robustness keeping only national samples

# Restrict to the samples listed here as national in scope
nationals <- 
  df2 %>%
  filter(
    country %in% c("Burkina Faso", "Colombia", "Rwanda",
                   "Sierra Leone 1", "Sierra Leone 2")
  )

# Overall acceptance per group, national samples only
main_results_n <- 
  nationals %>% 
  dplyr::filter(!is.na(take_vaccine_num), !is.na(cluster), !is.na(weight)) %>% 
  dplyr::nest_by(group) %>%
  dplyr::summarize(
    lm_helper(
      data = data, 
      formula = take_vaccine_num ~ 1,
      cluster = cluster,
      weight = weight,
      se_type = "stata"
    ),
    .groups = "drop"
  )

# Acceptance by gender
acc_by_gender_n <- grp_analysis(nationals, y = "take_vaccine_num", x = "gender")

# Acceptance by education (binary recoding)
acc_by_educ_binary_n <- grp_analysis(nationals, y = "take_vaccine_num", x = "educ_binary")

# Acceptance by age (three bands); cells with an infinite test statistic or
# a non-positive lower confidence bound are discarded
acc_by_age_n <- 
  grp_analysis(nationals, y = "take_vaccine_num", x = "age_groups_three") %>%
  dplyr::filter(statistic != Inf, conf.low > 0)

# Pooled estimates from the national samples only: keep the "All" group,
# express estimates as rounded percentages and relabel the group.
ans_n <- 
  dplyr::bind_rows(
    main_results_n %>% mutate(cat = "All", var = "All"),
    acc_by_gender_n %>% rename(cat = gender) %>% mutate(var = "By gender"),
    acc_by_educ_binary_n %>% rename(cat = educ_binary) %>% mutate(var = "By education"),
    acc_by_age_n %>% rename(cat = age_groups_three) %>% mutate(var = "By age")) %>%
  dplyr::mutate(across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1))) %>%
  filter(group=="All") %>%
  mutate(group="All LMICs (National samples)",
         tag="All LMICs (National samples)")  


# Prepare df to plot. Important but ugly relevel of factors, happening all over the code
# The national-sample rows get the same factor coding as `ans` and are then
# stacked on top of the full-sample results.

ans_n %<>% 
  # group_by(var) %>% 
  # arrange(cat) %>%
  dplyr::mutate(
    var = factor(var, levels = c("All", "By gender", "By education", "By age")),
    cat = factor(
      cat, ordered = TRUE,
      levels = rev(c("Female", "Male", "Up to Secondary", 
                     "> Secondary", "<25", "25-54", "55+", "All")),
      labels = rev(c("Female", "Male", "Up to Secondary", 
                     "More than Secondary", "$< 25$", "$25-54$", "$55 +$", "All"))),
    # Put the parenthesised part of the label on its own line
    tag = gsub(pattern = " \\(", "\\\n\\(", tag)) %>%
  bind_rows(ans)

# Labels listed separately from the individual LMIC studies. "All LMICs"
# matches both pooled labels (full sample and national samples); a separate
# "All LMICs (National samples)" alternative would need escaped parentheses
# and could not match anyway, because the tag has been split across two
# lines above.
special_cases <- 
  sort(unique(ans_n$tag)[grep(unique(ans_n$tag), pattern = "All LMICs|Russia|USA")])

# Level order before rev(): remaining studies alphabetically, then the
# special cases
ans_n %<>% 
  dplyr::mutate(
    tag = 
      factor(x = tag, ordered = TRUE,
             levels = rev(c(sort(unique(tag)[!(unique(tag) %in% special_cases)]), special_cases))))

# Overall national-sample estimate, printed below the figure
ans_national <- ans_n %>% filter(group == "All LMICs (National samples)" & cat=="All") 



# Same figure as the main acceptance plot, now including the pooled estimate
# from national samples. NOTE: this reassigns `fig_1_ages`, replacing the
# earlier figure object of the same name.
fig_1_ages <- 
  ans_n %>% 
  ggplot(data = ., aes(x = tag, y = estimate, color = cat)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), 
                size = .5, width = .2, position = position_dodge(0.6)) + 
  geom_point(position = position_dodge(0.6)) +
  facet_grid(. ~ var, scales = "free_x", space = "free") + 
  # Studies run down the vertical axis
  coord_flip() +
  guides(color = guide_legend(reverse = TRUE, nrow = 2)) +
  # One more separator than in the main figure, for the additional pooled row
  geom_vline(xintercept = 4.5, color = "darkgrey") +
  geom_vline(xintercept = 3.5, color = "darkgrey") +
  geom_vline(xintercept = 2.5, color = "darkgrey") +
  scale_colour_manual(values = safe_colorblind_palette) +
  labs(title = "If a COVID-19 vaccine becomes available in [country], would you take it?", 
       color = "Subgroups", x = "") +
  # Estimates are percentages, so the axis is fixed to 0-100
  theme_bw(base_size = (base_font_size - 2)) + ylim(c(0,100)) +
  theme(legend.position = "bottom",
        plot.caption = element_text(hjust = 0), #Default is hjust=1
        plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
        plot.caption.position =  "plot",
        axis.text.y = element_text(hjust = 0))

fig_1_ages

# Overall estimate from national samples only
ans_national %>% kable(digits = 2)
group term estimate std.error statistic p.value conf.low conf.high df outcome n cat var tag
All LMICs (National samples) (Intercept) 78.4 0.04 20.72 0 67.9 89 4 take_vaccine_num 6524 All All All LMICs (National samples)

2.3.4 Robustness check using leave-m-out approach

The main estimates for average LMIC acceptance do not vary strongly if we exclude one or two samples at a time and re-estimate the average each time.

# Step 1: overall LMIC acceptance, leaving out one and then two samples at
# a time. Uses df (not df2), excluding USA and Russia, with loo_helper()'s
# default estimator.
loo_estimates <- 
  plyr::ldply(
    .data = list(1,2), 
    .fun = function(x) {
      df %>% 
        # Rows with observed outcome, cluster and weight
        dplyr::filter(dplyr::if_all(c(take_vaccine_num, cluster, weight), ~ !is.na(.))) %>% 
        dplyr::filter(!(country %in% c("USA", "Russia"))) %>% 
        loo_helper(., "country", loo_n = x) %>% 
        dplyr::mutate(m = paste0("Leaving ", x, " out"), tag = "All", var = "All")
    })

# Step 2: the same exercise by subgroup, for every combination of number of
# samples left out (x) and subgroup variable (var), on the pooled "All" rows
# of df2. The result is appended below the overall estimates from step 1.
loo_estimates <- 
  plyr::mdply(
    .data = expand_grid(x = c(1,2), var = c("gender", "educ_binary", "age_groups_three")), 
    .fun = function(x, var) {
      df2 %>% 
        dplyr::filter(dplyr::if_all(c(take_vaccine_num, cluster, weight), ~ !is.na(.))) %>% 
        dplyr::filter(group == "All") %>% 
        loo_helper(., "country", 
                   loo_n = x, 
                   loo_fun = function(dat) grp_analysis(dat, y = "take_vaccine_num", x = var)) %>% 
        dplyr::mutate(m = paste0("Leaving ", x, " out"), var = var)
    }) %>% 
  # tag = subgroup label, taken from whichever subgroup column is filled in
  dplyr::mutate(tag = dplyr::coalesce(gender, educ_binary, age_groups_three),
                # Variable names -> panel labels
                var = plyr::mapvalues(var, 
                                      from = c("gender", "educ_binary", "age_groups_three"),
                                      to = c("By gender", "By education", "By age"))) %>%
  # Step-1 rows first, subgroup rows after
  dplyr::bind_rows(loo_estimates, .)

# Factor coding for the leave-m-out estimates (same order and display labels
# as in the main figure); only the columns needed for plotting are kept.
loo_estimates <- 
  loo_estimates %>% 
  dplyr::mutate(
    var = factor(var, levels = c("All", "By gender", "By education", "By age")),
    tag = factor(
      tag,
      ordered = TRUE,
      levels = rev(c("Female", "Male", "Up to Secondary", "> Secondary",
                     "<25", "25-54", "55+", "All")),
      labels = rev(c("Female", "Male", "Up to Secondary", "More than Secondary",
                     "$< 25$", "$25-54$", "$55 +$", "All"))
    )
  ) %>% 
  dplyr::select(var, m, tag, estimate)

# Twelve-colour palette used for the subgroup colours
safe_colorblind_palette <- c(
  "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99",
  "#999933", "#882255", "#661100", "#6699CC", "#888888", "#88CCEE"
)

# Reference estimates drawn as vertical lines: pooled LMIC, USA and Russia
ans_loo <- 
  ans_loo %>% 
  dplyr::filter(group %in% c("All", "USA", "Russia")) %>% 
  dplyr::mutate(
    group = plyr::mapvalues(group, from = "All", to = "All LMIC"),
    tag = cat
  )

# Histograms of the leave-m-out estimates (in percent), one row of panels
# per dimension and subgroup and one column per m, with the full-sample
# estimates from ans_loo overlaid as vertical lines. Relies on
# `base_font_size`, defined outside this chunk.
fig_hist_loo <- 
  loo_estimates %>%
  # Leave-m-out estimates are proportions; ans_loo is already in percent
  dplyr::mutate(estimate = estimate*100) %>% 
  ggplot(aes(estimate, color = tag, fill = tag)) + 
  geom_histogram(aes(y = ..density..), bins = 200, 
                 position = "dodge", alpha = .3, size = .3) +
  # Line type distinguishes the reference groups kept in ans_loo
  geom_vline(data = ans_loo, 
             aes(xintercept = estimate, color = cat, linetype = group), size = .9) +
  facet_grid_paginate(var + tag ~ m)  +
  scale_color_manual(
    name = "Subgroups", 
    values = safe_colorblind_palette) + 
  scale_fill_manual(
    name = "Subgroups", 
    values = safe_colorblind_palette) + 
  scale_linetype_manual(
    name = "Sample Average", 
    values = c("solid", "11", "dashed")) + 
  scale_x_continuous(n.breaks = 8, name = "estimate") +
  scale_y_continuous(labels = scales::percent_format(scale = 1, suffix = "", accuracy = 0.1)) +
  labs(title = "",
       x = "") +
  guides(color = guide_legend(reverse = TRUE, nrow = 2, keyheight = 1),
         fill = guide_legend(reverse = TRUE, nrow = 2, keyheight = 1),
         linetype = guide_legend(keyheight = 2)) +
  theme_bw(base_size = (base_font_size - 2)) +
  theme(legend.position = "bottom")

fig_hist_loo

2.3.5 Differences in means

# Analysis of differences in means only LMICs
# Notice that Uganda 1 is dropped, because it does not have reference categories for gender or age
# Notice that we are using df (and not df2) as data, since it does not include "All"

# Population estimate (clustering on country)

# Differences in acceptance by gender and by age (binary and three-band
# coding) among LMIC samples other than Uganda 1: weighted regressions of
# take_vaccine_num on each covariate in turn, with fixed effects for and
# clustering on country. The coefficient tables are stacked.
differences_means_gen_age <- 
  c("gender", "age_groups_binary", "age_groups_three") %>%
  lapply(function(covariate) {
    analysis_sample <- 
      df %>% 
      dplyr::filter(
        country != "USA",
        country != "Russia",
        country != "Uganda 1"
      ) %>%
      # Covariate and outcome must both be observed
      dplyr::filter(
        if_all(all_of(c(covariate, "take_vaccine_num")), ~ !is.na(.x))
      ) %>%
      study_weighting()

    fit <- estimatr::lm_robust(
      as.formula(paste("take_vaccine_num ~", covariate)),
      fixed_effects = ~country,
      weight = weight,
      cluster = country,
      se_type = "stata",
      data = analysis_sample
    )

    dplyr::select(tidy(fit), estimate, std.error, p.value, df, term)
  }) %>% 
  dplyr::bind_rows()

# Difference in acceptance by education among all LMIC samples (Uganda 1
# included): weighted regression of take_vaccine_num on educ_binary with
# fixed effects for and clustering on country.
differences_means_educ <- 
  df %>% 
  dplyr::filter(country != "USA", country != "Russia") %>% 
  dplyr::filter(!is.na(educ_binary), !is.na(take_vaccine_num)) %>%
  study_weighting() %>%
  estimatr::lm_robust(
    take_vaccine_num ~ educ_binary, 
    fixed_effects = ~country, 
    weight = weight, 
    cluster = country,
    se_type = "stata",
    data = .
  ) %>% 
  tidy() %>% 
  dplyr::select(estimate, std.error, p.value, df, term)


# Gaps quoted in the text: each pooled coefficient multiplied by 100 and
# rounded to one decimal place.

# Male coefficient from the gender model
dif_gender <- round(
  100 * dplyr::pull(
    dplyr::filter(differences_means_gen_age, term == "genderMale"),
    estimate),
  digits = 1)

# 55+ coefficient from the two-category age model
dif_age <- round(
  100 * dplyr::pull(
    dplyr::filter(differences_means_gen_age, term == "age_groups_binary55+"),
    estimate),
  digits = 1)

# Both coefficients from the three-category age model, in row order
dif_age_three <- round(
  100 * dplyr::pull(
    dplyr::filter(differences_means_gen_age,
                  term %in% c("age_groups_three25-54", "age_groups_three55+")),
    estimate),
  digits = 1)

# "Up to Secondary" coefficient from the education model
dif_educ <- round(
  100 * dplyr::pull(
    dplyr::filter(differences_means_educ, term == "educ_binaryUp to Secondary"),
    estimate),
  digits = 1)

# Stack the pooled models and give the columns their display names. Rows for
# the three headline contrasts get a Variable label and a short category
# label; every other row keeps an empty Variable and its raw term.
diffmeans <- 
  rbind(differences_means_gen_age, differences_means_educ) %>%
  dplyr::rename(
    Estimate = estimate,
    Std.error = std.error,
    `P-value` = p.value,
    `Degrees of freedom` = df,
    `Baseline category` = term) %>%
  dplyr::mutate(
    # Variable is derived first, while the column still holds the raw term
    Variable = dplyr::case_when(
      `Baseline category` == "genderMale" ~ "Gender (Female)",
      `Baseline category` == "age_groups_binary55+" ~ "Age",
      `Baseline category` == "educ_binaryUp to Secondary" ~ "Education (Secondary +)",
      TRUE ~ ""),
    `Baseline category` = dplyr::case_when(
      `Baseline category` == "genderMale" ~ "Male",
      `Baseline category` == "age_groups_binary55+" ~ "55+",
      `Baseline category` == "educ_binaryUp to Secondary" ~ "Up to secondary",
      TRUE ~ `Baseline category`))

# LaTeX version of the differences-in-means table (labelled rows only)
dmeans <- 
  diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    format = "latex",
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    align = "c",
    booktabs = TRUE,
    linesep = "") %>%
  kableExtra::kable_styling(
    latex_options = "hold_position",
    font_size = base_font_size - 2,
    full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general = "Table 9 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. p-values come from a two-sided t-test from a linear regression.",
    general_title = "",
    threeparttable = TRUE)

# Same table rendered in the knitted document (default output format)
diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    booktabs = TRUE,
    linesep = "") %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general = "Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.",
    general_title = "",
    threeparttable = TRUE)
Differences in means
Estimate Std.error P-value Degrees of freedom Baseline category Variable
0.04 0.01 0.00 10 Male Gender (Female)
-0.01 0.01 0.59 10 55+ Age
0.02 0.03 0.38 10 Up to secondary Education (Secondary +)
Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.

2.3.6 Differences in means: Age with three categories

# Swap the two-category age row for the two three-category age rows: the
# "Age" row is removed, the 25-54 and 55+ terms get a Variable label and
# both are shown with "<25" in the category column.
diffmeans <- 
  diffmeans %>%
  dplyr::filter(Variable != "Age") %>%
  dplyr::mutate(
    Variable = dplyr::case_when(
      `Baseline category` == "age_groups_three25-54" ~ "Age (25-54)",
      `Baseline category` == "age_groups_three55+" ~ "Age (55+)",
      TRUE ~ Variable),
    `Baseline category` = dplyr::if_else(
      `Baseline category` %in% c("age_groups_three25-54", "age_groups_three55+"),
      "<25",
      `Baseline category`))

# LaTeX version of the table with three age categories (overwrites dmeans)
dmeans <- 
  diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    format = "latex",
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    align = "c",
    booktabs = TRUE,
    linesep = "") %>%
  kableExtra::kable_styling(
    latex_options = "hold_position",
    font_size = base_font_size - 2,
    full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general = "Table 9 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. p-values come from a two-sided t-test from a linear regression.",
    general_title = "",
    threeparttable = TRUE)

# Three-age-category table rendered in the knitted document
diffmeans %>%
  dplyr::filter(Variable != "") %>%
  knitr::kable(
    caption =  "Differences in means",
    label = "dmeans",
    digits = 2,
    booktabs = TRUE,
    linesep = "") %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general = "Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.",
    general_title = "",
    threeparttable = TRUE)
Differences in means
Estimate Std.error P-value Degrees of freedom Baseline category Variable
0.04 0.01 0.00 10 Male Gender (Female)
-0.02 0.02 0.43 10 <25 Age (25-54)
-0.02 0.02 0.36 10 <25 Age (55+)
0.02 0.03 0.38 10 Up to secondary Education (Secondary +)
Table 8 shows the results of subgroup mean differences. Subgroup differences were generated considering only LMICs. The differences in means for gender and age do not include the Uganda 1 study, which only included female respondents under the age of 55.

2.3.7 Differences in means: by study

# Within-study contrasts: for every study and each of gender, three-category
# age and education, regress the outcome on the covariate (weighted, with
# standard errors clustered on `cluster`) and keep the non-intercept
# coefficients together with the number of observations used.
country_differences <-
  unique(df$country) %>%
  lapply(function(study) {
    study_data <- dplyr::filter(df, country == study)

    c("gender", "age_groups_three", "educ_binary") %>%
      lapply(function(covariate) {
        # Count the categories actually observed in this study. Unlike
        # table(), this ignores factor levels that have no observations.
        n_categories <- length(unique(stats::na.omit(study_data[[covariate]])))
        if (n_categories < 2) {
          return(NULL)
        }

        # Full argument names (`weights`, `clusters`) instead of relying on
        # partial matching.
        fit <- estimatr::lm_robust(
          stats::reformulate(covariate, response = "take_vaccine_num"),
          data = study_data,
          weights = weight,
          clusters = cluster,
          se_type = "stata")

        tidy(fit) %>%
          dplyr::select(estimate, std.error, p.value, df, term) %>%
          dplyr::mutate(n = fit$nobs)
      }) %>%
      dplyr::bind_rows() %>%
      dplyr::mutate(country = study)
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::arrange(term, country) %>%
  dplyr::relocate(country, term) %>%
  dplyr::filter(term != "(Intercept)") %>%
  dplyr::mutate(significant = p.value <= .05) %>%
  # Replace raw coefficient names with the category they stand for
  dplyr::mutate(
    term = dplyr::case_when(
      term == "age_groups_three25-54" ~ "25-54",
      term == "age_groups_three55+" ~ "55+",
      term == "educ_binaryUp to Secondary" ~ "Up to secondary",
      term == "genderMale" ~ "Male",
      TRUE ~ term))

# Per contrast, count the studies (Russia and USA excluded) by the sign and
# significance of their estimate.
country_differences_summary <- 
  country_differences %>%
  dplyr::filter(!(country %in% c("Russia", "USA"))) %>%
  dplyr::group_by(term) %>%
  dplyr::summarise(
    `positive ` = sum(estimate > 0),
    `positive and significant` = sum(significant & estimate > 0),
    `negative and significant` = sum(significant & estimate < 0),
    `not significant` = sum(!significant),
    n = dplyr::n())

# Display version of the within-study contrasts: add the reference category
# and the variable each contrast belongs to, round the statistics to two
# decimals and drop the significance flag.
t_country_differences <- 
  country_differences %>%
  dplyr::mutate(
    # Anything that is not the gender or education contrast is an age
    # contrast, whose reference category is "<25"
    baseline = dplyr::case_when(
      term == "Male" ~ "Female",
      term == "Up to secondary" ~ "Secondary +",
      TRUE ~ "<25"),
    group = dplyr::case_when(
      baseline == "<25" ~ "Age",
      baseline == "Secondary +" ~ "Education",
      baseline == "Female" ~ "Gender"),
    dplyr::across(c(estimate, std.error, p.value), ~ round(., 2))) %>%
  dplyr::select(country, group, baseline, term, everything(), -significant)

# LaTeX version of the within-study contrasts table
t_country <- 
  t_country_differences %>%
  knitr::kable(
    format = "latex",
    caption = "Differences between groups within studies",
    label = "countrydiff",
    col.names = c("Country", "Variable", "Baseline category", "Group",
                  "Estimate", "Std. Error", "P-value", "Degrees of freedom",
                  "N Obs"),
    digits = 2,
    format.args = list(big.mark = ",", scientific = FALSE),
    booktabs = TRUE,
    linesep = "") %>%
  kableExtra::kable_styling(
    latex_options = "scale_down",
    font_size = base_font_size - 2,
    full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::column_spec(1, width = "6em") %>%
  kableExtra::column_spec(2:3, width = "6em") %>%
  kableExtra::column_spec(4, width = "9em") %>%
  kableExtra::column_spec(5:8, width = "6em") %>%
  kableExtra::footnote(
    general = "Table 11 shows differences of means between groups within single studies. Estimates are calculated through OLS and represent the difference in the average acceptance rate between the subgroup in column Group and that in column Baseline category.",
    general_title = "",
    threeparttable = TRUE)


# Within-study contrasts as printed in the knitted document
t_country_differences %>%
  knitr::kable(
    caption = "Differences between groups within studies ",
    digits = 2)
Differences between groups within studies
country group baseline term estimate std.error p.value df n
Burkina Faso Age <25 25-54 -0.13 0.10 0.21 119 120
Colombia Age <25 25-54 -0.01 0.04 0.79 689 690
India Age <25 25-54 0.08 0.03 0.01 141 1680
Mozambique Age <25 25-54 -0.12 0.01 0.00 162 860
Nepal Age <25 25-54 -0.01 0.01 0.32 89 1324
Nigeria Age <25 25-54 0.09 0.03 0.01 1867 1868
Pakistan 1 Age <25 25-54 -0.11 0.04 0.00 105 1633
Russia Age <25 25-54 -0.06 0.02 0.01 22124 22125
Rwanda Age <25 25-54 -0.04 0.02 0.03 1354 1355
Sierra Leone 1 Age <25 25-54 0.00 0.03 0.94 1069 1070
Sierra Leone 2 Age <25 25-54 0.05 0.04 0.30 190 2087
Uganda 1 Age <25 25-54 0.00 0.02 0.83 497 3198
Uganda 2 Age <25 25-54 0.00 0.03 0.89 309 1366
USA Age <25 25-54 0.14 0.04 0.00 1958 1959
Burkina Faso Age <25 55+ -0.15 0.24 0.53 119 120
Colombia Age <25 55+ -0.02 0.06 0.79 689 690
India Age <25 55+ 0.05 0.04 0.19 141 1680
Mozambique Age <25 55+ -0.08 0.02 0.00 162 860
Nepal Age <25 55+ -0.04 0.02 0.06 89 1324
Nigeria Age <25 55+ 0.06 0.05 0.28 1867 1868
Pakistan 1 Age <25 55+ -0.06 0.07 0.45 105 1633
Russia Age <25 55+ 0.07 0.03 0.03 22124 22125
Rwanda Age <25 55+ -0.15 0.07 0.04 1354 1355
Sierra Leone 1 Age <25 55+ -0.04 0.07 0.56 1069 1070
Sierra Leone 2 Age <25 55+ 0.07 0.05 0.12 190 2087
Uganda 2 Age <25 55+ -0.03 0.04 0.47 309 1366
USA Age <25 55+ 0.18 0.04 0.00 1958 1959
Burkina Faso Education Secondary + Up to secondary 0.09 0.03 0.00 976 977
Colombia Education Secondary + Up to secondary -0.05 0.03 0.10 1010 1011
India Education Secondary + Up to secondary -0.02 0.04 0.59 100 340
Mozambique Education Secondary + Up to secondary 0.04 0.03 0.17 160 828
Pakistan 1 Education Secondary + Up to secondary -0.10 0.04 0.01 105 1621
Pakistan 2 Education Secondary + Up to secondary -0.07 0.03 0.00 1491 1492
Russia Education Secondary + Up to secondary -0.01 0.01 0.31 22124 22125
Rwanda Education Secondary + Up to secondary 0.16 0.03 0.00 1354 1355
Sierra Leone 1 Education Secondary + Up to secondary 0.06 0.03 0.03 1037 1038
Sierra Leone 2 Education Secondary + Up to secondary -0.01 0.02 0.63 190 2110
Uganda 1 Education Secondary + Up to secondary 0.05 0.03 0.12 494 2739
Uganda 2 Education Secondary + Up to secondary 0.11 0.03 0.00 309 1366
USA Education Secondary + Up to secondary -0.21 0.03 0.00 1958 1959
Burkina Faso Gender Female Male 0.06 0.03 0.06 976 977
Colombia Gender Female Male 0.04 0.03 0.18 1011 1012
India Gender Female Male 0.02 0.02 0.22 141 1680
Mozambique Gender Female Male 0.05 0.02 0.02 162 862
Nepal Gender Female Male 0.00 0.01 0.98 89 1324
Nigeria Gender Female Male 0.02 0.02 0.30 1867 1868
Pakistan 1 Gender Female Male 0.08 0.02 0.00 105 1629
Russia Gender Female Male 0.16 0.01 0.00 22124 22125
Rwanda Gender Female Male 0.09 0.02 0.00 1354 1355
Sierra Leone 1 Gender Female Male 0.06 0.03 0.03 1069 1070
Sierra Leone 2 Gender Female Male -0.01 0.02 0.56 190 2110
Uganda 2 Gender Female Male 0.03 0.02 0.17 309 1366
USA Gender Female Male 0.17 0.03 0.00 1958 1959
# Sign / significance counts as printed in the knitted document
country_differences_summary %>%
  knitr::kable(
    caption = "Differences between groups within studies (Summary)",
    digits = 2)
Differences between groups within studies (Summary)
term positive positive and significant negative and significant not significant n
25-54 6 2 3 7 12
55+ 3 0 2 9 11
Male 9 4 0 7 11
Up to secondary 6 4 2 5 11

2.3.8 Metaplus analysis

# Metaplus (variance between countries, included in text).
# Inputs are the study-level estimates and standard errors from main_results,
# leaving out USA, Russia and the pooled "All" row.
mp <- 
  main_results %>%
  dplyr::filter(!(group %in% c("USA", "Russia", "All"))) %>%
  {
    metaplus::metaplus(yi = .$estimate, sei = .$std.error)
  }

# Row 2 of the results table is tau2 and row 1 is muhat (see the printed
# table); column 1 holds the point estimates.
mp_tau <- mp$results[2, 1]
mp_ratio <- sqrt(mp_tau) / mp$results[1, 1]

knitr::kable(mp$results, caption = "Metaplus  results")
Metaplus results
Est. 95% ci.lb 95% ci.ub pvalue
muhat 0.8033208 0.7532172 0.8530296 0
tau2 0.0070689 . . .

2.3.9 Generate quantities used in text

# Mean of LMICs: the pooled "All" estimate
ans_mean <- dplyr::pull(
  dplyr::filter(ans, cat == "All", group == "All"),
  estimate)

# Lower and upper bounds of the interval around the pooled estimate
ans_mean_low <- dplyr::pull(
  dplyr::filter(ans, cat == "All", group == "All"),
  conf.low)

ans_mean_high <- dplyr::pull(
  dplyr::filter(ans, cat == "All", group == "All"),
  conf.high)

# Study-level rows for LMICs (no USA, Russia or pooled "All" row)
ans_stats <- dplyr::filter(
  ans,
  cat == "All",
  !(group %in% c("USA", "Russia", "All")))

# Median and interquartile range across LMIC studies
ans_median <- stats::median(ans_stats$estimate)
ans_iqr <- stats::IQR(ans_stats$estimate)

# Smallest and largest acceptability. Only Russia and USA are filtered out
# here, so the pooled "All" row is still part of the comparison.
ans_min <- min(dplyr::pull(
  dplyr::filter(ans, cat == "All", group != "Russia", group != "USA"),
  estimate))

ans_max <- max(dplyr::pull(
  dplyr::filter(ans, cat == "All", group != "Russia", group != "USA"),
  estimate))

# Rankings by estimate: descending over every "All"-category row, ascending
# over the rows that are neither Russia nor USA
top_all <- dplyr::arrange(
  dplyr::filter(ans, cat == "All"),
  dplyr::desc(estimate))

bottom_all <- dplyr::arrange(
  dplyr::filter(ans, cat == "All", group != "Russia", group != "USA"),
  estimate)

# Overall acceptability rows for the USA and Russia
usa_ans <- dplyr::filter(ans, cat == "All", group == "USA")
rus_ans <- dplyr::filter(ans, cat == "All", group == "Russia")

2.3.10 Table version of Figure 1

# Here we turn the estimates into percentages, pair them with their
# confidence intervals and go from long to wide. The same recipe was repeated
# eight times, so it now lives in two small helpers.

# Format proportions as percentages with exactly one decimal (as text).
acceptance_pct <- function(x) format(round(100 * x, 1), nsmall = 1)

# Overall table: one row per group with the formatted estimate and a
# "(low, high)" interval, in columns suffixed "__main".
acceptance_main_table <- function(results) {
  results %>%
    dplyr::mutate(
      estimate__main = acceptance_pct(estimate),
      conf_int__main = paste0("(", acceptance_pct(conf.low), ", ",
                              acceptance_pct(conf.high), ")")) %>%
    dplyr::select(group, estimate__main, conf_int__main)
}

# Subgroup table: same formatting, then spread so that every category of
# `subgroup` (a column name given as a string) gets its own
# estimate__<category> and conf_int__<category> column.
acceptance_subgroup_table <- function(results, subgroup) {
  results %>%
    dplyr::mutate(
      dplyr::across(c(estimate, conf.low, conf.high), acceptance_pct),
      conf_int = paste0("(", conf.low, ", ", conf.high, ")")) %>%
    dplyr::select(group, dplyr::all_of(subgroup), estimate, conf_int) %>%
    tidyr::pivot_wider(names_from = dplyr::all_of(subgroup),
                       values_from = c(estimate, conf_int),
                       names_sep = "__")
}

# Tables built from the main result objects
main_table   <- acceptance_main_table(main_results)
gender_table <- acceptance_subgroup_table(acc_by_gender, "gender")
educ_table   <- acceptance_subgroup_table(acc_by_educ_binary, "educ_binary")
age_table    <- acceptance_subgroup_table(acc_by_age, "age_groups_three")

# Tables built from the *_n result objects
main_table_n   <- acceptance_main_table(main_results_n)
gender_table_n <- acceptance_subgroup_table(acc_by_gender_n, "gender")
educ_table_n   <- acceptance_subgroup_table(acc_by_educ_binary_n, "educ_binary")
age_table_n    <- acceptance_subgroup_table(acc_by_age_n, "age_groups_three")

# Pooled row of the *_n tables: attach the gender, education and age columns
# to the overall estimate, keep only the "All" row and relabel it.
# The join key is given explicitly so the join cannot silently pick up extra
# key columns (and does not emit a "Joining, by = ..." message).
all_tables_n <- 
  main_table_n %>% 
  dplyr::left_join(gender_table_n, by = "group") %>%
  dplyr::left_join(educ_table_n, by = "group") %>% 
  dplyr::left_join(age_table_n, by = "group") %>%
  dplyr::filter(group == "All") %>%
  dplyr::mutate(group = "All LMICs (National)")

# Table behind Figure 1: one estimate row and one interval row per study,
# with the overall value followed by the gender, education and age columns.
all_tables <- 
  main_table %>% 
  # Explicit key: the tables are meant to be matched on `group` only
  dplyr::left_join(gender_table, by = "group") %>%
  dplyr::left_join(educ_table, by = "group") %>% 
  dplyr::left_join(age_table, by = "group") %>%
  dplyr::bind_rows(all_tables_n) %>%
  dplyr::select(group, everything()) %>%
  # Move the pooled rows, Russia and USA to the end of the ordering
  dplyr::mutate(group = as.factor(group),
                group = forcats::fct_relevel(group, "All", "All LMICs (National)", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Split each row into an "estimate" row and a "conf_int" row; the part of
  # the column name after "__" becomes the new column name
  tidyr::pivot_longer(cols = c(starts_with("estimate__"), starts_with("conf_int__")),
                      names_to = c("type", ".value"),
                      names_pattern = "(.*)__(.*)") %>% 
  # The study name is shown on the estimate row only
  dplyr::mutate(group = ifelse(type == "conf_int", "", as.character(group)),
                group = ifelse(group == "All", "All LMICs", group)) %>% 
  dplyr::select(-type) %>% 
  dplyr::rename("Country" = "group", 
                "Average acceptability" = "main")



# LaTeX version of the Figure 1 table.
# The data frame is supplied once, through the pipe. It used to be passed a
# second time positionally, which bound it to kable()'s `digits` argument.
tab_fig1 <- 
  all_tables %>%
  knitr::kable(
    caption = "If a COVID-19 vaccine becomes available in [country], would you take it? Disaggregated by subgroups", 
    format = "latex", booktabs = TRUE, linesep = "", 
    format.args = list(big.mark = ",", scientific = FALSE), 
    escape = FALSE, align = "lcccccccc", label = "maintabledis") %>% 
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::add_header_above(c("", "", "Gender" = 2, "Education" = 2, "Age" = 3), bold = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 4 shows percentage of respondents willing to take the COVID-19 vaccine as plotted in Figure 1. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE) 


# Figure 1 table as printed in the knitted document
all_tables %>%
  knitr::kable(
    align = "lcccccccc",
    booktabs = TRUE,
    caption = "If a COVID-19 vaccine becomes available in [country], would you take it? Disaggregated by subgroups") %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::add_header_above(
    c("", "", "Gender" = 2, "Education" = 2, "Age" = 3),
    bold = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general = "Table 4 shows percentage of respondents willing to take the COVID-19 vaccine as plotted in Figure 1. A 95% confidence interval is shown between parentheses",
    general_title = "",
    threeparttable = TRUE)
If a COVID-19 vaccine becomes available in [country], would you take it? Disaggregated by subgroups
Gender
Education
Age
Country Average acceptability Female Male > Secondary Up to Secondary <25 25-54 55+
Burkina Faso 66.5 62.1 68.4 60.8 70.1 76.0 63.2 .
(63.5, 69.5) (56.3, 67.9) (65.0, 71.9) (55.9, 65.8) (66.4, 73.8) (58.0, 94.0) (53.0, 73.4) .
Colombia 74.9 73.5 77.3 78.1 73.4 75.4 74.2 73.8
(72.2, 77.6) (70.1, 77.0) (73.0, 81.7) (73.6, 82.5) (70.1, 76.8) (67.5, 83.4) (70.2, 78.3) (65.0, 82.6)
India 84.3 82.4 84.7 87.8 85.9 77.6 85.4 83.1
(82.3, 86.3) (79.0, 85.8) (82.5, 87.0) (81.1, 94.6) (82.5, 89.2) (71.6, 83.6) (83.4, 87.3) (77.6, 88.5)
Mozambique 89.1 86.2 91.3 86.1 89.7 . 88.3 91.7
(86.5, 91.7) (82.5, 90.0) (88.4, 94.1) (81.8, 90.4) (86.5, 92.8) . (85.3, 91.2) (88.1, 95.4)
Nepal 96.6 96.4 96.4 . . 97.8 96.6 93.8
(95.5, 97.6) (94.6, 98.2) (95.1, 97.7) . . (95.7, 99.8) (95.2, 97.9) (90.4, 97.2)
Nigeria 76.2 74.9 77.0 . . 69.0 77.6 74.7
(74.3, 78.2) (71.7, 78.1) (74.6, 79.4) . . (63.3, 74.7) (75.5, 79.7) (65.8, 83.6)
Pakistan 1 76.1 72.2 80.1 83.6 74.0 86.3 75.6 80.8
(70.0, 82.3) (65.6, 78.8) (73.8, 86.4) (76.6, 90.5) (67.4, 80.5) (78.4, 94.1) (69.3, 81.8) (64.9, 96.7)
Pakistan 2 66.5 . . 71.4 64.2 . . .
(64.1, 68.9) . . (67.3, 75.5) (61.2, 67.1) . . .
Rwanda 84.9 79.4 88.0 71.4 87.7 88.1 83.8 73.3
(82.9, 86.8) (75.8, 83.0) (85.8, 90.2) (65.5, 77.2) (85.8, 89.7) (85.0, 91.1) (81.3, 86.3) (59.2, 87.3)
Sierra Leone 1 78.0 74.1 80.1 74.4 80.2 78.0 78.3 74.0
(75.5, 80.5) (69.5, 78.7) (77.2, 83.1) (70.1, 78.7) (77.0, 83.3) (72.4, 83.6) (75.4, 81.1) (61.4, 86.6)
Sierra Leone 2 87.9 88.6 87.7 88.8 87.8 82.9 87.6 90.0
(86.2, 89.6) (85.7, 91.5) (85.9, 89.5) (85.0, 92.5) (86.0, 89.6) (73.9, 91.9) (85.6, 89.5) (87.4, 92.6)
Uganda 1 85.8 85.8 . 80.1 84.8 85.5 85.9 .
(84.4, 87.2) (84.4, 87.2) . (74.4, 85.9) (83.2, 86.5) (82.7, 88.4) (84.3, 87.4) .
Uganda 2 76.5 74.9 78.0 68.6 79.8 76.5 77.0 73.7
(74.3, 78.7) (71.5, 78.3) (75.2, 80.9) (64.3, 72.9) (77.3, 82.2) (71.0, 82.1) (74.4, 79.6) (67.3, 80.0)
All LMICs 80.3 79.2 82.6 77.4 79.8 82.8 81.1 79.1
(74.9, 85.6) (73.4, 85.0) (77.4, 87.9) (71.4, 83.4) (74.1, 85.4) (76.9, 88.7) (75.6, 86.6) (72.5, 85.7)
All LMICs (National) 78.4 75.5 80.3 74.7 79.8 80.1 77.4 74.4
(67.9, 89.0) (63.6, 87.5) (70.2, 90.4) (62.1, 87.3) (69.8, 89.9) (73.4, 86.7) (65.7, 89.2) (61.7, 87.2)
Russia 30.4 22.6 38.5 31.0 29.6 33.5 27.6 40.0
(29.1, 31.7) (20.9, 24.2) (36.5, 40.5) (29.6, 32.5) (27.3, 32.0) (29.2, 37.7) (26.2, 28.9) (35.9, 44.0)
USA 64.6 56.1 73.4 72.3 51.5 51.0 64.9 69.4
(61.8, 67.3) (52.1, 60.1) (69.8, 76.9) (69.5, 75.0) (46.0, 57.0) (43.5, 58.6) (61.1, 68.7) (64.8, 73.9)
Table 4 shows percentage of respondents willing to take the COVID-19 vaccine as plotted in Figure 1. A 95% confidence interval is shown between parentheses

2.4 Reasons to take or not to take

2.4.1 Table: Reasons to take the vaccine.

# People give idiosyncratic reasons for taking the vaccine; these were
# recoded. Only the core reasons, common to almost all studies, are kept here.
yes_vars <- names(
  dplyr::select(df2, yes_vaccine_1, yes_vaccine_2, yes_vaccine_3))

## Generate data for analysis of yes reasons: one call to reasons_together()
## per reason, stacked, with estimate and interval as whole percentages
yes_vacc1 <- 
  yes_vars %>%
  lapply(function(reason) reasons_together(df = df2, reason = reason, num = "Yes")) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(across(c(conf.low, conf.high, estimate), ~ round(100 * ., digits = 0)))

# Get the percentage per "yes" reason and lay it out for display: two rows
# per study (estimate, then confidence interval) and one column per reason.
yes_vacc2 <- 
  yes_vacc1 %>%
  # Estimates become text; the interval becomes a single "(low, high)" string
  dplyr::mutate(estimate = format(estimate, nsmall = 0),
                conf_int = paste0("(", conf.low, 
                                  ", ", conf.high, ")")) %>%
  dplyr::select(group, estimate, conf_int, outcome, n) %>%
  # Wide: one row per group, columns estimate__<outcome>, conf_int__<outcome>
  # and n__<outcome>
  tidyr::pivot_wider(names_from = outcome, values_from = c(estimate, conf_int, n), names_sep = "__") %>%
  # Back to long for estimate / conf_int only: `type` records which of the
  # two a row holds and each outcome gets its own column. The n__ columns are
  # not pivoted, so they are repeated on both rows.
  tidyr::pivot_longer(cols = c(starts_with("estimate__"), starts_with("conf_int__")),
                      names_to = c("type", ".value"),
                      names_pattern = "(.*)__(.*)") %>%
  dplyr::rowwise() %>% 
  # Single N per row taken from the n__ columns; NA for the pooled "All" row.
  # ifelse() with a length-one test returns one value, so if the outcomes
  # report different n only the first distinct non-missing one is kept.
  dplyr::mutate(
    n = ifelse(group == "All", NA, unique(na.omit(c_across(starts_with("n__")))))) %>% 
  dplyr::ungroup() %>% 
  # Order rows with "All", "Russia" and "USA" last
  dplyr::mutate(
    group = forcats::fct_relevel(as.factor(group), "All", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Study name and N are blanked on the confidence-interval rows
  dplyr::mutate(across(c(group, n), ~ifelse(type == "conf_int", "", as.character(.))),
                group = ifelse(group == "All", "All LMICs", group)) %>% 
  # NOTE(review): the count columns are named n__<outcome> (double underscore),
  # so -starts_with("n_yes_vaccine") looks like it matches nothing; those
  # columns are dropped anyway because they are never selected. TODO confirm.
  dplyr::select(group, n, type, starts_with("yes_vaccine_"), -starts_with("n_yes_vaccine"), -type)

# Column headers for the reasons table, in the column order of yes_vacc2
cnames <- c("Study", "N", "Self", "Family", 
          "Community")


# LaTeX version of the reasons-to-take table (core reasons)
tab_reasons_y <- 
  yes_vacc2 %>%
  knitr::kable(
    format = "latex",
    caption = "Reasons to take the vaccine",
    label = "yes",
    col.names = cnames,
    align = "lcccc",
    escape = FALSE,
    format.args = list(big.mark = ",", scientific = FALSE),
    booktabs = TRUE,
    linesep = "") %>%
  kableExtra::kable_styling(
    font_size = base_font_size - 2,
    full_width = FALSE) %>%
  kableExtra::add_header_above(c(" " = 2, "Protection" = 3), bold = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::column_spec(1, width = "8em") %>%
  kableExtra::column_spec(2:4, width = "4em") %>%
  kableExtra::footnote(
    general = "Table 3 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage corresponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.",
    general_title = "",
    threeparttable = TRUE)

# Reasons-to-take table as printed in the knitted document
yes_vacc2 %>%
  knitr::kable(
    caption = "\\label{yes}Reasons to take the vaccine",
    col.names = cnames,
    format.args = list(big.mark = ",", scientific = FALSE),
    booktabs = TRUE,
    linesep = "") %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::add_header_above(c(" " = 2, "Protection" = 3), bold = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::column_spec(1, width = "8em") %>%
  kableExtra::column_spec(2:5, width = "8em") %>%
  kableExtra::footnote(
    general = "Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.",
    general_title = "",
    threeparttable = TRUE)
Reasons to take the vaccine
Protection
Study N Self Family Community
Burkina Faso 651 76 42 7
(73, 79) (38, 46) (5, 9)
Colombia 756 91 23 12
(88, 93) (20, 26) (10, 14)
Mozambique 768 83 32 4
(80, 86) (27, 38) (2, 5)
Nepal 1341 96 34 20
(95, 98) (32, 37) (17, 22)
Nigeria 1424 89 35 21
(88, 91) (33, 38) (19, 23)
Rwanda 1152 98 26 11
(97, 99) (23, 28) (9, 13)
Sierra Leone 1 836 94 37 21
(92, 96) (34, 40) (18, 23)
Sierra Leone 2 1855 91 62 21
(88, 93) (57, 66) (16, 27)
Uganda 1 2885 96 36 9
(95, 97) (34, 38) (8, 10)
Uganda 2 1045 96 28 11
(95, 97) (25, 31) (9, 12)
All LMICs . 91 36 14
(86, 96) (28, 43) (9, 18)
Russia 5887 76 69 41
(74, 78) (67, 71) (38, 43)
USA 1313 94 92 89
(92, 95) (90, 94) (87, 91)
Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.

2.4.2 Calculate numbers used in text for acceptance

# All-LMICs estimate for self protection (yes_vaccine_1)
yes_all <- dplyr::filter(yes_vacc1, group == "All", outcome == "yes_vaccine_1")

# Study-level LMIC rows for self protection, ranked from the top and from
# the bottom
yes_ <- dplyr::filter(
  yes_vacc1,
  outcome == "yes_vaccine_1",
  group != "All", group != "USA", group != "Russia")

yes_top <- dplyr::arrange(yes_, dplyr::desc(estimate))
yes_bottom <- dplyr::arrange(yes_, estimate)

# All-LMICs estimate for protection of family (yes_vaccine_2)
yes_all_2 <- dplyr::filter(yes_vacc1, group == "All", outcome == "yes_vaccine_2")

# Study-level LMIC rows for protection of family, ranked both ways
yes_2 <- dplyr::filter(
  yes_vacc1,
  outcome == "yes_vaccine_2",
  group != "All", group != "USA", group != "Russia")

yes2_top <- dplyr::arrange(yes_2, dplyr::desc(estimate))
yes2_bottom <- dplyr::arrange(yes_2, estimate)

# Self protection in the US and Russia
yes_usa <- dplyr::filter(yes_vacc1, group == "USA", outcome == "yes_vaccine_1")
yes_rus <- dplyr::filter(yes_vacc1, group == "Russia", outcome == "yes_vaccine_1")

# Protection of family in the US and Russia
yes2_usa <- dplyr::filter(yes_vacc1, group == "USA", outcome == "yes_vaccine_2")
yes2_rus <- dplyr::filter(yes_vacc1, group == "Russia", outcome == "yes_vaccine_2")
# Full set of "yes" reasons; this replaces the three-reason yes_vars above
yes_vars <- names(
  dplyr::select(
    df2,
    yes_vaccine_1, yes_vaccine_2, yes_vaccine_3,
    yes_vaccine_4, yes_vaccine_5, yes_vaccine_666))

## Generate data for analysis of yes reasons: one call to reasons_together()
## per reason, stacked, with estimate and interval as whole percentages
yes_vacc1 <- 
  yes_vars %>%
  lapply(function(reason) reasons_together(df = df2, reason = reason, num = "Yes")) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(across(c(conf.low, conf.high, estimate), ~ round(100 * ., digits = 0)))


# Get the percentage per "yes" reason and lay it out for display: two rows
# per study (estimate, then confidence interval) and one column per reason.
yes_vacc2 <- 
  yes_vacc1 %>%
  # Estimates become text; the interval becomes a single "(low, high)" string
  dplyr::mutate(estimate = format(estimate, nsmall = 0),
                conf_int = paste0("(", conf.low, 
                                  ", ", conf.high, ")")) %>%
  dplyr::select(group, estimate, conf_int, outcome, n) %>%
  # Wide: one row per group, columns estimate__<outcome>, conf_int__<outcome>
  # and n__<outcome>
  tidyr::pivot_wider(names_from = outcome, values_from = c(estimate, conf_int, n), names_sep = "__") %>%
  # Back to long for estimate / conf_int only: `type` records which of the
  # two a row holds and each outcome gets its own column. The n__ columns are
  # not pivoted, so they are repeated on both rows.
  tidyr::pivot_longer(cols = c(starts_with("estimate__"), starts_with("conf_int__")),
                      names_to = c("type", ".value"),
                      names_pattern = "(.*)__(.*)") %>%
  dplyr::rowwise() %>% 
  # Single N per row taken from the n__ columns; NA for the pooled "All" row.
  # ifelse() with a length-one test returns one value, so if the outcomes
  # report different n only the first distinct non-missing one is kept.
  dplyr::mutate(
    n = ifelse(group == "All", NA, unique(na.omit(c_across(starts_with("n__")))))) %>% 
  dplyr::ungroup() %>% 
  # Order rows with "All", "Russia" and "USA" last
  dplyr::mutate(
    group = forcats::fct_relevel(as.factor(group), "All", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Study name and N are blanked on the confidence-interval rows
  dplyr::mutate(across(c(group, n), ~ifelse(type == "conf_int", "", as.character(.))),
                group = ifelse(group == "All", "All LMICs", group)) %>% 
  # NOTE(review): the count columns are named n__<outcome> (double underscore),
  # so -starts_with("n_yes_vaccine") looks like it matches nothing; those
  # columns are dropped anyway because they are never selected. TODO confirm.
  dplyr::select(group, n, type, starts_with("yes_vaccine_"), -starts_with("n_yes_vaccine"), -type)

# Column headers for the all-categories table, in the column order of yes_vacc2
cnames <- c("Study", "N", "Self", "Family", 
          "Community", "Health workers", "Government", "Other")

#Table to Latex
tab_reasons_y_all <- 
  yes_vacc2  %>% 
  knitr::kable(col.names = cnames,
        caption = "Reasons to take the vaccine- all categories", 
        format = "latex", booktabs = T, linesep = "", 
        label = "yesall", 
        format.args = list(big.mark = ",", scientific = FALSE), 
        escape = F, align = c("l", rep("c", 7))) %>%
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::add_header_above(c(" " = 2, "Protection" = 3, "If recommended by" = 2, " " = 1), 
                               bold = TRUE) %>% 
  kableExtra::column_spec(1:8, width = "7em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 5 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = T) 

# Same table as tab_reasons_y_all, printed in the knitted document using
# kable's default output format. Relies on `cnames` as defined just above.
yes_vacc2 %>%
  knitr::kable(col.names = cnames,
               caption = "Reasons to take the vaccine. All categories", 
               booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)),
               format.args = list(big.mark = ",", scientific = FALSE)) %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::add_header_above(c(" " = 2, "Protection" = 3, "If recommended by" = 2, " " = 1), 
                               bold = TRUE) %>% 
  kableExtra::column_spec(1:8, width = "7em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 5 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage corresponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE) 
Reasons to take the vaccine. All categories
Protection
If recommended by
Study N Self Family Community Health workers Government Other
Burkina Faso 651 76 42 7 6 19 2
(73, 79) (38, 46) (5, 9) (4, 8) (16, 22) (1, 3)
Colombia 756 91 23 12 1 2 6
(88, 93) (20, 26) (10, 14) (0, 2) (1, 3) (4, 7)
Mozambique 768 83 32 4 . 7 3
(80, 86) (27, 38) (2, 5) . (5, 8) (2, 4)
Nepal 1341 96 34 20 2 3 7
(95, 98) (32, 37) (17, 22) (1, 2) (2, 4) (5, 9)
Nigeria 1424 89 35 21 . 6 4
(88, 91) (33, 38) (19, 23) . (4, 7) (3, 5)
Rwanda 1152 98 26 11 1 5 1
(97, 99) (23, 28) (9, 13) (0, 1) (4, 6) (1, 2)
Sierra Leone 1 836 94 37 21 12 23 7
(92, 96) (34, 40) (18, 23) (10, 14) (20, 25) (5, 9)
Sierra Leone 2 1855 91 62 21 59 . 16
(88, 93) (57, 66) (16, 27) (54, 63) . (11, 21)
Uganda 1 2885 96 36 9 . 10 6
(95, 97) (34, 38) (8, 10) . (9, 12) (5, 7)
Uganda 2 1045 96 28 11 1 15 2
(95, 97) (25, 31) (9, 12) (1, 2) (13, 17) (1, 3)
All LMICs . 91 36 14 12 10 5
(86, 96) (28, 43) (9, 18) (-8, 31) (4, 16) (2, 8)
Russia 5887 76 69 41 11 6 18
(74, 78) (67, 71) (38, 43) (10, 13) (5, 7) (16, 20)
USA 1313 94 92 89 . 67 .
(92, 95) (90, 94) (87, 91) . (64, 70) .
Table 5 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses

2.5 Reasons to take by age

# The age breakdown only covers the first three reasons
yes_vars <- names(
  dplyr::select(df2, yes_vaccine_1, yes_vaccine_2, yes_vaccine_3)
)

## Generate data for analysis of yes reasons for different age groups
# Each table stacks one age_analysis() result per reason, for respondents
# answering "Yes", and is tagged with the label of its age band.
# filter_by receives an unquoted name; how age_analysis() uses it is
# defined elsewhere in the file.
yes_vacc_age_1 <-
  lapply(
    yes_vars, age_analysis,
    df = df2, num = "Yes", filter_by = age_less24
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(age = "<25")

yes_vacc_age_2 <-
  lapply(
    yes_vars, age_analysis,
    df = df2, num = "Yes", filter_by = age_25_54
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(age = "25-54")

yes_vacc_age_3 <-
  lapply(
    yes_vars, age_analysis,
    df = df2, num = "Yes", filter_by = age_55_more
  ) %>%
  dplyr::bind_rows() %>%
  dplyr::mutate(age = "55+")

# Stack the three age bands, express estimates as whole percentages and
# reshape so that each (statistic, reason) pair is a row and each age band
# is a column.
yes_vacc_age <-
  rbind(yes_vacc_age_1, yes_vacc_age_2, yes_vacc_age_3) %>%
  dplyr::mutate(
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 0))
  ) %>%
  dplyr::mutate(
    estimate = format(estimate, nsmall = 0),
    conf_int = paste0("(", conf.low, ", ", conf.high, ")"),
    # n becomes text so it can share columns with estimate and conf_int
    n = as.character(n)
  ) %>%
  dplyr::select(group, estimate, conf_int, outcome, age, n) %>%
  # Wide columns are named <statistic>__<outcome>__<age>
  tidyr::pivot_wider(
    names_from = c(outcome, age),
    values_from = c(estimate, conf_int, n),
    names_sep = "__"
  ) %>%
  # The greedy first capture group splits each name at its LAST "__":
  # `type` holds "<statistic>__<outcome>" and the age band becomes the
  # name of the value column
  tidyr::pivot_longer(
    cols = c(
      starts_with("estimate__"),
      starts_with("conf_int__"),
      starts_with("n__")
    ),
    names_to = c("type", ".value"),
    names_pattern = "(.*)__(.*)"
  )

# Rows of the pivoted age table that belong to one reason, with `type`
# collapsed from "<statistic>__<outcome>" to the bare statistic name
# ("estimate", "conf_int" or "n").
#
# tbl:       table with a character `type` column (here yes_vacc_age)
# reason_id: outcome name to keep; used as a grepl() pattern on `type`
#
# Replaces three copies of the same pipeline that differed only in the
# outcome name.
reason_rows_by_age <- function(tbl, reason_id) {
  tbl %>%
    dplyr::filter(grepl(reason_id, type)) %>%
    dplyr::mutate(
      type = ifelse(grepl("estimate", type), "estimate", type),
      type = ifelse(grepl("conf_int", type), "conf_int", type),
      type = ifelse(grepl("n__", type), "n", type)
    )
}

y1 <- reason_rows_by_age(yes_vacc_age, "yes_vaccine_1")

y2 <- reason_rows_by_age(yes_vacc_age, "yes_vaccine_2")

y3 <- reason_rows_by_age(yes_vacc_age, "yes_vaccine_3")

# Put the three reasons side by side: after the joins each study has an
# estimate, a conf_int and an n row, with three age-band columns per reason.
yes_vacc_age <-
  y1 %>%
  dplyr::left_join(y2, by = c("group", "type")) %>%
  dplyr::left_join(y3, by = c("group", "type")) %>%
  # Move the pooled row, Russia and USA to the bottom, in that order
  dplyr::mutate(
    group = forcats::fct_relevel(
      as.factor(group),
      "All", "Russia", "USA",
      after = Inf
    )
  ) %>%
  dplyr::arrange(group) %>%
  dplyr::mutate(
    # The first column shows the study name on the estimate row and a row
    # label on the two rows underneath it
    group = ifelse(type == "conf_int", "Conf. interval", as.character(group)),
    group = ifelse(type == "n", "n", as.character(group)),
    group = ifelse(group == "All", "All LMICs", group)
  ) %>%
  dplyr::select(-type)

# Column headers for the age table: study, then the three age bands
# repeated once per reason
cnames <- c("Study", "<25", "25-54", "55+", "<25", "25-54", "55+", "<25", "25-54", "55+")


#Table to Latex
# LaTeX rendering of yes_vacc_age, stored for later use. base_font_size is
# defined elsewhere in the file. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
tab_reasons_y_age <- 
  yes_vacc_age %>%
  knitr::kable(col.names = cnames,
      caption = "Reasons to take the vaccine- by age groups", format = "latex", booktabs = TRUE, 
      linesep = "", label = "yes1", 
      format.args = list(big.mark = ",", scientific = FALSE), 
      escape = FALSE, align = c("l", rep("c", 9))) %>%
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::add_header_above(c(" " = 1, "Self" = 3, "Family" = 3, "Community" = 3), bold = TRUE) %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  # Rule after every third row, i.e. after each study's estimate /
  # interval / n triple. NOTE(review): the upper bound 36 is hard-coded and
  # presumably matches a 39-row table; confirm if the study list changes.
  kableExtra::row_spec(seq(from = 3, to = 36, by = 3), hline_after = TRUE) %>%
  kableExtra::column_spec(1, width = "7em") %>% 
  kableExtra::column_spec(2:10, width = "5em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 6 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine by age groups. The number of observations and percentage corresponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE) 


# Same age table printed in the knitted document using kable's default
# output format. Relies on `cnames` as defined just above.
# NOTE(review): the footnote says "Table 2" while the LaTeX version of this
# table says "Table 6"; left as is, confirm which number is intended.
yes_vacc_age %>%
  knitr::kable(col.names = cnames,
      caption = "\\label{yes1}Reasons to take the vaccine", 
      booktabs = TRUE, linesep = "", 
      format.args = list(big.mark = ",", scientific = FALSE)) %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::add_header_above(c(" " = 1, "Self" = 3, "Family" = 3, "Community" = 3), bold = TRUE) %>% 
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "8em") %>% 
  kableExtra::column_spec(2:10, width = "4em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage corresponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.",
    threeparttable = TRUE) 
Reasons to take the vaccine
Self
Family
Community
Study <25 25-54 55+ <25 25-54 55+ <25 25-54 55+
Burkina Faso 77 59 100 26 64 66 11 2 0
Conf. interval (56, 99) (46, 72) (100, 100) (4, 48) (51, 77) (-80, 211) (-5, 26) (-2, 5) (0, 0)
n 19 57 3 19 57 3 19 57 3
Colombia 91 91 90 26 26 16 12 13 14
Conf. interval (86, 97) (88, 94) (83, 97) (17, 35) (21, 31) (8, 25) (4, 20) (9, 16) (6, 22)
n 90 349 73 90 349 73 90 349 73
Mozambique 62 84 80 50 32 34 12 4 2
Conf. interval (19, 106) (81, 87) (75, 86) (5, 95) (26, 38) (27, 41) (-17, 42) (2, 6) (0, 4)
n 8 571 188 8 571 188 8 571 188
Nepal 97 97 92 31 36 27 15 20 19
Conf. interval (94, 100) (96, 98) (87, 97) (25, 37) (33, 39) (19, 36) (10, 20) (17, 23) (13, 25)
n 225 890 162 225 890 162 225 890 162
Nigeria 91 89 94 31 36 31 22 21 21
Conf. interval (87, 95) (87, 91) (89, 100) (25, 38) (33, 39) (20, 42) (16, 29) (18, 23) (11, 31)
n 178 1175 71 178 1175 71 178 1175 71
Rwanda 98 98 100 22 28 29 10 11 10
Conf. interval (97, 100) (97, 99) (100, 100) (17, 26) (24, 31) (12, 46) (7, 13) (9, 14) (-1, 21)
n 389 732 31 389 732 31 389 732 31
Sierra Leone 1 96 94 94 36 38 27 24 20 22
Conf. interval (93, 99) (92, 95) (86, 102) (29, 44) (34, 41) (12, 42) (17, 31) (16, 23) (8, 36)
n 167 632 37 167 632 37 167 632 37
Sierra Leone 2 87 90 93 52 62 62 29 22 18
Conf. interval (78, 97) (88, 92) (89, 97) (39, 66) (58, 67) (56, 67) (16, 42) (16, 28) (12, 25)
n 63 1376 396 63 1376 396 63 1376 396
Uganda 1 96 96 . 34 36 . 9 9 .
Conf. interval (95, 98) (96, 97) . (30, 39) (34, 39) . (6, 11) (8, 11) .
n 526 2218 . 526 2218 . 526 2218 .
Uganda 2 97 96 97 20 30 28 8 11 13
Conf. interval (94, 99) (95, 97) (94, 100) (14, 26) (27, 33) (21, 36) (4, 11) (9, 13) (7, 19)
n 173 749 123 173 749 123 173 749 123
All LMICs 89 89 93 33 39 36 15 13 13
Conf. interval (81, 97) (81, 98) (89, 98) (25, 41) (29, 48) (23, 48) (10, 20) (8, 18) (7, 19)
n 1838 8749 1084 1838 8749 1084 1838 8749 1084
Russia 67 76 81 74 68 68 46 40 38
Conf. interval (59, 74) (73, 78) (76, 87) (68, 81) (66, 71) (62, 74) (38, 54) (38, 43) (32, 44)
n 552 5108 227 552 5108 227 552 5108 227
USA 92 91 97 89 91 94 90 89 89
Conf. interval (88, 96) (89, 94) (95, 99) (83, 95) (88, 93) (91, 97) (85, 95) (86, 92) (85, 93)
n 153 687 473 153 687 473 153 687 473
Table 2 shows percentage of respondents mentioning reasons why they would take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses. Studies India, Pakistan 1 and Pakistan 2 are not included because they either did not include the question or were not properly harmonized with the other studies.

2.6 Figure 2: reasons not to take

2.6.1 Figure

# Lookup table with the display label of each outcome; it is joined on
# `outcome` below and supplies the `tag` column
dictionary <- read_excel("2_input_data/dictionary.xlsx")

# Every column of df2 whose name starts with "no_vaccine_" is treated as a
# reason not to take the vaccine
no_vars <- names(
  dplyr::select(df2, starts_with("no_vaccine_"))
)

# Share of respondents giving each reason NOT to take the vaccine, by study.
# Computed among respondents whose take_vaccine answer is "No" or "DK".
# Adds:
#   n_sub - approximate number of respondents giving the reason
#   size  - n_sub binned into [0,50), [50,500) and 500+ (used for point size)
#   tag   - display label from `dictionary`, ordered for the figure facets
no_vacc <- 
  lapply(no_vars, reasons_together, df = df2, num = c("No", "DK")) %>%
  dplyr::bind_rows() %>% 
  dplyr::arrange(outcome) %>% 
  dplyr::mutate(
    # mutate() evaluates its arguments in order, so the subgroup count has
    # to be derived while `estimate` is still a proportion. Computing it
    # after the rescaling below inflated n_sub by a factor of 100 and
    # pushed nearly every point into the largest size bin.
    n_sub = round(n * estimate, 0),
    # A reason with an estimated count of zero is treated as not observed
    n_sub = ifelse(n_sub == 0, NA_integer_, n_sub),
    # Percentages with one decimal for display
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1)),
    # Reversed order; studies not listed here become NA
    group = 
      factor(group, 
             levels = rev(c("Burkina Faso", "Colombia", "Mozambique", 
                            "Nepal", "Nigeria", "Pakistan 1", "Rwanda", 
                            "Sierra Leone 1", "Sierra Leone 2", "Uganda 1", 
                            "Uganda 2", "All", "Russia", "USA" )))
  ) %>%
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::mutate(
    size = cut(n_sub, c(0, 50, 500, Inf), right = FALSE, include.lowest = FALSE),
    size = forcats::fct_recode(size, "500+" = "[500,Inf)"),
    tag = as.factor(tag),
    tag = 
      forcats::fct_relevel(tag,  
                           "Concerned about side effects", 
                           "Concerned about getting coronavirus from the vaccine", 
                           "Not concerned about getting seriously ill", 
                           "Doesn't think vaccines are effective", 
                           "Doesn't think Coronavirus outbreak is as serious as people say", 
                           "Doesn't like needles", 
                           "Allergic to vaccines", 
                           "Won't have time to get vaccinated", 
                           "Mentions a conspiracy theory", 
                           "Other reasons"))

# Figure 2: share of respondents giving each reason not to take the vaccine.
# One facet per reason, one row per study, point size by subgroup count.
fig_2 <- 
  no_vacc %>% 
  # Reasons with no estimated respondents are not drawn
  dplyr::filter(!is.na(n_sub)) %>%
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>% 
  ggplot(aes(group, estimate, color = tag)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), 
                size = .5, width = .2, position = position_dodge(0.6)) + 
  geom_point(shape = 16, position = position_dodge(0.6), aes(size = size)) +
  facet_grid(.~tag,  space = "free", labeller = label_wrap_gen(width = 15)) +
  scale_size_discrete(range = c(1,3), name = "Number of observations" ) +
  # Separator lines on either side of the third position of the study axis
  geom_vline(xintercept = 3.5, color = "darkgrey") +
  geom_vline(xintercept = 2.5, color = "darkgrey") + 
  # Colour repeats the facet, so its legend is dropped. "none" replaces the
  # deprecated logical form guides(color = FALSE).
  guides(color = "none") +
  scale_colour_manual(values = safe_colorblind_palette) + 
  coord_flip() +
  labs(title = "Why would you not take the COVID-19 vaccine?",
       x = "") +
  # theme_bw() is applied once; the earlier duplicate call had no effect
  theme_bw() + ylim(c(-2,100)) +
  theme(legend.position = "bottom",
        plot.caption = element_text(hjust = 0), #Default is hjust=1
        plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
        plot.caption.position =  "plot",
        axis.text.y = element_text(hjust = 0))


fig_2

2.6.2 Calculate numbers used in text for refusal

## Generate data for analysis of no reasons

# Concern about side effects, individual studies only (pooled row, USA and
# Russia excluded), highest share first
side_top <-
  no_vacc %>%
  dplyr::filter(
    tag == "Concerned about side effects",
    group != "All",
    group != "USA",
    group != "Russia"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

# Concern about side effects in Russia and in the US
side_rus <-
  no_vacc %>%
  dplyr::filter(
    tag == "Concerned about side effects",
    group == "Russia"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

side_usa <-
  no_vacc %>%
  dplyr::filter(
    tag == "Concerned about side effects",
    group == "USA"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

# The four rankings below drop only the pooled "All" row, highest share
# first.
# NOTE(review): Russia and USA are NOT filtered out here, unlike side_top,
# although these were labelled as LMIC rankings; confirm this is intended.

# Allergic to vaccines
allergies_top <-
  no_vacc %>%
  dplyr::filter(tag == "Allergic to vaccines", group != "All") %>%
  dplyr::arrange(dplyr::desc(estimate))

# Does not like needles
needles_top <-
  no_vacc %>%
  dplyr::filter(tag == "Doesn't like needles", group != "All") %>%
  dplyr::arrange(dplyr::desc(estimate))

# Won't have time to get vaccinated
time_top <-
  no_vacc %>%
  dplyr::filter(
    tag == "Won't have time to get vaccinated",
    group != "All"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

# Concerned about getting covid from the vaccine
get_top <-
  no_vacc %>%
  dplyr::filter(
    tag == "Concerned about getting coronavirus from the vaccine",
    group != "All"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

# "Doesn't think vaccines are effective" with USA and Russia removed,
# highest share first.
# NOTE(review): the pooled "All" row is kept in this ranking, unlike the
# other *_top tables; confirm this is intended.
effective_top <-
  no_vacc %>%
  dplyr::filter(
    tag == "Doesn't think vaccines are effective",
    group != "USA",
    group != "Russia"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

# Same reason for Russia and for the USA
effective_rus <-
  no_vacc %>%
  dplyr::filter(
    tag == "Doesn't think vaccines are effective",
    group == "Russia"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

effective_usa <-
  no_vacc %>%
  dplyr::filter(
    tag == "Doesn't think vaccines are effective",
    group == "USA"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

# Not concerned about getting seriously ill: every study except the pooled
# "All" row (Russia and USA included), highest share first
ill_top <-
  no_vacc %>%
  dplyr::filter(
    tag == "Not concerned about getting seriously ill",
    group != "All"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

# Doesn't think the outbreak is as serious as people say: same row
# selection and ordering as ill_top
serious_top <-
  no_vacc %>%
  dplyr::filter(
    tag == "Doesn't think Coronavirus outbreak is as serious as people say",
    group != "All"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

2.6.3 Table version

# Table version of Figure 2: for every study an estimate row followed by a
# confidence-interval row, one column per reason, "Other" (666) last.
no_vacc2 <- 
  no_vacc %>%
  # format() on a whole column pads every value to a common width, which
  # printed intervals as "( 5.0, 11.0)". trim = TRUE drops that padding.
  dplyr::mutate(estimate = format(estimate, nsmall = 0, trim = TRUE),
                conf_int = paste0("(", format(conf.low, nsmall = 0, trim = TRUE), 
                                  ", ", format(conf.high, nsmall = 0, trim = TRUE), ")")) %>%
  dplyr::select(group, estimate, conf_int, outcome, n) %>%
  # One row per study; columns are named <statistic>__<outcome>
  tidyr::pivot_wider(names_from = outcome, values_from = c(estimate, conf_int, n), names_sep = "__") %>%
  # Back to two rows per study (type = estimate / conf_int), one column per
  # outcome; the n__* columns stay wide and are collapsed just below
  tidyr::pivot_longer(cols = c(starts_with("estimate__"), starts_with("conf_int__")),
                      names_to = c("type", ".value"),
                      names_pattern = "(.*)__(.*)") %>% 
  dplyr::rowwise() %>% 
  dplyr::mutate(
    # ifelse() returns a length-one value, so the first distinct
    # non-missing n__* value is used; the pooled row gets no N
    n = ifelse(group == "All", NA, unique(na.omit(c_across(starts_with("n__")))))) %>% 
  dplyr::ungroup() %>% 
  # Undo the reversed level order used for the figure, then move the pooled
  # row, Russia and USA to the bottom
  dplyr::mutate(
    group = forcats::fct_relevel(forcats::fct_rev(group), "All", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Study name and N are printed on the estimate row only
  dplyr::mutate(across(c(group, n), ~ifelse(type == "conf_int", "", as.character(.))),
                group = ifelse(group == "All", "All LMICs", group)) %>% 
  # `type` and the wide n__* helper columns are not part of the table
  dplyr::select(group, n, starts_with("no_vaccine_")) %>% 
  dplyr::relocate("no_vaccine_666", .after = last_col())


# LaTeX rendering of no_vacc2 (landscape), stored for later use.
# The headers are matched to the columns of no_vacc2 by position.
# base_font_size is defined elsewhere in the file. TRUE/FALSE are spelled
# out because T and F are ordinary variables that can be reassigned.
tab_fig2 <- 
  no_vacc2 %>%
  knitr::kable(
    col.names = 
      c("Study", "N", 
        "Concerned about side effects", 
        "Concerned about getting coronavirus from the vaccine", 
        "Not concerned about getting seriously ill", 
        "Doesn't think vaccines are effective", 
        "Doesn't think Coronavirus outbreak is as serious as people say", 
        "Doesn't like needles", 
        "Allergic to vaccines", 
        "Won't have time to get vaccinated", 
        "Mentions a conspiracy theory", 
        "Other reasons"),
    caption = "Reasons not to take the vaccine",
    align = c("l", rep("c", 11)),
    format = "latex", booktabs = TRUE, linesep = "", 
    format.args = list(big.mark = ",", scientific = FALSE), 
    label = "no") %>%
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1:12, width = "7em") %>%
  # kableExtra::column_spec(1, width = "7em") %>%
  kableExtra::footnote(
    general_title = "",
    general = "Table 7 shows percentage of respondents mentioning reasons why they would not take the Covid-19 vaccine. The number of observations and percentage corresponds only to people who would NOT take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE)  %>%
  kableExtra::landscape()



# Same table printed in the knitted document using kable's default output
# format. The headers are matched to the columns of no_vacc2 by position.
# NOTE(review): the footnote says "Table 6" while the LaTeX version says
# "Table 7"; left as is, confirm which number is intended.
no_vacc2 %>%
  knitr::kable(
    col.names = c("Study", "N", 
                  "Concerned about side effects", 
                  "Concerned about getting coronavirus from the vaccine", 
                  "Not concerned about getting seriously ill", 
                  "Doesn't think vaccines are effective", 
                  "Doesn't think Coronavirus outbreak is as serious as people say", 
                  "Doesn't like needles", 
                  "Allergic to vaccines", 
                  "Won't have time to get vaccinated", 
                  "Mentions a conspiracy theory", 
                  "Other reasons"),
    caption = "\\label{no}Reasons not to take the vaccine", 
    booktabs = TRUE, linesep = "", 
    format.args = list(big.mark = ",",scientific = FALSE)) %>%
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1:12, width = "7em") %>%
  # kableExtra::column_spec(1, width = "7em") %>%
  kableExtra::footnote(
    general_title = "",
    general = "Table 6 shows percentage of respondents mentioning reasons why they would not take the Covid-19 vaccine. The number of observations and percentage corresponds only to people who would NOT take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses",
    threeparttable = TRUE)
Reasons not to take the vaccine
Study N Concerned about side effects Concerned about getting coronavirus from the vaccine Not concerned about getting seriously ill Doesn’t think vaccines are effective Doesn’t think Coronavirus outbreak is as serious as people say Doesn’t like needles Allergic to vaccines Won’t have time to get vaccinated Mentions a conspiracy theory Other reasons
Burkina Faso 325 40.9 8.0 7.4 19.5 13.5 3.5 1.5 0.9 17.9 8.7
(35.5, 46.3) ( 5.0, 11.0) ( 4.5, 10.2) (15.1, 23.8) ( 9.8, 17.2) ( 1.5, 5.6) ( 0.2, 2.8) (-0.1, 1.9) (13.7, 22.1) ( 5.6, 11.8)
Colombia 202 31.0 18.1 8.0 10.2 2.3 0.6 0.4 0.5 10.0 31.6
(24.4, 37.6) (12.7, 23.4) ( 3.9, 12.0) ( 5.9, 14.5) ( 0.3, 4.3) (-0.6, 1.8) (-0.4, 1.3) (-0.5, 1.5) ( 5.8, 14.2) (25.1, 38.2)
Mozambique 74 . . 2.7 29.7 . . . . . 21.6
. . (-0.7, 6.1) (18.6, 40.8) . . . . . (12.2, 31.0)
Nepal 48 9.3 7.9 20.4 15.2 15.7 4.4 1.8 . 2.8 12.1
( 0.3, 18.2) (-0.4, 16.3) ( 6.7, 34.1) ( 3.2, 27.2) ( 4.0, 27.3) (-1.9, 10.6) (-1.9, 5.5) . (-1.5, 7.2) ( 0.8, 23.5)
Nigeria 410 21.5 26.1 15.9 9.3 . . 0.2 . 4.9 26.8
(17.5, 25.5) (21.8, 30.4) (12.3, 19.4) ( 6.4, 12.1) . . (-0.2, 0.7) . ( 2.8, 7.0) (22.5, 31.1)
Pakistan 1 441 23.0 21.9 29.4 26.0 22.1 11.5 . . 13.2 19.6
(15.1, 30.8) (14.3, 29.4) (20.9, 37.9) (18.0, 34.0) (12.8, 31.3) ( 5.5, 17.4) . . ( 7.1, 19.4) (10.4, 28.8)
Rwanda 70 38.6 10.1 18.7 21.5 5.8 7.0 5.6 . 21.3 25.8
(26.9, 50.3) ( 2.8, 17.3) ( 9.3, 28.1) (11.6, 31.4) ( 0.1, 11.4) ( 0.9, 13.2) ( 0.1, 11.1) . (11.5, 31.1) (15.3, 36.3)
Sierra Leone 1 234 53.5 37.9 14.6 7.5 4.2 3.0 0.9 4.0 20.3 5.7
(47.1, 59.9) (31.6, 44.2) (10.1, 19.2) ( 4.2, 10.9) ( 1.6, 6.8) ( 0.8, 5.2) (-0.4, 2.2) ( 1.4, 6.5) (15.1, 25.5) ( 2.8, 8.7)
Sierra Leone 2 254 57.9 . . 17.3 . 5.1 . 0.0 3.5 24.8
(50.1, 65.7) . . (11.9, 22.7) . ( 2.5, 7.8) . ( 0.0, 0.0) ( 1.3, 5.7) (19.3, 30.3)
Uganda 1 289 85.1 . 3.8 24.2 1.7 1.7 . 1.0 . 8.0
(80.7, 89.6) . ( 1.7, 5.9) (19.2, 29.2) ( 0.2, 3.2) ( 0.2, 3.2) . (-0.1, 2.2) . ( 4.9, 11.0)
Uganda 2 319 47.3 10.7 5.0 31.0 4.1 1.6 0.3 . 10.3 6.0
(42.2, 52.5) ( 7.1, 14.2) ( 2.7, 7.3) (25.9, 36.2) ( 1.9, 6.2) ( 0.2, 2.9) (-0.3, 0.9) . ( 7.0, 13.7) ( 3.4, 8.5)
All LMICs . 40.8 17.6 12.6 19.2 8.7 4.3 1.5 1.3 11.6 17.3
(25.3, 56.3) ( 8.7, 26.5) ( 6.4, 18.8) (13.8, 24.7) ( 2.4, 14.9) ( 1.7, 6.8) (-0.2, 3.3) (-0.6, 3.2) ( 6.1, 17.0) (11.0, 23.7)
Russia 16238 36.8 13.9 5.4 29.6 6.4 3.7 10.2 1.0 21.4 5.1
(35.2, 38.4) (12.8, 15.1) ( 4.6, 6.1) (28.1, 31.1) ( 5.6, 7.3) ( 3.1, 4.3) ( 9.2, 11.2) ( 0.7, 1.4) (20.1, 22.8) ( 4.4, 5.8)
USA 462 79.3 . 39.3 46.8 . . . . 6.0 49.1
(74.6, 84.0) . (33.5, 45.0) (41.0, 52.6) . . . . ( 3.4, 8.7) (43.3, 54.9)
Table 6 shows percentage of respondents mentioning reasons why they would not take the Covid-19 vaccine. The number of observations and percentage correponds only to people who would NOT take the vaccine. Respondents in all countries could give more than one reason. A 95% confidence interval is shown between parentheses

2.7 Figure 3: Trust vaccines

2.7.1 Figure

# Collapse the detailed trust_vaccine_* indicators into five grouped
# indicators, trust_recode_1 ... trust_recode_5.
#
# Each grouped indicator is 1 when any of its source indicators equals 1 and
# 0 when none does. It is NA when no source equals 1 and at least one source
# is missing. For the studies listed next to each indicator such NA values
# are then set to 0 (presumably because those studies did not offer the
# underlying answer options; confirm against the questionnaires).
df2 <-
  df2 %>%
  dplyr::mutate(
    trust_recode_1 = ifelse(trust_vaccine_1 == 1 | trust_vaccine_2 == 1, 1, 0),
    trust_recode_1 = ifelse(
      country %in% c("Nigeria", "USA") & is.na(trust_recode_1),
      0, trust_recode_1
    ),

    trust_recode_2 = ifelse(trust_vaccine_8 == 1 | trust_vaccine_9 == 1, 1, 0),
    trust_recode_2 = ifelse(
      country %in% "Sierra Leone 2" & is.na(trust_recode_2),
      0, trust_recode_2
    ),

    trust_recode_3 = ifelse(
      trust_vaccine_3 == 1 | trust_vaccine_7 == 1 | trust_vaccine_4 == 1,
      1, 0
    ),
    trust_recode_3 = ifelse(
      country %in% c("Nigeria", "USA", "Russia") & is.na(trust_recode_3),
      0, trust_recode_3
    ),

    trust_recode_4 = ifelse(
      trust_vaccine_666 == 1 | trust_vaccine_other == 1,
      1, 0
    ),
    trust_recode_4 = ifelse(
      country %in% c("Burkina Faso", "Sierra Leone 2", "Russia") &
        is.na(trust_recode_4),
      0, trust_recode_4
    ),

    trust_recode_5 = ifelse(
      trust_vaccine_dk == 1 | trust_vaccine_refuse == 1 | trust_vaccine_nr == 1,
      1, 0
    ),
    trust_recode_5 = ifelse(
      country %in% c("Nigeria", "Sierra Leone 2", "USA") &
        is.na(trust_recode_5),
      0, trust_recode_5
    )
  )

# Outcomes analysed in the trust section: the five grouped indicators plus
# two original indicators that are used as they are
trust_names <- c(
  paste0("trust_recode_", 1:5),
  "trust_vaccine_5",
  "trust_vaccine_6"
)

# Display order of the studies in the trust figures; a study that is not
# listed here becomes NA when `group` is turned into a factor
studies_levels <- c(
  "Burkina Faso", "Colombia", "India", "Mozambique",
  "Nepal", "Nigeria", "Pakistan 1", "Rwanda",
  "Sierra Leone 1", "Sierra Leone 2", "Uganda 2",
  "All", "Russia", "USA"
)

# Share of respondents naming each trusted source, by study, computed three
# times: for everyone ("All"), for respondents who would take the vaccine
# ("Yes") and for those answering "No" or "DK" ("No"). The list names become
# the `sub` column.
trust_vacc_together <-
  list(
    All = lapply(trust_names, reasons_together, 
                 df = df2, num = c("Yes", "No", "DK")) %>% 
      dplyr::bind_rows(),
    Yes = lapply(trust_names, reasons_together, 
                 df = df2, num = c("Yes")) %>% 
      dplyr::bind_rows(),
    No = lapply(trust_names, reasons_together, 
                df = df2, num = c("No", "DK")) %>% 
      dplyr::bind_rows()) %>% 
  dplyr::bind_rows(.id = "sub") %>%
  # Drop fits whose test statistic is NaN
  dplyr::filter(!is.nan(statistic)) %>%
  dplyr::mutate(
    # mutate() evaluates its arguments in order, so the subgroup count has
    # to be derived while `estimate` is still a proportion. Computing it
    # after the rescaling below inflated n_sub by a factor of 100 and
    # distorted the size bins.
    n_sub = round(n * estimate, 0),
    n_sub = ifelse(n_sub == 0, NA_integer_, n_sub),
    # Percentages with one decimal for display
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1)),
    group = factor(group, levels = studies_levels)) %>%
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::mutate(
    size = cut(n_sub, c(0, 50, 500, Inf), include.lowest = TRUE),
    size = forcats::fct_recode(size, "500+" = "(500,Inf]"),
    tag = as.factor(tag),
    tag = forcats::fct_relevel(tag, 
                               "Health workers", 
                               "Government or MoH", 
                               "Family or Friends", 
                               "Famous person, religious leader or traditional healers", 
                               "Newspapers, radio or online groups", 
                               "Other", 
                               "Don't know or Refuse"),
    # Order and relabel the acceptance subsets for the figure legends
    sub = forcats::fct_relevel(as.factor(sub), "No", "Yes", "All"),
    sub = plyr::mapvalues(sub, from = c("No", "Yes", "All"),
                          to = c("No, Don't know", "Yes", "Any")))

#Plot
# Figure 3: share of all respondents (sub == "Any") naming each trusted
# source, one panel per study. The bars use one fixed fill colour, so no
# fill scale is attached: the scale_fill_manual() call that used to sit
# here could never take effect without a fill aesthetic.
fig_hist2 <- 
  trust_vacc_together %>%
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>% 
  dplyr::filter(sub=="Any") %>% 
  ggplot(aes(estimate, tag)) + 
  geom_bar(stat = "identity", position = "dodge", fill="#DDCC77") + 
  facet_wrap(~group, ncol = 2, strip.position = "left")  +
  coord_flip() +
  # Wrap the long source labels and rotate them
  scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 16), 
                   guide = guide_axis(angle = 90)) +
  labs(title = "Which of the following people would you trust MOST to help you decide whether you would get a COVID-19 vaccine?",
       y = "") +
  theme_bw() + 
  theme(legend.position = "bottom",
        plot.title.position = "plot", #NEW parameter. Apply for subtitle too.
        axis.text.y = element_text(hjust = 0))

fig_hist2

# Estimates for all respondents regardless of acceptance (sub == "Any");
# every table below is a subset of it, sorted by decreasing share
trust <- dplyr::filter(trust_vacc_together, sub == "Any")

# Pooled "All" estimate of trust in health workers
trust_all <-
  trust %>%
  dplyr::filter(group == "All", tag == "Health workers") %>%
  dplyr::arrange(dplyr::desc(estimate))

# Trust in health workers, individual studies only (pooled row, USA and
# Russia excluded); trust_top is the same table sorted
trust_ <-
  trust %>%
  dplyr::filter(
    group != "All",
    group != "USA",
    group != "Russia",
    tag == "Health workers"
  )

trust_top <- dplyr::arrange(trust_, dplyr::desc(estimate))

# All trusted sources for Rwanda
trust_rwa <-
  trust %>%
  dplyr::filter(group == "Rwanda") %>%
  dplyr::arrange(dplyr::desc(estimate))

# Nepal: famous person, religious leader or traditional healers
trust_npl <-
  trust %>%
  dplyr::filter(
    group == "Nepal",
    tag == "Famous person, religious leader or traditional healers"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))


# Trust in family or friends: every study except the pooled row (Russia and
# USA are not excluded here)
trust_fam <-
  trust %>%
  dplyr::filter(group != "All", tag == "Family or Friends") %>%
  dplyr::arrange(dplyr::desc(estimate))

# Trust in government or MoH: every study except the pooled row and Rwanda
# (Russia and USA are not excluded here)
trust_gov <-
  trust %>%
  dplyr::filter(
    group != "All",
    group != "Rwanda",
    tag == "Government or MoH"
  ) %>%
  dplyr::arrange(dplyr::desc(estimate))

2.8 Figure broken down by acceptance

# Trusted sources by study, with one bar per acceptance subset
# ("No, Don't know", "Yes", "Any") shown side by side
fig_hist_categories <-
  trust_vacc_together %>%
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>%
  ggplot(aes(x = estimate, y = tag, fill = sub)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~group, ncol = 2, strip.position = "left") +
  coord_flip() +
  # Wrap the long source labels and rotate them
  scale_y_discrete(
    labels = function(x) stringr::str_wrap(x, width = 16),
    guide = guide_axis(angle = 90)
  ) +
  # Palette entries 1, 3, 2 are assigned to the levels of `sub` in order
  scale_fill_manual(
    name = "Answer",
    values = safe_colorblind_palette[c(1,3,2)]
  ) +
  labs(
    title = "Which of the following people would you trust MOST to help you decide whether you would get a COVID-19 vaccine?",
    y = ""
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title.position = "plot", # also applies to the subtitle
    axis.text.y = element_text(hjust = 0)
  )

fig_hist_categories

2.8.1 By gender

# Share of respondents naming each trusted source, by study, computed for
# both genders together ("All"), for men and for women. Acceptance is not
# restricted (num covers "Yes", "No" and "DK"). The list names become the
# `sub` column.
trust_vacc_gender <-  
  list(
    All = lapply(trust_names, reasons_together_subgroup, 
                 df = df2, 
                 num = c("Yes", "No", "DK"), 
                 dem_group = "gender", 
                 dem_subgroup = c("Female", "Male")) %>% dplyr::bind_rows(),
    Male = lapply(trust_names, reasons_together_subgroup, 
                  df = df2, 
                  num = c("Yes", "No", "DK"), 
                  dem_group = "gender", 
                  dem_subgroup = "Male") %>%
      dplyr::bind_rows(),
    Female = lapply(trust_names, reasons_together_subgroup, 
                    df = df2, 
                    num = c("Yes", "No", "DK"), 
                    dem_group = "gender", 
                    dem_subgroup = "Female") %>% 
      dplyr::bind_rows()) %>% 
  dplyr::bind_rows(.id = "sub") %>%
  # Drop fits whose test statistic is NaN
  dplyr::filter(!is.nan(statistic)) %>%
  dplyr::mutate(
    # mutate() evaluates its arguments in order, so the subgroup count has
    # to be derived while `estimate` is still a proportion. Computing it
    # after the rescaling below inflated n_sub by a factor of 100 and
    # distorted the size bins.
    n_sub = round(n * estimate, 0),
    n_sub = ifelse(n_sub == 0, NA_integer_, n_sub),
    # Percentages with one decimal for display
    across(c(conf.low, conf.high, estimate), ~ round(. * 100, digits = 1)),
    group = factor(group, levels = studies_levels)) %>%
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::mutate(
    size = cut(n_sub, c(0, 50, 500, Inf), include.lowest = TRUE),
    size = forcats::fct_recode(size, "500+" = "(500,Inf]"),
    tag = as.factor(tag),
    tag = forcats::fct_relevel(tag, 
                               "Health workers", 
                               "Government or MoH", 
                               "Family or Friends", 
                               "Famous person, religious leader or traditional healers", 
                               "Newspapers, radio or online groups", 
                               "Other", 
                               "Don't know or Refuse"),
    sub = forcats::fct_relevel(as.factor(sub), "Female", "Male", "All"))

#Plot
# Plot: dodged bars of the percentage naming each trusted actor, one bar per
# gender subgroup (Female / Male / All), faceted by study.
hist_gender <-
  trust_vacc_gender %>%
  dplyr::mutate(group = plyr::mapvalues(group, "All", "All LMICs")) %>%
  ggplot(aes(estimate, tag, fill = sub)) +
  # geom_col() is the dedicated form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  facet_wrap(~group, ncol = 2, strip.position = "left") +
  # Flip so the actors run along the horizontal axis.
  coord_flip() +
  scale_fill_manual(
    # The fill encodes the gender subgroup, so the legend is titled accordingly
    # (it was previously titled "Answer").
    name = "Gender",
    values = safe_colorblind_palette[c(1, 3, 2)]) +
  # Wrap the long actor labels and rotate them so they do not overlap.
  scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 16),
                   guide = guide_axis(angle = 90)) +
  labs(title = "Which of the following people would you trust MOST to help you decide whether you would get a COVID-19 vaccine?",
       y = "") +
  theme_bw() +
  theme(legend.position = "bottom",
        # Align the title with the whole plot rather than the panel area.
        plot.title.position = "plot",
        axis.text.y = element_text(hjust = 0))

hist_gender

2.8.2 Gender difference means

# Gender gap in each trust outcome: for every outcome in `trust_names`,
# regress it on gender with country fixed effects, survey weights and
# standard errors clustered by country. The pooled "All" group and the
# USA, Russia and Uganda 1 groups are excluded from the regression sample.
# Returns one row per estimated coefficient, labelled with the outcome's `tag`.
differences_means_gen <-
  lapply(trust_names, function(i) {
    df2 %>%
      # `!=` drops rows with a missing group, so keep that explicit here.
      dplyr::filter(!is.na(group),
                    !(group %in% c("All", "USA", "Russia", "Uganda 1"))) %>%
      estimatr::lm_robust(
        formula = as.formula(paste(i, "~gender")),
        fixed_effects = ~country,
        # Full argument names: `weight` / `cluster` only worked through
        # partial argument matching.
        weights = weight,
        clusters = country,
        se_type = "stata",
        data = .) %>%
      tidy() %>%
      dplyr::select(estimate, std.error, p.value, df, term) %>%
      dplyr::mutate(outcome = paste(i))
  }) %>%
  dplyr::bind_rows() %>%
  # Explicit key, consistent with the other joins against `dictionary`
  # (and avoids the "Joining with `by = ...`" message).
  dplyr::left_join(dictionary, by = "outcome") %>%
  dplyr::select(-outcome)

# Print the gender differences with Benjamini-Hochberg adjusted p-values
# computed across all outcomes in the table.
knitr::kable(
  dplyr::mutate(differences_means_gen,
                adjusted_p = p.adjust(p.value, method = "BH")),
  digits = 3,
  caption = "Differences in means trust actors (BH adjustment)"
)
Differences in means trust actors (BH adjustment)
estimate std.error p.value df term tag adjusted_p
-0.032 0.011 0.026 7 genderMale Family or Friends 0.115
0.003 0.004 0.444 6 genderMale Newspapers, radio or online groups 0.673
-0.002 0.003 0.481 7 genderMale Famous person, religious leader or traditional healers 0.673
0.011 0.009 0.283 6 genderMale Other 0.660
0.001 0.005 0.895 7 genderMale Don’t know or Refuse 0.963
0.001 0.017 0.963 7 genderMale Health workers 0.963
0.023 0.008 0.033 6 genderMale Government or MoH 0.115

2.8.3 Table version

# Table data: percentage of respondents naming each trusted actor, by study
# and by vaccine-acceptance answer ("Yes", "No", "All").
# The outer ldply() loops over the three answer labels; each label is mapped
# to the set of raw `take_vaccine` answers it covers, and the inner ldply()
# runs reasons_together() for every outcome in `trust_names` on that set.
# Each study ends up with two consecutive rows per answer label: the point
# estimates and, underneath, the confidence intervals.
trust_vacc <- 
  plyr::ldply(
    .data = list("Yes", "No", "All"), 
    .fun = function(take_vac) {
      # Map the label to the raw answers: "No" also covers "DK" answers.
      list(Yes = "Yes", 
           No = c("No", "DK"), 
           All = c("Yes", "No", "DK")) %>% 
        .[[take_vac]] %>% 
        # `.` (the selected answers) is passed as `num`, not as the first argument.
        plyr::ldply(trust_names, reasons_together, df = df2, num = .) %>%
        dplyr::mutate(
          # Percentages with one decimal, formatted as text.
          across(c(conf.low, conf.high, estimate), 
                 ~ format(round(. * 100, digits = 1), nsmall = 1)),        
          conf_int = paste0("(", conf.low, ", ", conf.high, ")")) %>%
        dplyr::select(group, estimate, conf_int, outcome, n) %>%
        # Wide: one row per group, columns <stat>__<outcome>.
        tidyr::pivot_wider(names_from = outcome, values_from = c(estimate, conf_int, n), 
                           names_sep = "__") %>%
        # Back to long on the statistic only: one "estimate" row and one
        # "conf_int" row per group, with one column per outcome.
        tidyr::pivot_longer(cols = c(starts_with("estimate__"), starts_with("conf_int__")),
                            names_to = c("type", ".value"),
                            names_pattern = "(.*)__(.*)") %>%
        dplyr::rowwise() %>% 
        dplyr::mutate(
          # Collapse the per-outcome n__* columns into a single n; left NA for
          # the pooled "All" group.
          # NOTE(review): ifelse() with a scalar test keeps only the first
          # distinct value if outcomes have different n - confirm that is intended.
          n = ifelse(group == "All", NA, unique(na.omit(c_across(starts_with("n__")))))) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate("Take vaccine?" = take_vac) %>%
        dplyr::select(group, n, type, "Take vaccine?", starts_with("trust_")) %>%
        # These studies are left out of the table.
        dplyr::filter(!(group %in% c("Mozambique", "Pakistan 1", "Pakistan 2", "Uganda 1", "India")))
    }
  ) %>% 
  dplyr::mutate(
    group = as.factor(group),
    # Move the pooled group, Russia and USA to the end of the table.
    group = forcats::fct_relevel(group, "All", "Russia", "USA", after = Inf)) %>% 
  dplyr::arrange(group) %>% 
  # Blank the identifying cells on confidence-interval rows so each study label
  # appears only once, then relabel the pooled group.
  dplyr::mutate(across(c(group, n, `Take vaccine?`), ~ifelse(type == "conf_int", "", as.character(.))),
                group = ifelse(group == "All", "All LMICs", group)) %>% 
  dplyr::select(-type)

# LaTeX version of the "most trusted source" table (landscape longtable).
# Columns are put in display order by the select() and labelled via col.names.
# NOTE(review): the footnote hard-codes "Table 8" while the HTML version of
# the same table says "Table 7" - confirm the intended numbering.
tab_trust <- 
  trust_vacc %>%
  dplyr::select("group", "n", 
                "Take vaccine?", "trust_vaccine_5", 
                "trust_vaccine_6", "trust_recode_1", 
                "trust_recode_3", "trust_recode_2", 
                "trust_recode_4", "trust_recode_5") %>%
  knitr::kable(
    col.names = c("Study", "N", "Take vaccine?", "Health workers", 
                  "Government or Ministry of Health", 
                  "Family or friends", 
                  "Famous person, religious leader or traditional healers", 
                  "Newspapers, radio or online groups", "Other", 
                  "Don't know or Refuse"),
    caption = "COVID-19 Vaccination Decision-making: most trusted source",
    align = c("l", rep("c", 9)),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    format = "latex", booktabs = TRUE, linesep = "", longtable = TRUE,
    format.args = list(big.mark = ",", scientific = FALSE), 
    label = "trust") %>% 
  kableExtra::kable_styling(latex_options = c("scale_down", "hold_position", "repeat_header"),
                            font_size = base_font_size - 2, full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "7em") %>% 
  # Columns 4-10 (the actor columns) are widened again by the second call.
  kableExtra::column_spec(2:10, width = "4em") %>% 
  kableExtra::column_spec(4:10, width = "6em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 8 shows percentage of respondents that mention actors who they would trust the most to help them decide whether to get a COVID-19 vaccine. For all countries the questions was asked regardless if respondent would take a vaccine, would not take it, does not know or does not respond. For India respondents were able to mention more than one actor, for the rest of countries only one actor was allowed. While rows should sum to 100%, rounding makes number slightly above or below. A 95% confidence interval is shown between parentheses.",
    threeparttable = TRUE) %>%
  kableExtra::landscape()



# Version of the "most trusted source" table printed in the rendered report
# (default kable format; same columns and order as the LaTeX table).
trust_vacc %>%
  dplyr::select("group", "n", 
                "Take vaccine?", "trust_vaccine_5", 
                "trust_vaccine_6", "trust_recode_1", 
                "trust_recode_3", "trust_recode_2", 
                "trust_recode_4", "trust_recode_5") %>%
  knitr::kable(
    col.names = c("Study", "N", "Take vaccine?", "Health workers", 
                  "Government or \n Ministry of Health", 
                  "Family or friends", 
                  "Famous person, \n religious leader or \n traditional healers", 
                  "Newspapers, radio \n or online groups", "Other", 
                  "Don't know or Refuse"),
    caption = "COVID-19 Vaccination Decision-making: most trusted source", 
    format.args = list(big.mark = ",", scientific = FALSE)) %>% 
  kableExtra::kable_styling(full_width = FALSE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>% 
  kableExtra::column_spec(1, width = "7em") %>% 
  # Columns 4-10 (the actor columns) are widened again by the second call.
  kableExtra::column_spec(2:10, width = "4em") %>% 
  kableExtra::column_spec(4:10, width = "6em") %>% 
  kableExtra::footnote(
    general_title = "",
    general = "Table 7 shows percentage of respondents that mention actors who they would trust the most to help them decide whether to get a COVID-19 vaccine. For all countries the questions was asked regardless if respondent would take a vaccine, would not take it, does not know or does not respond. For India respondents were able to mention more than one actor, for the rest of countries only one actor was allowed. While rows should sum to 100%, rounding makes number slightly above or below. A 95% confidence interval is shown between parentheses.",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    threeparttable = TRUE)
COVID-19 Vaccination Decision-making: most trusted source
Study N Take vaccine? Health workers Government or Ministry of Health Family or friends Famous person, religious leader or traditional healers Newspapers, radio or online groups Other Don’t know or Refuse
Burkina Faso 651 Yes 57.1 15.1 19.6 0.9 2.0 4.8 0.4
(53.3, 60.9) (12.4, 17.9) (16.5, 22.7) ( 0.2, 1.6) ( 0.9, 3.1) ( 3.2, 6.4) (-0.1, 0.9)
Burkina Faso 325 No 40.7 8.5 16.2 3.7 1.6 25.1 4.2
(35.3, 46.1) ( 5.5, 11.6) (12.1, 20.2) ( 1.6, 5.7) ( 0.2, 3.0) (20.3, 29.8) ( 2.0, 6.4)
Burkina Faso 976 All 51.6 12.9 18.4 1.8 1.9 11.6 1.7
(48.5, 54.8) (10.8, 15.0) (16.0, 20.9) ( 1.0, 2.7) ( 1.0, 2.7) ( 9.6, 13.6) ( 0.9, 2.5)
Colombia 756 Yes 41.4 12.7 36.9 0.9 1.7 . 6.3
(37.8, 45.0) (10.3, 15.2) (33.4, 40.4) ( 0.2, 1.5) ( 0.8, 2.7) . ( 4.6, 8.1)
Colombia 202 No 31.5 7.6 35.5 5.3 1.4 . 18.8
(24.9, 38.1) ( 3.8, 11.3) (28.8, 42.1) ( 2.2, 8.4) (-0.2, 3.0) . (13.2, 24.3)
Colombia 958 All 39.3 11.6 36.6 1.8 1.7 . 8.9
(36.2, 42.5) ( 9.6, 13.7) (33.5, 39.7) ( 1.0, 2.6) ( 0.9, 2.5) . ( 7.1, 10.7)
Nepal 1341 Yes 44.7 0.7 36.2 16.1 0.4 0.5 1.3
(40.9, 48.6) ( 0.3, 1.1) (33.5, 39.0) (13.1, 19.1) ( 0.0, 0.9) ( 0.1, 0.8) ( 0.7, 2.0)
Nepal 48 No 30.2 2.1 18.7 16.8 0.0 1.0 31.2
(14.6, 45.9) (-2.1, 6.2) ( 5.6, 31.7) ( 4.0, 29.6) ( 0.0, 0.0) (-1.1, 3.2) (13.6, 48.9)
Nepal 1389 All 44.2 0.8 35.6 16.1 0.4 0.5 2.4
(40.5, 47.9) ( 0.3, 1.2) (32.9, 38.3) (13.3, 18.9) ( 0.0, 0.8) ( 0.1, 0.8) ( 1.5, 3.3)
Nigeria 1424 Yes 63.8 21.6 6.3 5.1 . 2.6 0.6
(61.3, 66.3) (19.4, 23.7) ( 5.0, 7.5) ( 4.0, 6.3) . ( 1.8, 3.4) ( 0.2, 1.0)
Nigeria 410 No 37.6 5.6 13.9 17.8 . 8.5 16.6
(32.9, 42.3) ( 3.4, 7.8) (10.5, 17.3) (14.1, 21.5) . ( 5.8, 11.3) (13.0, 20.2)
Nigeria 1834 All 58.0 18.0 8.0 8.0 . 3.9 4.2
(55.7, 60.2) (16.2, 19.8) ( 6.7, 9.2) ( 6.7, 9.2) . ( 3.0, 4.8) ( 3.3, 5.1)
Rwanda 1152 Yes 23.8 27.4 15.1 1.0 0.7 32.0 0.1
(21.3, 26.2) (24.9, 30.0) (13.0, 17.2) ( 0.4, 1.5) ( 0.2, 1.2) (29.3, 34.7) (-0.1, 0.2)
Rwanda 70 No 10.1 15.6 12.8 2.9 0.0 53.2 5.5
( 2.8, 17.4) ( 6.9, 24.3) ( 4.8, 20.8) (-1.1, 6.9) ( 0.0, 0.0) (41.2, 65.1) ( 0.1, 11.0)
Rwanda 1222 All 23.0 26.7 15.0 1.1 0.6 33.2 0.4
(20.6, 25.3) (24.3, 29.2) (13.0, 17.0) ( 0.5, 1.7) ( 0.2, 1.1) (30.5, 35.8) ( 0.0, 0.8)
Sierra Leone 1 836 Yes 47.6 36.9 7.3 3.8 0.5 3.1 0.8
(44.2, 51.0) (33.6, 40.2) ( 5.5, 9.1) ( 2.5, 5.1) ( 0.0, 1.0) ( 1.9, 4.2) ( 0.2, 1.4)
Sierra Leone 1 234 No 31.1 17.1 12.1 7.7 0.5 29.4 2.2
(25.1, 37.1) (12.2, 21.9) ( 7.9, 16.3) ( 4.3, 11.2) (-0.4, 1.3) (23.5, 35.3) ( 0.3, 4.1)
Sierra Leone 1 1070 All 44.0 32.5 8.4 4.7 0.5 8.9 1.1
(41.0, 46.9) (29.7, 35.4) ( 6.7, 10.0) ( 3.4, 6.0) ( 0.1, 0.9) ( 7.1, 10.6) ( 0.5, 1.8)
Sierra Leone 2 1855 Yes 94.1 . 3.0 0.9 0.1 1.9 0.0
(92.5, 95.7) . ( 2.0, 4.0) ( 0.3, 1.5) (-0.1, 0.2) ( 1.2, 2.7) ( 0.0, 0.0)
Sierra Leone 2 254 No 54.7 . 3.9 7.5 0.0 33.9 0.0
(46.5, 62.9) . ( 1.4, 6.5) ( 2.9, 12.0) ( 0.0, 0.0) (26.3, 41.4) ( 0.0, 0.0)
Sierra Leone 2 2109 All 89.3 . 3.1 1.7 0.0 5.8 0.0
(87.2, 91.5) . ( 2.2, 4.1) ( 0.8, 2.6) ( 0.0, 0.1) ( 4.4, 7.2) ( 0.0, 0.0)
Uganda 2 1045 Yes 38.3 36.5 9.8 7.0 5.0 3.5 0.0
(35.5, 41.1) (33.5, 39.4) ( 7.9, 11.6) ( 5.4, 8.6) ( 3.6, 6.3) ( 2.5, 4.6) ( 0.0, 0.0)
Uganda 2 319 No 24.5 19.1 8.5 7.8 7.5 32.0 0.6
(19.9, 29.0) (14.5, 23.7) ( 5.4, 11.5) ( 4.8, 10.9) ( 4.5, 10.5) (26.7, 37.3) (-0.2, 1.5)
Uganda 2 1364 All 35.0 32.4 9.5 7.2 5.6 10.2 0.1
(32.7, 37.4) (29.9, 35.0) ( 7.9, 11.1) ( 5.8, 8.6) ( 4.3, 6.8) ( 8.6, 11.8) (-0.1, 0.3)
All LMICs . Yes 51.3 21.6 16.8 4.5 1.5 6.9 1.2
(33.7, 68.9) ( 9.4, 33.8) ( 5.7, 27.9) ( 0.1, 8.8) (-0.1, 3.1) (-3.4, 17.2) (-0.6, 3.0)
All LMICs . No 32.5 10.8 15.2 8.7 1.6 26.1 9.9
(21.8, 43.3) ( 4.8, 16.8) ( 7.4, 23.0) ( 4.0, 13.4) (-0.9, 4.1) (10.2, 42.1) ( 0.6, 19.2)
All LMICs . All 48.1 19.3 16.8 5.3 1.5 10.6 2.4
(31.6, 64.5) ( 8.3, 30.3) ( 6.1, 27.5) ( 1.0, 9.6) (-0.2, 3.3) ( 0.7, 20.5) (-0.1, 4.9)
Russia 5887 Yes 47.1 24.4 16.5 2.0 4.1 5.8 .
(44.6, 49.7) (22.2, 26.7) (14.6, 18.5) ( 1.2, 2.8) ( 3.1, 5.1) ( 4.5, 7.0) .
Russia 16238 No 31.1 6.9 33.1 2.2 5.3 21.3 .
(29.6, 32.7) ( 6.1, 7.8) (31.5, 34.7) ( 1.7, 2.8) ( 4.5, 6.0) (20.0, 22.7) .
Russia 22125 All 36.0 12.3 28.1 2.2 4.9 16.6 .
(34.7, 37.3) (11.3, 13.2) (26.8, 29.3) ( 1.7, 2.6) ( 4.3, 5.5) (15.6, 17.6) .
USA 1313 Yes 38.1 33.0 8.7 1.7 . 18.6 0.0
(34.8, 41.5) (29.8, 36.1) ( 6.7, 10.7) ( 0.7, 2.6) . (16.1, 21.1) ( 0.0, 0.0)
USA 462 No 25.3 21.3 18.7 4.2 . 30.3 0.2
(20.4, 30.3) (16.6, 26.0) (13.9, 23.4) ( 1.6, 6.9) . (25.0, 35.6) (-0.2, 0.7)
USA 1775 All 34.5 29.7 11.5 2.4 . 21.9 0.1
(31.7, 37.3) (27.0, 32.3) ( 9.5, 13.4) ( 1.4, 3.4) . (19.5, 24.2) (-0.1, 0.2)
Table 7 shows percentage of respondents that mention actors who they would trust the most to help them decide whether to get a COVID-19 vaccine. For all countries the questions was asked regardless if respondent would take a vaccine, would not take it, does not know or does not respond. For India respondents were able to mention more than one actor, for the rest of countries only one actor was allowed. While rows should sum to 100%, rounding makes number slightly above or below. A 95% confidence interval is shown between parentheses.

2.9 Summary stats

# Summary statistics table
# transform the categorical variables into dummy variables
# Expand the categorical demographics into indicator columns named
# "<variable>_<level>" so that each share can be estimated as a mean.
df2 <- fastDummies::dummy_cols(
  df2,
  select_columns = c("age_groups", "educ_binary", "gender")
)

# Estimate the mean of one column as the coefficient of an intercept-only
# lm_robust() fit.
#   .var  : column name as a string (backtick-quoted below, so names with
#           spaces, brackets or ">" are accepted)
#   .data : data frame containing that column
#   ...   : forwarded to lm_robust() (e.g. sampling weights)
# Returns the fitted coefficient vector (the intercept).
get_stat <- function(.var, .data, ...) {
  intercept_only <- as.formula(paste0("`", .var, "` ~ 1"))
  fit <- lm_robust(formula = intercept_only, data = .data, ...)
  coef(fit)
}

# Unweighted demographic shares (in percent) for each study group in df2:
# one row per group, one column per demographic indicator.
data_sumstat <-
  df2 %>%
  # One row per group, with that group's rows stored in the `data` list-column.
  dplyr::nest_by(group) %>%
  dplyr::mutate(
    Female              = get_stat("gender_Female", .data = data),
    age_18_30           = get_stat("age_groups_[18,30)", data),
    age_30_45           = get_stat("age_groups_[30,45)", data),
    age_45_60           = get_stat("age_groups_[45,60)", data),
    age_60              = get_stat("age_groups_[60, Inf)", data),
    Less_than_secondary = get_stat("educ_binary_Up to Secondary", data),
    More_than_secondary = get_stat("educ_binary_> Secondary", data)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::select(-data) %>%
  dplyr::rename(country = group) %>%
  # Proportions -> percentages.
  dplyr::mutate(across(where(is.double), function(share) share * 100))


#WGM data
# Wellcome Global Monitor 2018 benchmark data, restricted to adults (18+) in
# the study countries and recoded to the same demographic indicators as df2.

# WGM country code (WP5) -> country name. Defined once so the filter and the
# recode below cannot drift apart.
wgm_country_lookup <- c(
  "1" = "USA", "9" = "Pakistan", "31" = "India", "35" = "Nigeria",
  "41" = "Uganda", "63" = "Mozambique", "65" = "Rwanda", "76" = "Russia",
  "78" = "Burkina Faso", "80" = "Sierra Leone", "105" = "Colombia",
  "157" = "Nepal"
)

wgmdata <-
  readr::read_csv("2_input_data/wgm_2018_publiccsv.csv") %>%
  dplyr::filter(WP5 %in% as.numeric(names(wgm_country_lookup)),
                Age >= 18) %>%
  dplyr::mutate(
    country =
      plyr::mapvalues(WP5,
                      from = as.numeric(names(wgm_country_lookup)),
                      to = unname(wgm_country_lookup)),
    # Left-closed age bins, matching the "[18,30)"-style labels used for df2.
    age_groups = cut(x = Age, breaks = c(-Inf, 18, 30, 45, 60, +Inf),
                     right = FALSE)) %>%
  dplyr::select(country, wgt, gender = Gender, age = Age, educ = Education,
                age_groups) %>%
  fastDummies::dummy_cols(select_columns = "age_groups") %>%
  dplyr::mutate(
    # Indicator coding as used below: gender 2 is treated as female,
    # education 1-2 as up to secondary and 3 as more than secondary.
    # NOTE(review): confirm these codes against the WGM codebook.
    gender_Female = dplyr::if_else(gender == 2, 1, 0),
    `educ_binary_Up to Secondary` = dplyr::if_else(educ == 1 | educ == 2, 1, 0),
    `educ_binary_> Secondary` = dplyr::if_else(educ == 3, 1, 0))


# Weighted demographic shares (in percent) per country from the WGM data,
# using the sampling weight column `wgt`.
# `weights` is spelled out in full: the previous `weight = wgt` only reached
# lm_robust()'s `weights` argument through partial argument matching.
wgmdata_sumstat <- 
  wgmdata %>% 
  # One row per country, with that country's rows stored in `data`.
  dplyr::nest_by(country) %>% 
  dplyr::mutate(
    Female = get_stat("gender_Female", data, weights = wgt),
    age_18_30 = get_stat("age_groups_[18,30)", data, weights = wgt),
    age_30_45 = get_stat("age_groups_[30,45)", data, weights = wgt),
    age_45_60 = get_stat("age_groups_[45,60)", data, weights = wgt),
    age_60    = get_stat("age_groups_[60, Inf)", data, weights = wgt),
    Less_than_secondary = get_stat("educ_binary_Up to Secondary", data, weights = wgt),
    More_than_secondary = get_stat("educ_binary_> Secondary", data, weights = wgt)
    ) %>% 
  dplyr::select(-data) %>% 
  dplyr::ungroup() %>% 
  # Proportions -> percentages.
  dplyr::mutate(across(where(is.double), ~ . * 100)) 

# Side-by-side comparison: for each study, its own statistic followed by the
# WGM benchmark (columns suffixed "_wgm") for the same demographic.
# `all = TRUE` was removed from the join: it is an argument of base::merge(),
# not of dplyr::left_join(), where it is either silently ignored or rejected
# as an unused argument depending on the dplyr version.
# NOTE(review): study names such as "Pakistan 1" or "Uganda 2" have no
# identically named WGM country, so their "_wgm" columns are NA after this
# join - confirm whether they should be matched on the base country name.
sum_stat_col <- 
  dplyr::left_join(data_sumstat, 
                   wgmdata_sumstat, 
                   by = "country", suffix = c("", "_wgm")) %>% 
  # Interleave study / benchmark columns demographic by demographic.
  dplyr::select(
    country,
    starts_with("Female"),
    starts_with("age_18_30"),
    starts_with("age_30_45"),
    starts_with("age_45_60"),
    starts_with("age_60"),
    starts_with("Less_than_secondary"),
    starts_with("More_than_secondary")
  )


# LaTeX summary-statistics table in column layout: one row per study and,
# for each of the 7 demographics, a "COVID-19 Study" and a "Population" column
# (15 columns in total, grouped under a spanning header).
tab_sum_col <-
  knitr::kable(
    sum_stat_col, 
    caption = "Summary statistics for gender, age, education",
    col.names = c("Study", rep(c("COVID-19 Study", "Population"), 7)),
    digits = 1,
    format = "latex", 
    align = c("l", rep("c", 14)), 
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE, linesep = "") %>%
  kableExtra::kable_styling(full_width = FALSE) %>% 
  kableExtra::add_header_above(
    c(" " = 1, 
      "% Women" = 2, 
      "% Age in [18,30)" = 2, 
      "% Age in [30,45)" = 2, 
      "% Age in [45,60)" = 2,
      "% Age 60+" = 2,
      "% Up to Secondary" = 2, 
      "% More than Secondary" = 2)) %>%
  # The table has 15 columns (1 label + 14 statistics); the previous range
  # 2:16 addressed a 16th column that does not exist.
  kableExtra::column_spec(2:15, width = "3em") %>% 
  kableExtra::column_spec(1, width = "5em") %>% 
  kableExtra::footnote(
    general = "This table presents summarys statistics for our data and compares it with estimates from other sources of data. For each category, the left column and the right column correspond respectively to the statistics computed from our sample and from previous surveys. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.",
    threeparttable = TRUE) %>% 
  kableExtra::landscape() 

# Mark the benchmark rows so they remain distinguishable once they are
# stacked with the study rows.
wgmdata_sumstat_label <- dplyr::mutate(
  wgmdata_sumstat,
  country = paste0(country, " (WGM)")
)

# Row layout of the summary statistics: each study (or group of studies from
# the same country) is followed by its WGM benchmark row, with the pooled
# "All" row first and Russia and USA last.
#
# The order is spelled out by name. The previous version sorted by `country`
# and then applied a hard-coded row permutation, which is only correct for one
# particular string collation: e.g. whether "Uganda" sorts before "USA", or
# "Pakistan (WGM)" before "Pakistan 1", depends on the locale used by
# arrange(), so rows could silently end up under the wrong position.
sum_stat_row_order <- c(
  "All",
  "Burkina Faso", "Burkina Faso (WGM)",
  "Colombia", "Colombia (WGM)",
  "India", "India (WGM)",
  "Mozambique", "Mozambique (WGM)",
  "Nepal", "Nepal (WGM)",
  "Nigeria", "Nigeria (WGM)",
  "Pakistan 1", "Pakistan 2", "Pakistan (WGM)",
  "Rwanda", "Rwanda (WGM)",
  "Sierra Leone 1", "Sierra Leone 2", "Sierra Leone (WGM)",
  "Uganda 1", "Uganda 2", "Uganda (WGM)",
  "Russia", "Russia (WGM)",
  "USA", "USA (WGM)"
)

sum_stat_row <-
  dplyr::bind_rows(data_sumstat, wgmdata_sumstat_label) %>%
  # Sorting on the numeric position is locale-independent; any row not listed
  # above gets NA and is placed at the end instead of being dropped.
  dplyr::arrange(match(country, sum_stat_row_order))

# LaTeX summary-statistics table in row layout: study rows and their
# "(WGM)" benchmark rows stacked, one column per demographic share.
tab_sum_row <- 
  knitr::kable(
    sum_stat_row,caption = "Summary statistics for gender, age, education",
    col.names = c("Study", "% Women",
                  "% Age in [18,30)", "% Age in [30,45)","% Age in [45,60)","% Age 60+",
                  "% Up to Secondary", "% More than Secondary"),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    format = "latex", booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)), digits = 1) %>%
  kableExtra::kable_styling(full_width = FALSE) %>% 
  kableExtra::footnote(
    general = "This table presents summarys statistics for our data and compares it with estimates from other sources of data. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.",
    threeparttable = TRUE) %>%
  kableExtra::column_spec(1:8, width = "5em") 



# Version of the row-layout summary-statistics table printed in the rendered
# report (default kable format).
knitr::kable(
  sum_stat_row,
  caption = "Summary statistics for gender, age, education",
  col.names = c("Study", "% Women", 
                "% Age in [18,30)", "% Age in [30,45)", "% Age in [45,60)", "% Age 60+",
                "% Up to Secondary", "% More than Secondary"),  
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  booktabs = TRUE, linesep = "", align = c("l", rep("c", 7)), digits = 1) %>%
  kableExtra::kable_styling(full_width = FALSE) %>% 
  kableExtra::footnote(
    general = "This table presents summarys statistics for our data and compares it with estimates from other sources of data. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.") %>%
  kableExtra::column_spec(1:8, width = "5em") 
Summary statistics for gender, age, education
Study % Women % Age in [18,30) % Age in [30,45) % Age in [45,60) % Age 60+ % Up to Secondary % More than Secondary
All 48.9 34.4 43.5 16.9 5.1 77.5 22.5
Burkina Faso 27.9 37.5 50.0 9.2 3.3 61.1 38.9
Burkina Faso (WGM) 48.5 40.3 35.0 14.8 9.9 99.5 0.5
Colombia 63.6 32.2 34.9 25.4 7.5 65.9 34.1
Colombia (WGM) 51.9 29.4 29.5 23.5 17.6 93.7 6.3
India 19.8 19.9 45.7 29.3 5.1 82.4 17.6
India (WGM) 49.3 33.2 33.2 21.1 12.5 95.3 4.7
Mozambique 42.9 7.8 42.9 34.3 15.0 81.8 18.2
Mozambique (WGM) 51.8 41.7 34.1 13.7 10.5 100.0 0.0
Nepal 42.7 28.9 39.7 21.9 7.9 . .
Nepal (WGM) 51.5 36.8 29.5 18.6 15.1 95.0 5.0
Nigeria 37.1 33.7 51.3 12.0 3.0 . .
Nigeria (WGM) 48.3 43.3 32.3 13.5 11.0 98.4 1.6
Pakistan 1 49.8 14.6 68.4 16.4 0.6 77.6 22.4
Pakistan 2 . . . . . 67.9 32.1
Pakistan (WGM) 48.4 41.2 31.5 17.7 9.6 93.4 6.6
Rwanda 36.1 55.6 34.7 8.0 1.7 82.7 17.3
Rwanda (WGM) 53.7 43.2 31.6 16.2 9.0 100.0 0.0
Sierra Leone 1 33.6 44.3 42.2 10.9 2.5 61.0 39.0
Sierra Leone 2 25.7 13.8 39.7 33.6 12.8 87.3 12.7
Sierra Leone (WGM) 51.9 43.9 31.5 14.8 9.8 97.1 2.9
Uganda 1 100.0 61.8 38.2 0.0 0.0 90.3 9.7
Uganda 2 49.0 38.1 36.7 16.8 8.3 70.9 29.1
Uganda (WGM) 52.4 47.2 32.0 13.3 7.5 99.0 1.0
Russia 64.5 22.6 56.5 19.7 1.1 7.0 93.0
Russia (WGM) 56.0 20.4 27.9 27.5 24.2 73.2 26.8
USA 51.4 23.2 28.4 21.3 27.2 21.1 78.9
USA (WGM) 49.8 19.9 23.3 25.8 31.0 65.6 34.4
Note:
This table presents summarys statistics for our data and compares it with estimates from other sources of data. Data for Russia comes from census data from the Statistical Agency. For the USA, we use data from the 2019 American Community Survey. For all other countries, the Wellcome Global Monitor 2018 was used. Statistics for our surveys are not weighted, while estimates from benchmark sources are obtained using sampling weights.