Descriptives

This block is not run for the design diagnosis. We harvest the descriptives to get the population structure correct from these files.

##This is an exact copy of the randomization code w/o external write (Bwindi_Recognition.R)
sample <- read.csv("~/Google Drive/Bwindi project/Recognition phase/Village_Sample_MASTER.csv", stringsAsFactors=FALSE)
table(sample$district, sample$subcounty) #Note: all suncounty names are unique to districts

dta.tab <- data.frame(table(sample$district, sample$subcounty))
singles <- names(table(sample$subcounty))[table(sample$subcounty)==1]

sample$block <- ifelse(sample$subcounty %in% singles & sample$district=="Kanungu", "singles1",
                       ifelse(sample$subcounty %in% singles & sample$district!="Kanungu", "singles2", tolower(sample$subcounty)))

set.seed(202) #from random.org 1:10000
sample$treat <- block_ra(blocks=sample$block, conditions=c("eligible","ineligible"))

##Descriptives
sort(table(sample$block))

Declaring a design, number of checklist items completed (of 10)

################################################
##M: Model

#Village-level data-generating process
population <- declare_population(
  block = add_level(N=9, u=runif(9, min=-2, max=2)), #block-level variance
  committee = add_level(N=c(6,4,4,6,12,13,16,16,16),
                        v=runif(length(committee), min=0, max=10)),
  project = add_level(N=2,
                      w=runif(length(project), min=-1, max=1),
                      lambda = ifelse(u+v+w<0, 0, u+v+w))) #Village level lambdas for count draw

pop <- population()

#Draw potential outcomes
te = 2.5 #set treatment effect

potential_outcomes <- declare_potential_outcomes(
    Y_D_0 = draw_count(mean=lambda),
    Y_D_1 = draw_count(mean=lambda+te),
    Y_D_0 = ifelse(Y_D_0>10,10,Y_D_0),
    Y_D_1 = ifelse(Y_D_1>10,10,Y_D_1)
)

po.pop <- potential_outcomes(pop)

################################################
##I: Inquiry

my_estimand <- declare_estimand(ate=mean(Y_D_1-Y_D_0))

################################################
##D: Data Strategy

#Committee-level randomization
assignment <- declare_assignment(assignment_variable="D", blocks=block, clusters=committee)
assigned.po.pop <- assignment(po.pop)

#Committee-level attrition
attrition.rate <- 0.1
report <- declare_assignment(assignment_variable="R", prob = 1-attrition.rate)
full.assigned.po.pop <- report(assigned.po.pop)

reveal <- declare_reveal(Y, assignment_variables=D)
dta.reveal <- reveal(full.assigned.po.pop)


################################################
##A: Answer Strategies

A1 <- declare_estimator(Y ~ D, estimand = "ate",  
                               model =  difference_in_means, 
                               label = "DIM",
                               subset = (R == 1)) #Note: Blocked DIM does not work because of attrition

A2 <- declare_estimator(Y ~ D, estimand = "ate",  
                               model =  lm_robust,
                               clusters = committee,
                               label = "lm_robust",
                               subset = (R == 1))


##Design diagnosis
design <- population + potential_outcomes + my_estimand + assignment + report + reveal + A1 + A2
diagnosis <- diagnose_design(design, sims=1000)
kable(reshape_diagnosis(diagnosis))
Design Label Estimand Label Estimator Label Term N Sims Bias RMSE Power Coverage Mean Estimate SD Estimate Mean Se Type S Rate Mean Estimand
design ate DIM D 1000 0.00 0.54 0.95 0.91 1.96 0.58 0.48 0.00 1.96
(0.02) (0.01) (0.01) (0.01) (0.02) (0.02) (0.00) (0.00) (0.01)
design ate lm_robust D 1000 0.00 0.54 0.91 0.97 1.96 0.58 0.60 0.00 1.96
(0.02) (0.01) (0.01) (0.01) (0.02) (0.02) (0.00) (0.00) (0.01)
##Designs by treatment effects
designs <- redesign(design, te = seq(1, 3, 0.1))
diagnoses <- diagnose_design(designs)

#power
diagnoses$diagnosands_df %>%
  ggplot(aes(te, power, group=estimator_label)) +
  geom_line(aes(color = estimator_label))

#coverage
diagnoses$diagnosands_df %>%
  ggplot(aes(te, coverage, group=estimator_label)) +
  geom_line(aes(color = estimator_label))

Declaring a design, individual-level Likert-scale survey item with cross-sectional baseline

In this section, we first compare two designs:

  1. A design that includes a cross-sectional baseline and endline survey within limited individual-level panel data;
  2. A design that includes only an endline survey with three times the sample size;

These designs are both feasible given our budget constraint.

After concluding that the baseline and endline cross-sectional design is preferable from the perspective of power, we go on to evaluate minimum detectable effects.

################################################
##M: Model

#Village-level data-generating process
population <- declare_population(
  block = add_level(N=9, 
                    u=rnorm(N, mean=0, sd=1)), 
  committee = add_level(N=c(6,4,4,6,12,13,16,16,16),
                        v=rnorm(N, mean=0, sd=3)),
  individual = add_level(N=200,
                         w=correlate(given=v, rho=0.7,
                                     rnorm, mean=0, sd=2),
                         e=rnorm(N, mean=1, sd=1),
                         pre_base=u+v+w,
                         post_base=u+v+w+e)
  )

pop <- population()

#Draw potential outcomes
te.raw = 0.9 #set treatment effect

#note: we used trial and error to produce Likert-scale data similar to previous outcomes in this setting

potential_outcomes <- declare_potential_outcomes(
    Y_pre = as.numeric(draw_ordered(x=pre_base,
                        break_labels = c(1, 2, 3, 4, 5),
                        breaks = c(-Inf, -6, -4, -1, 2, Inf))),
    Y_D_0 = as.numeric(draw_ordered(x=post_base,
                        break_labels = c(1, 2, 3, 4, 5),
                        breaks = c(-Inf, -6, -4, -1, 2, Inf))),
    Y_D_1 = as.numeric(draw_ordered(x=post_base+te.raw,
                        break_labels = c(1, 2, 3, 4, 5),
                        breaks = c(-Inf, -6, -4, -1, 2, Inf)))
)

po.pop <- potential_outcomes(pop)

table(po.pop$Y_pre) 
## 
##    1    2    3    4    5 
## 1598 1599 3909 4655 6839
################################################
##I: Inquiry

my_estimand <- declare_estimand(ate=mean(Y_D_1-Y_D_0))
#my_estimand(po.pop)

################################################
##D: Data Strategy

#Committee-level randomization
assignment <- declare_assignment(assignment_variable="D", 
                                 blocks=block, clusters=committee)
assigned.po.pop <- assignment(po.pop)

#Indicators for baseline / endline survey sampling
baseline.rate <- 0.1
baseline <- declare_assignment(assignment_variable="B", prob = baseline.rate)
full.assigned.po.pop <- baseline(assigned.po.pop)

endline.rate <- 0.1
endline <- declare_assignment(assignment_variable="E", prob = endline.rate)
#full.assigned.po.pop <- endline(full.assigned.po.pop)

endline.rateX3 <- endline.rate*3
endlineX3 <- declare_assignment(assignment_variable="E", prob = endline.rateX3)
#full.assigned.po.pop <- endline(full.assigned.po.pop)

reveal <- declare_reveal(Y, assignment_variables=D)
dta.reveal <- reveal(full.assigned.po.pop)

rotating <- declare_potential_outcomes(
    Y_pre_reveal = ifelse(B==1,
                           Y_pre,
                           NA),
    Y_pre_reveal_mean = unsplit(lapply(split(Y_pre_reveal, committee), mean, na.rm=T), committee),
    Y_pre_used = ifelse(is.na(Y_pre_reveal),Y_pre_reveal_mean,Y_pre_reveal)
)
see <- rotating(dta.reveal)

################################################
##A: Answer Strategies

A1 <- declare_estimator(Y ~ D, estimand = "ate",  
                               model =  difference_in_means, 
                               label = "DIM",
                               blocks = block,
                               clusters = committee,
                               subset = (E == 1))

A2 <- declare_estimator(Y ~ D + block, 
                        estimand = "ate",  
                        model =  lm_robust, 
                        label = "lm_robust",
                        clusters = committee,
                        subset = (E==1))

A2.cov <- declare_estimator(Y ~ D + block + Y_pre_used, 
                        estimand = "ate",  
                        model =  lm_robust, 
                        label = "lm_robust_cov",
                        clusters = committee,
                        subset = (E == 1))

##Design diagnosis

design_baseline <- population + potential_outcomes + my_estimand + assignment + baseline + endline + reveal + rotating + A1 + A2 + A2.cov
design_X3endline <- population + potential_outcomes + my_estimand + assignment + baseline + endlineX3 + reveal + rotating + A1 + A2

diagnosis_baseline <- diagnose_design(design_baseline, sims=1000)
kable(reshape_diagnosis(diagnosis_baseline))