Machine learning as proxy
 Heterogeneity in CATE for Birthweight
## Function implementing the generic machine learning approach of Chernozhukov, Demirer, Duflo, & Fernandez-Val (2018)
genericML <- function(x,y,treat, fit.function, predict.function,
                      n.split=10, n.group=5, clusterid=NULL) {
  require(sandwich)  # vcovHC is used even without clusters; vcovCL when clusterid is given
  blp <- matrix(NA, nrow=n.split, ncol=2)
  blp.se <- blp
  gate <- matrix(NA, nrow=n.split, ncol=n.group)
  gate.se <- gate 
  baseline <- matrix(NA, nrow=nrow(x), ncol=n.split)
  cate <- matrix(NA, nrow=nrow(x), ncol=n.split)
  Lambda <- matrix(NA, nrow=n.split, ncol=2)
  for(i in 1:n.split) {
    ## randomly split into main and auxiliary (!main) halves
    main <- runif(nrow(x))>0.5
    ## fit proxies on the auxiliary half, separately by treatment status
    fit1 <- fit.function(x[!main & treat==1,], y[!main & treat==1])
    fit0 <- fit.function(x[!main & treat==0,], y[!main & treat==0])
    B <- as.vector(predict.function(fit0,x))      # proxy baseline B(x)
    S <- as.vector(predict.function(fit1,x)) - B  # proxy CATE S(x)
    baseline[,i] <- B
    cate[,i] <- S
    ES <- mean(S)
    ## BLP
    # randomized treatment: P(treat|x) = P(treat), estimated by mean(treat),
    # so the weights (P(x)(1-P(x)))^{-1} are constant and can be dropped
    p <- mean(treat)
    df <- data.frame(y, B, treat, S, main)
    reg <- lm(y ~ B + I(treat-p) + I((treat-p)*(S-ES)), data=subset(df, main))
    blp[i,] <- reg$coef[3:4]
    if (is.null(clusterid)) blp.se[i,] <- sqrt(diag(vcovHC(reg))[3:4])
    else blp.se[i,] <- sqrt(diag(vcovCL(reg, clusterid[main]))[3:4])
    Lambda[i,1] <- reg$coefficients[4]^2*var(S)  # Lambda = b1^2 Var(S)
    ## GATES
    cut <- quantile(S, probs=seq(0,1,length.out=(n.group+1)))
    cut[n.group+1] <- cut[n.group+1] + 1  # shift the top cut up so the maximum falls in the last group
    for(k in 1:n.group) {
      df[,sprintf("G.%d",k)] <- (cut[k]<=S & S<cut[k+1])
    }
    greg <- lm(as.formula(paste(c("y ~ B ", sprintf("I((treat-p)*G.%d)",1:n.group)), collapse=" + ")),
                data=subset(df,main))    
    gc <- grep("G", names(greg$coefficients))
    gate[i,] <- greg$coefficients[gc]
    if (is.null(clusterid)) gate.se[i,] <- sqrt(diag(vcovHC(greg))[gc])
    else gate.se[i,] <- sqrt(diag(vcovCL(greg, clusterid[main]))[gc])
    Lambda[i,2] <- sum(gate[i,]^2)/n.group
  }
  out <- list(gate=gate, gate.se=gate.se, blp=blp, blp.se=blp.se, Lambda=Lambda, baseline=baseline, cate=cate)
  return(out)
}
genericML.summary <- function(gml)
{
  blp <- apply(gml$blp, 2, median, na.rm=TRUE)
  blp.se <- apply(gml$blp.se, 2, median, na.rm=TRUE)
  gate <- apply(gml$gate, 2, median, na.rm=TRUE)
  gate.se <- apply(gml$gate.se, 2, median, na.rm=TRUE)
  Lambda <- apply(gml$Lambda, 2, median, na.rm=TRUE)
  return(list(blp=blp, blp.se=blp.se, gate=gate, gate.se=gate.se, Lambda=Lambda))
}
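`genericML.summary` aggregates over the random sample splits by taking the median of each estimate and of each standard error. This is the aggregation suggested by Chernozhukov, Demirer, Duflo, & Fernandez-Val (2018); they additionally recommend adjusting confidence levels for the randomness of the splits, an adjustment not applied in the tables below.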
library(glmnet)
# create x matrix
fmla.l <- bw ~ pkh_kec_ever +
  as.factor(edu)*as.factor(agecat) + log_xp_percap + hh_land + hh_home + as.factor(dist) +
  hh_phone + hh_rf_tile + hh_rf_shingle + hh_rf_fiber +
  hh_wall_plaster + hh_wall_brick + hh_wall_wood + hh_wall_fiber +
  hh_fl_tile + hh_fl_plaster + hh_fl_wood + hh_fl_dirt +
  hh_water_pam + hh_water_mechwell + hh_water_well + hh_water_spring + hh_water_river + 
  hh_waterhome + 
  hh_toilet_own + hh_toilet_pub + hh_toilet_none +
  hh_waste_tank + hh_waste_hole + hh_waste_river + hh_waste_field +
  hh_kitchen +
  hh_cook_wood + hh_cook_kerosene + hh_cook_gas +
  tv + fridge + motorbike + car + goat + cow + horse
m <- lm(fmla.l, data=pw, x=TRUE, y=TRUE)
treat <- m$x[,2]           # pkh_kec_ever (column 1 is the intercept)
Xl <- m$x[,3:ncol(m$x)]    # remaining covariates
scale <- sd(m$y)
center <- mean(m$y)
yl <- (m$y-center)/scale   # standardized outcome (the fit functions below also rescale internally)
lid <- as.factor(pw[as.numeric(rownames(m$x)),]$Location_ID)  # cluster ids for the rows kept by lm
gml.lasso <- genericML(Xl,m$y, treat, 
                   function(x,y) cv.glmnet(x,(y-center)/scale,alpha=1,parallel=FALSE, intercept=TRUE, nfolds=20),
                   function(model, x) { predict(model, x, s=model$lambda.min, type="response")*scale + center },
                   n.split=11,n.group=5, clusterid=lid)
library(grf)
gml.rf <- genericML(Xl,m$y, treat, 
                   function(x,y) regression_forest(x, (y-center)/scale, tune.parameters=TRUE),
                   function(model, x) { predict(model,x)$predictions*scale + center},
                   n.split=11,n.group=5, clusterid=lid)
library(RSNNS)
gml.nn <- genericML(Xl,m$y, treat, 
                   function(x,y) mlp(x,(y-center)/scale,linOut=TRUE, size=c(10,10), learnFunc="SCG"),
                   function(model, x) { predict(model,x)*scale + center},
                   n.split=11,n.group=5, clusterid=lid)
library(GGally)
df <- data.frame(Lasso=apply(gml.lasso$cate,1,median),
                 Forest=apply(gml.rf$cate,1,median),
                 Neural=apply(gml.nn$cate,1,median))
ggpairs(df, lower=list(continuous="smooth"))  + theme_minimal()

 
Best linear projection of CATE
- Randomly partition the sample into auxiliary and main samples 
- Use any method on the auxiliary sample to estimate \[S(x) = \widehat{\Er[y(1) - y(0) | x]}\] and \[B(x) = \widehat{\Er[y(0)|x]}\] 
- Use the main sample to regress with weights \((P(x)(1-P(x)))^{-1}\) \[ 
y = \alpha_0 + \alpha_1 B(x) + \beta_0 (d-P(x)) + \beta_1
(d-P(x))(S(x) - \Er[S(x)]) + \epsilon 
\] (a simulated sketch of this regression follows below)
- \(\hat{\beta}_0, \hat{\beta}_1 \to_p \argmin_{b_0,b_1} \Er[(s_0(x) - b_0 - b_1 (S(x)-\Er[S(x)]))^2]\), where \(s_0(x) = \Er[y(1)-y(0)|x]\) is the true CATE 
- \(\Lambda = \beta_1^2 Var(S(x)) = corr(s_0(x),S(x))^2 Var(s_0(x))\) 
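Since treatment here is randomized, \(P(x)=p\) is constant, so the weights \((P(x)(1-P(x)))^{-1}\) are constant and the weighted regression reduces to the unweighted `lm` call in `genericML`. As a sanity check, here is a minimal simulated sketch of the BLP regression; every variable name and parameter value is illustrative, not from the application.
## Simulated sketch of the BLP regression (illustrative values only)
set.seed(1)
n <- 5000
x <- runif(n)
p <- 0.5
d <- runif(n) < p            # randomized treatment with constant propensity p
s0 <- 1 + 2*x                # true CATE, so ATE = E[s0] = 2
y <- x + d*s0 + rnorm(n)     # outcome with heterogeneous treatment effect
S <- 1.5 + 1.5*x             # imperfect proxy for s0, as an ML fit might produce
B <- x                       # proxy for the baseline E[y(0)|x]
ES <- mean(S)
blp <- lm(y ~ B + I(d - p) + I((d - p)*(S - ES)))
coef(blp)[3:4]  # approx. b0 = E[s0] = 2 and b1 = Cov(s0,S)/Var(S) = 4/3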
library(kableExtra)
colfn <- function(gml) {
  s <- genericML.summary(gml)
  c(s$blp[1], s$blp.se[1], s$blp[2], s$blp.se[2], s$Lambda[1])
}
tbl <- cbind(colfn(gml.lasso), colfn(gml.rf), colfn(gml.nn))
colnames(tbl) <- c("Lasso","Regression forest","Neural network")
rownames(tbl) <- c("ATE=b0","se","b1","se","Lambda")
kable_styling(kable(tbl,
                    caption="Machine learning proxies as BLP of CATE on Birthweight",
                    format="html", digits=3),
              bootstrap_options = c("striped", "hover", "condensed",
                                    "responsive"), full_width=TRUE)
Machine learning proxies as BLP of CATE on Birthweight

|        |   Lasso | Regression forest | Neural network |
|--------|--------:|------------------:|---------------:|
| ATE=b0 |  -3.325 |           -12.390 |         -9.003 |
| se     |  33.095 |            32.024 |         31.416 |
| b1     |   0.169 |             0.435 |          0.114 |
| se     |   0.480 |             0.653 |          0.118 |
| Lambda | 562.187 |           574.082 |        403.610 |
 
Group average treatment effects
- Define \(G_k = 1\{\ell_{k-1} \leq S(x) < \ell_k\}\) with \(\ell_k\) the \(k/K\) quantile of \(S(x)\) (here \(K=5\)) 
- Use the main sample to regress with weights \((P(x)(1-P(x)))^{-1}\) \[ 
y = \alpha_0 + \alpha_1 B(x) + \sum_k \gamma_k (d-P(x)) G_k +
\epsilon 
\] (sketched below)
- \(\hat{\gamma}_k \to_p \Er[y(1) - y(0) | G_k]\) 
- \(\bar{\Lambda} = \frac{1}{K} \sum_k \gamma_k^2\) 
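A minimal, self-contained sketch of the GATES regression under the same simulated setup as the BLP sketch above (again, every name and value is illustrative):
## Simulated sketch of the GATES regression (illustrative values only)
set.seed(1)
n <- 5000; K <- 5
x <- runif(n); p <- 0.5
d <- runif(n) < p                 # randomized treatment
s0 <- 1 + 2*x                     # true CATE
y <- x + d*s0 + rnorm(n)
S <- 1.5 + 1.5*x                  # proxy CATE
B <- x                            # proxy baseline
cut <- quantile(S, probs=seq(0, 1, length.out=K+1))
cut[K+1] <- cut[K+1] + 1          # shift the top cut so the maximum lands in group K
G <- sapply(1:K, function(k) cut[k] <= S & S < cut[k+1])
colnames(G) <- sprintf("G.%d", 1:K)
gdat <- data.frame(y, B, d, G)
fmla <- as.formula(paste(c("y ~ B", sprintf("I((d-p)*G.%d)", 1:K)), collapse=" + "))
greg <- lm(fmla, data=gdat)
coef(greg)[grep("G", names(coef(greg)))]  # roughly E[s0 | G_k], increasing in k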
library(kableExtra)
colfn <- function(gml) {
  s <- genericML.summary(gml)
  c(s$gate[1], s$gate.se[1],
    s$gate[2], s$gate.se[2],
    s$gate[3], s$gate.se[3],
    s$gate[4], s$gate.se[4],
    s$gate[5], s$gate.se[5], 
    s$Lambda[2])
}
tbl <- cbind(colfn(gml.lasso), colfn(gml.rf), colfn(gml.nn))
colnames(tbl) <- c("Lasso","Regression forest","Neural network")
rownames(tbl) <- c(as.vector(sapply(1:5, function(x) c(sprintf("GATE %d",x),"se"))),"Lambda")
kable_styling(kable(tbl,
                    caption="GATE on Birthweight",
                    format="html", digits=3),
              bootstrap_options = c("striped", "hover", "condensed",
                                    "responsive"), full_width=TRUE)
GATE on Birthweight

|        |    Lasso | Regression forest | Neural network |
|--------|---------:|------------------:|---------------:|
| GATE 1 |  -16.413 |           -28.554 |        -14.990 |
| se     |   71.836 |            72.023 |         62.556 |
| GATE 2 |   44.850 |           -39.772 |        -30.630 |
| se     |   67.828 |            64.132 |         67.542 |
| GATE 3 |  -41.362 |            -1.857 |         -5.024 |
| se     |   71.415 |            63.443 |         68.606 |
| GATE 4 |  -25.154 |           -28.400 |          0.072 |
| se     |   69.296 |            65.399 |         70.607 |
| GATE 5 |   12.703 |            54.117 |         84.213 |
| se     |   72.644 |            76.440 |         71.837 |
| Lambda | 2850.894 |          6524.788 |       3937.901 |
 
 Heterogeneity in CATE on Midwife utilization
# midwife use outcome for the same observations used above
mwb <- pw[as.numeric(rownames(m$x)),]$midwife_birth
mw.lasso <- genericML(Xl,mwb, treat, 
                   function(x,y) cv.glmnet(x,y,family="binomial",
                                           alpha=1,parallel=FALSE, intercept=TRUE, nfolds=20),
                   function(model, x) { predict(model, x, s=model$lambda.min, type="response") },
                   n.split=11,n.group=5, clusterid=lid)
library(grf)
mw.rf <- genericML(Xl,mwb, treat, 
                   function(x,y) regression_forest(x, y, tune.parameters=TRUE),
                   function(model, x) { predict(model,x)$predictions },
                   n.split=11,n.group=5, clusterid=lid)
library(RSNNS)
mw.nn <- genericML(Xl,mwb, treat, 
                   function(x,y) mlp(x, y, linOut=FALSE, size=c(10,10), learnFunc="SCG"),
                   function(model, x) { predict(model,x) },
                   n.split=11,n.group=5, clusterid=lid)
library(GGally)
df <- data.frame(Lasso=apply(mw.lasso$cate,1,median),
                 Forest=apply(mw.rf$cate,1,median),
                 Neural=apply(mw.nn$cate,1,median))
ggpairs(df, lower=list(continuous="smooth"))  + theme_minimal()

library(kableExtra)
colfn <- function(gml) {
  s <- genericML.summary(gml)
  c(s$blp[1], s$blp.se[1], s$blp[2], s$blp.se[2], s$Lambda[1])
}
tbl <- cbind(colfn(mw.lasso), colfn(mw.rf), colfn(mw.nn))
colnames(tbl) <- c("Lasso","Regression forest","Neural network")
rownames(tbl) <- c("ATE=b0","se","b1","se","Lambda")
kable_styling(kable(tbl,
                    caption="Machine learning proxies as BLP of CATE on Midwife Use",
                    format="html", digits=3),
              bootstrap_options = c("striped", "hover", "condensed",
                                    "responsive"), full_width=TRUE)
Machine learning proxies as BLP of CATE on Midwife Use

|        | Lasso | Regression forest | Neural network |
|--------|------:|------------------:|---------------:|
| ATE=b0 | 0.057 |             0.058 |          0.056 |
| se     | 0.024 |             0.024 |          0.025 |
| b1     | 0.368 |             0.606 |          0.150 |
| se     | 0.167 |             0.263 |          0.088 |
| Lambda | 0.003 |             0.003 |          0.002 |
library(kableExtra)
colfn <- function(gml) {
  s <- genericML.summary(gml)
  c(s$gate[1], s$gate.se[1],
    s$gate[2], s$gate.se[2],
    s$gate[3], s$gate.se[3],
    s$gate[4], s$gate.se[4],
    s$gate[5], s$gate.se[5], 
    s$Lambda[2])
}
tbl <- cbind(colfn(mw.lasso), colfn(mw.rf), colfn(mw.nn))
colnames(tbl) <- c("Lasso","Regression forest","Neural network")
rownames(tbl) <- c(as.vector(sapply(1:5, function(x) c(sprintf("GATE %d",x),"se"))),"Lambda")
kable_styling(kable(tbl,
                    caption="GATE on Midwife Use",
                    format="html", digits=3),
              bootstrap_options = c("striped", "hover", "condensed",
                                    "responsive"), full_width=TRUE)
GATE on Midwife Use

|        |  Lasso | Regression forest | Neural network |
|--------|-------:|------------------:|---------------:|
| GATE 1 |  0.012 |             0.000 |          0.036 |
| se     |  0.049 |             0.054 |          0.057 |
| GATE 2 | -0.023 |            -0.024 |          0.030 |
| se     |  0.052 |             0.051 |          0.058 |
| GATE 3 |  0.022 |             0.021 |          0.044 |
| se     |  0.052 |             0.050 |          0.047 |
| GATE 4 |  0.056 |             0.058 |          0.034 |
| se     |  0.056 |             0.050 |          0.053 |
| GATE 5 |  0.169 |             0.178 |          0.137 |
| se     |  0.053 |             0.053 |          0.052 |
| Lambda |  0.008 |             0.009 |          0.009 |
 
Covariate means by group
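This is the classification analysis (CLAN) of Chernozhukov, Demirer, Duflo, & Fernandez-Val (2018): within each split, observations are grouped by quantiles of the estimated CATE proxy, and covariate means by group are computed by regressing each covariate on the group indicators without an intercept.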
df <- pw[as.numeric(rownames(m$x)),]
df$edu99 <- df$edu==99           # indicator for education coded 99
df$educ <- df$edu
df$educ[df$educ==99] <- NA       # treat the 99 code as missing
vars <- c("log_xp_percap","agecat","educ","tv","goat",
          "hh_toilet_own","motorbike","hh_cook_wood","pkh_ever")
tmp <- data.frame()
groupMeans <- function(var, gml, clusterid) {
  n.group <- ncol(gml$gate)
  gate <- matrix(NA, nrow=nrow(gml$gate), ncol=ncol(gml$gate))
  gate.se <- gate
  dat <- data.frame(y=var)
  for (i in 1:ncol(gml$cate)) {
    S <- gml$cate[,i]
    cut <- quantile(S, probs=seq(0,1,length.out=(n.group+1)))
    cut[n.group+1] <- cut[n.group+1] + 1 
    for(k in 1:n.group) {
      dat[,sprintf("G.%d",k)] <- 1*(cut[k]<=S & S<cut[k+1])
    }
    greg <- lm(as.formula(paste(c("y ~ -1", sprintf("G.%d",1:n.group)), collapse=" + ")),
                data=dat)
    gc <- grep("G", names(greg$coefficients))
    gate[i,] <- greg$coefficients[gc]
    ## lm drops rows with missing y, so subset clusterid to the rows actually used
    if (is.null(clusterid)) gate.se[i,] <- sqrt(diag(vcovHC(greg))[gc])
    else gate.se[i,] <- sqrt(diag(vcovCL(greg, clusterid[!is.na(dat$y)]))[gc])
  }
  return(list(mean=apply(gate, 2, function(x) median(x,na.rm=TRUE)),
              se = apply(gate.se, 2, function(x) median(x,na.rm=TRUE))))
}
methods <- c("Lasso","Forest","Neural")
gmls <- list(mw.lasso,mw.rf,mw.nn)
for(v in vars) {
  for (j in seq_along(methods)) {  # loop index j, so the lm object m is not clobbered
    gm <- groupMeans(df[,v], gmls[[j]], lid)
    tmp <- rbind(tmp, data.frame(group=1:length(gm$mean), variable=v, method=methods[j],
                                 mean=gm$mean, se=gm$se))
  }
}
library(ggplot2)
fig <- ggplot(data=tmp, aes(x=group, y=mean, colour=method)) +
  geom_line() +
  geom_line(aes(y=(mean+1.96*se), colour=method), linetype=2) +
  geom_line(aes(y=(mean-1.96*se), colour=method), linetype=2) +
  facet_wrap(~ variable,scales="free_y") + theme_minimal()
print(fig)
