library(synthdid)
library(MCPanel)
library(rngtools)
library(future)
library(doFuture)
library(future.batchtools)
In this vignette, we generate the figures and tables from Arkhangelsky et al. The first section, on the California Proposition 99 application, generates Table 1 and Figure 1. The second section runs the placebo simulations discussed in Section 3 and then summarizes them, generating Tables 2-4 and Figure 2.
We also include the variants sc_reg and difp_reg, which, like sdid, use a ridge penalty when estimating the synthetic control weights \(\omega\). These use the same regularization-strength parameter \(\zeta\) as sdid, defined in Algorithm 1 of Arkhangelsky et al.
# matrix completion estimator, via MCPanel's cross-validated mcnnm routine
mc_estimate = function(Y, N0, T0) {
    N1 = nrow(Y) - N0
    T1 = ncol(Y) - T0
    W <- outer(c(rep(0, N0), rep(1, N1)), c(rep(0, T0), rep(1, T1)))
    mc_pred <- mcnnm_cv(Y, 1 - W, num_lam_L = 20)
    mc_fit  <- mc_pred$L + outer(mc_pred$u, mc_pred$v, '+')
    mc_est  <- sum(W * (Y - mc_fit)) / sum(W)
    mc_est
}

# placebo standard error for the matrix completion estimator
mc_placebo_se = function(Y, N0, T0, replications = 200) {
    N1 = nrow(Y) - N0
    theta = function(ind) { mc_estimate(Y[ind, ], length(ind) - N1, T0) }
    sqrt((replications - 1) / replications) * sd(replicate(replications, theta(sample(1:N0))))
}

# difp: synthdid with uniform time weights lambda and essentially unregularized omega
difp_estimate = function(Y, N0, T0) {
    synthdid_estimate(Y, N0, T0, weights = list(lambda = rep(1/T0, T0)), eta.omega = 1e-6)
}

# regularized variants using the sdid regularization level described above
sc_estimate_reg = function(Y, N0, T0) {
    sc_estimate(Y, N0, T0, eta.omega = ((nrow(Y) - N0) * (ncol(Y) - T0))^(1/4))
}
difp_estimate_reg = function(Y, N0, T0) {
    synthdid_estimate(Y, N0, T0, weights = list(lambda = rep(1/T0, T0)))
}

estimators = list(did = did_estimate, sc = sc_estimate, sdid = synthdid_estimate,
                  difp = difp_estimate, mc = mc_estimate,
                  sc_reg = sc_estimate_reg, difp_reg = difp_estimate_reg)
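For reference, the regularization level in Algorithm 1 of Arkhangelsky et al. is
\[
\zeta = (N_{\mathrm{tr}} \, T_{\mathrm{post}})^{1/4} \, \hat\sigma,
\]
where \(\hat\sigma\) is the standard deviation of first differences of the control units' pre-treatment outcomes (restated here from the paper rather than computed in this vignette). The argument eta.omega = ((nrow(Y)-N0)*(ncol(Y)-T0))^(1/4) in sc_estimate_reg above supplies the \((N_{\mathrm{tr}} T_{\mathrm{post}})^{1/4}\) factor; the noise scale \(\hat\sigma\) is estimated internally by the package.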
data('california_prop99')
setup = panel.matrices(california_prop99)
estimates = lapply(estimators, function(estimator) { estimator(setup$Y, setup$N0, setup$T0) })
standard.errors = mapply(function(estimate, name) {
    set.seed(12345)
    if(name == 'mc') { mc_placebo_se(setup$Y, setup$N0, setup$T0) }
    else             { sqrt(vcov(estimate, method='placebo')) }
}, estimates, names(estimators))
california.table = rbind(unlist(estimates), unlist(standard.errors))
rownames(california.table) = c('estimate', 'standard error')
colnames(california.table) = toupper(names(estimators))
round(california.table, digits=1)
## DID SC SDID DIFP MC SC_REG DIFP_REG
## estimate -27.3 -19.6 -15.6 -11.1 -20.2 -21.7 -16.1
## standard error 17.7 9.9 8.4 9.5 11.5 10.0 9.2
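For example, a normal-approximation 95% confidence interval for the SDID estimate can be read off these two rows (a usage sketch, not part of the original script):

# Hypothetical follow-up: point estimate plus/minus 1.96 standard errors.
c(estimates$sdid) + c(-1, 1) * qnorm(.975) * standard.errors['sdid']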
synthdid_plot(estimates[1:3], facet.vertical=FALSE,
              control.name='control', treated.name='california',
              lambda.comparable=TRUE, se.method = 'none',
              trajectory.linetype = 1, line.width=.75, effect.curvature=-.4,
              trajectory.alpha=.7, effect.alpha=.7, diagram.alpha=1, onset.alpha=.7) +
    theme(legend.position=c(.26,.07), legend.direction='horizontal',
          legend.key=element_blank(), legend.background=element_blank(),
          strip.background=element_blank(), strip.text.x = element_blank())
synthdid_units_plot(rev(estimates[1:3]), se.method='none') +
    theme(legend.background=element_blank(), legend.title = element_blank(),
          legend.direction='horizontal', legend.position=c(.17,.07),
          strip.background=element_blank(), strip.text.x = element_blank())
unit.weights = synthdid_controls(estimates[1:3], weight.type='omega', mass=1)
time.weights = synthdid_controls(estimates[1:3], weight.type='lambda', mass=1)
unit.table = xtable(round(unit.weights[rev(1:nrow(unit.weights)), ], 3))
time.table = xtable(round(time.weights, 3))
print(unit.table, type='latex', file='figures/table-california-unit-weights.tex')
print(time.table, type='latex', file='figures/table-california-time-weights.tex')
unit.table
## % latex table generated in R 4.1.3 by xtable 1.8-4 package
## % Tue Mar 15 21:08:53 2022
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrr}
## \hline
## & did & sc & sdid \\
## \hline
## Alabama & 0.03 & 0.00 & 0.00 \\
## Arkansas & 0.03 & 0.00 & 0.00 \\
## Colorado & 0.03 & 0.01 & 0.06 \\
## Connecticut & 0.03 & 0.10 & 0.08 \\
## Delaware & 0.03 & 0.00 & 0.07 \\
## Georgia & 0.03 & 0.00 & 0.00 \\
## Idaho & 0.03 & 0.00 & 0.03 \\
## Illinois & 0.03 & 0.00 & 0.05 \\
## Indiana & 0.03 & 0.00 & 0.01 \\
## Iowa & 0.03 & 0.00 & 0.03 \\
## Kansas & 0.03 & 0.00 & 0.02 \\
## Kentucky & 0.03 & 0.00 & 0.00 \\
## Louisiana & 0.03 & 0.00 & 0.00 \\
## Maine & 0.03 & 0.00 & 0.03 \\
## Minnesota & 0.03 & 0.00 & 0.04 \\
## Mississippi & 0.03 & 0.00 & 0.00 \\
## Missouri & 0.03 & 0.00 & 0.01 \\
## Montana & 0.03 & 0.23 & 0.04 \\
## Nebraska & 0.03 & 0.00 & 0.05 \\
## Nevada & 0.03 & 0.20 & 0.12 \\
## New Hampshire & 0.03 & 0.04 & 0.10 \\
## New Mexico & 0.03 & 0.00 & 0.04 \\
## North Carolina & 0.03 & 0.00 & 0.03 \\
## North Dakota & 0.03 & 0.00 & 0.00 \\
## Ohio & 0.03 & 0.00 & 0.03 \\
## Oklahoma & 0.03 & 0.00 & 0.00 \\
## Pennsylvania & 0.03 & 0.00 & 0.01 \\
## Rhode Island & 0.03 & 0.00 & 0.00 \\
## South Carolina & 0.03 & 0.00 & 0.00 \\
## South Dakota & 0.03 & 0.00 & 0.00 \\
## Tennessee & 0.03 & 0.00 & 0.00 \\
## Texas & 0.03 & 0.00 & 0.01 \\
## Utah & 0.03 & 0.40 & 0.04 \\
## Vermont & 0.03 & 0.00 & 0.00 \\
## Virginia & 0.03 & 0.00 & 0.00 \\
## West Virginia & 0.03 & 0.00 & 0.03 \\
## Wisconsin & 0.03 & 0.00 & 0.04 \\
## Wyoming & 0.03 & 0.00 & 0.00 \\
## \hline
## \end{tabular}
## \end{table}
time.table
## % latex table generated in R 4.1.3 by xtable 1.8-4 package
## % Tue Mar 15 21:08:53 2022
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrr}
## \hline
## & did & sc & sdid \\
## \hline
## 1988 & 0.05 & 0.00 & 0.43 \\
## 1987 & 0.05 & 0.00 & 0.21 \\
## 1986 & 0.05 & 0.00 & 0.37 \\
## 1985 & 0.05 & 0.00 & 0.00 \\
## 1984 & 0.05 & 0.00 & 0.00 \\
## 1983 & 0.05 & 0.00 & 0.00 \\
## 1982 & 0.05 & 0.00 & 0.00 \\
## 1981 & 0.05 & 0.00 & 0.00 \\
## 1980 & 0.05 & 0.00 & 0.00 \\
## 1979 & 0.05 & 0.00 & 0.00 \\
## 1978 & 0.05 & 0.00 & 0.00 \\
## 1977 & 0.05 & 0.00 & 0.00 \\
## 1976 & 0.05 & 0.00 & 0.00 \\
## 1975 & 0.05 & 0.00 & 0.00 \\
## 1974 & 0.05 & 0.00 & 0.00 \\
## 1973 & 0.05 & 0.00 & 0.00 \\
## 1972 & 0.05 & 0.00 & 0.00 \\
## 1971 & 0.05 & 0.00 & 0.00 \\
## 1970 & 0.05 & 0.00 & 0.00 \\
## \hline
## \end{tabular}
## \end{table}
last.col = function(X) { X[, ncol(X)] }

data(CPS)
Y.logwage = panel.matrices(CPS, treatment='min_wage', outcome='log_wage', treated.last=FALSE)$Y
Y.hours   = panel.matrices(CPS, treatment='min_wage', outcome='hours',    treated.last=FALSE)$Y
Y.urate   = panel.matrices(CPS, treatment='min_wage', outcome='urate',    treated.last=FALSE)$Y
w.minwage  = last.col(panel.matrices(CPS, treatment='min_wage',   treated.last=FALSE)$W)
w.gunlaw   = last.col(panel.matrices(CPS, treatment='open_carry', treated.last=FALSE)$W)
w.abortion = last.col(panel.matrices(CPS, treatment='abort_ban',  treated.last=FALSE)$W)

data(PENN)
Y.loggdp = panel.matrices(PENN, treatment='dem', outcome='log_gdp', treated.last=FALSE)$Y
w.democracy = last.col(panel.matrices(PENN, treatment='dem',  treated.last=FALSE)$W)
w.education = last.col(panel.matrices(PENN, treatment='educ', treated.last=FALSE)$W)

default = list(rank = 4, N1 = 10, T1 = 10)
cps.baseline.params = estimate_dgp(Y.logwage, w.minwage, default$rank)
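The fitted object bundles the dgp components used by the simulator function defined below (at least F, M, Sigma, pi, and ar_coef). A quick way to look at them (a usage sketch; output not shown):

# Hypothetical inspection step: list the top-level components of the fitted dgp.
str(cps.baseline.params, max.level = 1)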
A simulator is a no-args function returning a simulated dataset, and the function below is used to define simulators by modifying an extant dgp specification. It takes:

- params, a baseline dgp specification such as cps.baseline.params above;
- functions F, M, etc., with names corresponding to those parameters, which compute the actual parameters for the simulator as functions of the corresponding baseline parameter;
- N1 and T1, the dimensions of the treated block;
- resample, a number of rows to draw with replacement from the signal components M and F (and therefore from L = M + F) to use in the simulation design. This is used in some rows of Table 4.

This returns a list with two elements:

- $run, the simulator, a no-args function returning a simulated dataset;
- $params, the parameters of the dgp.

simulator = function(params = cps.baseline.params, F=function(x){x}, M=function(x){x},
                     Sigma = function(x){x}, pi = function(x){x}, ar_coef = function(x){x},
                     N1=default$N1, T1=default$T1, resample=NULL) {
    updated.params = list(F=F(params$F), M=M(params$M), Sigma=Sigma(params$Sigma),
                          pi=pi(params$pi), ar_coef=ar_coef(params$ar_coef))
    list(params=updated.params, N1=N1, T1=T1, run=function() {
        if(!is.numeric(resample)) {
            simulate_dgp(updated.params, N1, T1)
        } else {
            ind = sample.int(nrow(updated.params$F), size=resample, replace=TRUE)
            resampled.params = updated.params
            resampled.params$F = updated.params$F[ind,]
            resampled.params$M = updated.params$M[ind,]
            simulate_dgp(resampled.params, N1, T1)
        }
    })
}
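For example, drawing one dataset from the baseline simulator and estimating on it (a usage sketch, not part of the original script):

# Hypothetical usage: each simulator's $run() returns a list with $Y, $N0, $T0,
# the same arguments the estimators above take.
baseline.sim = simulator()
one.draw = baseline.sim$run()
synthdid_estimate(one.draw$Y, one.draw$N0, one.draw$T0)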
The simulator uses Sigma, not ar_coef, to generate noise, so updating ar_coef doesn't affect the simulations; we update ar_coef below anyway so $params has the correct coefficients. These will be shown in Table 2.

simulators = list(
    baseline = simulator(),
    # Modified outcome model
    no.corr = simulator(Sigma=function(Sigma) { diag(nrow(Sigma)) * norm(Sigma,'f')/sqrt(nrow(Sigma)) },
                        ar_coef=function(coefs) { 0*coefs }),
    no.M = simulator(M=function(M) { 0*M }),
    no.F = simulator(F=function(F) { 0*F }),
    only.noise = simulator(M=function(M) { 0*M }, F=function(F) { 0*F }),
    no.noise = simulator(Sigma=function(Sigma) { 0*Sigma }, ar_coef=function(coefs) { 0*coefs }),
    # Modified assignment process
    gun.law  = simulator(estimate_dgp(Y.logwage, w.gunlaw,   default$rank)),
    abortion = simulator(estimate_dgp(Y.logwage, w.abortion, default$rank)),
    random   = simulator(pi=function(pi) { rep(.5, length(pi)) }),
    # Modified outcome variable
    hours = simulator(estimate_dgp(Y.hours, w.minwage, default$rank)),
    urate = simulator(estimate_dgp(Y.urate, w.minwage, default$rank)),
    # Assignment block size
    T1.is.one = simulator(T1=1),
    N1.is.one = simulator(N1=1),
    blocksize.is.one = simulator(T1=1, N1=1),
    # Resample rows (from Table 4)
    resample.200 = simulator(resample=200, N1=20),
    resample.400 = simulator(resample=400, N1=40),
    # penn world table (Table 3)
    penn.democracy = simulator(estimate_dgp(Y.loggdp, w.democracy, default$rank)),
    penn.education = simulator(estimate_dgp(Y.loggdp, w.education, default$rank)),
    penn.random    = simulator(estimate_dgp(Y.loggdp, w.education, default$rank),
                               pi=function(pi) { rep(.5, length(pi)) }))
This is a long-running computation, taking roughly 3 days on a recent MacBook Air. It is written to save data as it runs, one file per simulation, so it can be continued if interrupted. To continue, just rerun the loop: only simulations for which these files are missing are run. It can also be run on a Slurm cluster, and will do so if a correctly configured template file batchtools.slurm.tmpl is present. We include the one we use in the paper-results-details directory.
The loop gives each replication of a simulation its own RNG seed. Within each replication, every call to a simulator, estimator, or variance estimator uses this replication-specific seed: we explicitly restore the seed before each call. As a result, changing the set of simulations, estimators, or variance estimators considered does not change the results we get for any specific simulator/estimator/variance-estimator within a given replication. Nor does interrupting and continuing the loop.
By using RNGseq to choose replication-specific seeds for the L'Ecuyer-CMRG RNG, we get streams of random numbers that are more or less independent from replication to replication. See help(RNGseq).
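As a small illustration (not part of the original script, and using only the rngtools functions already attached above), restoring a replication's seed reproduces its draws exactly, which is what makes the simulation loop restartable:

# Illustrative sketch: build two replication-specific L'Ecuyer-CMRG seed states
# and check that restoring one of them twice yields identical draws.
demo.seeds = RNGseq(n = 2, seed = 12345)
setRNG(demo.seeds[[2]]); draw.a = rnorm(3)
setRNG(demo.seeds[[2]]); draw.b = rnorm(3)
stopifnot(identical(draw.a, draw.b))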
sim.replications = 1000
coverage.replications = 400
coverage.estimators = c('sdid', 'sc', 'did', 'difp')
coverage.sims = c('baseline', 'gun.law', 'abortion', 'random', 'hours', 'urate',
                  'T1.is.one', 'N1.is.one', 'blocksize.is.one',
                  'resample.200', 'resample.400',
                  'penn.democracy', 'penn.education', 'penn.random')
se.methods = c('bootstrap', 'jackknife', 'placebo')
cluster = file.exists('batchtools.slurm.tmpl')

# set up simulation grid
grid = expand.grid(ss = 1:length(simulators), ee = 1:length(estimators), rr = 1:sim.replications)
grid$estimate.se = names(simulators[grid$ss]) %in% coverage.sims &
                   names(estimators[grid$ee]) %in% coverage.estimators &
                   grid$rr <= coverage.replications

# associate a grid row to an output filename
output.file = function(row) {
    sprintf('simulations/simulation-%s-%s-%d.rds',
            names(simulators)[row$ss], names(estimators)[row$ee], row$rr)
}
rows.finished = function() {
    output.files = sapply(1:nrow(grid), function(ii) { output.file(grid[ii,]) })
    file.exists(output.files)
}
# Set up RNG seeds for each replication.
# We need to pass a L'Ecuyer-type seed: a vector of 7 integers starting with 10407, like initial.seed below.
# If we pass a simple integer seed, the first call in a vanilla R session (R --vanilla) differs from subsequent ones.
# To get initial.seed I run: 'library(rngtools); ignore=RNGseq(n=1, seed=12345); initial.seed=RNGseq(n=1, seed=12345)'.
initial.seed = c(10407, -2132566924, 1638542565, 108172386, -1884566405, -1838154368, -250773631)
seeds = RNGseq(n=sim.replications, seed=initial.seed)
run.simulation = function(row) {
    # restore the replication-specific seed before simulating and estimating
    setRNG(seeds[[row$rr]])
    setup = simulators[[row$ss]]$run()
    estimate = estimators[[row$ee]](setup$Y, setup$N0, setup$T0)
    se.estimates = sapply(se.methods, function(method) {
        # restore the same seed before each variance estimator
        setRNG(seeds[[row$rr]])
        if(row$estimate.se) { sqrt(vcov(estimate, method = method)) } else { NA }
    })
    cbind(data.frame(simulation  = names(simulators)[row$ss],
                     estimator   = names(estimators)[row$ee],
                     replication = row$rr,
                     estimate    = c(estimate)),
          t(as.data.frame(se.estimates)))
}
# kill the warning that the future library can't determine that we're
# using and deterministically seeding the L'Ecuyer RNG
options(future.rng.onMisuse = 'ignore')
registerDoFuture()
# set up worker processes
if(cluster) {
    # use multiple nodes on a Slurm cluster
    plan(batchtools_slurm, workers=1000, resources=list(ncpus=1, memory=1024))
} else {
    # use multiple processes on this computer
    plan(multisession, workers = 8)
}
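The loop that runs the simulations saves one file per grid row, so it can be interrupted and resumed. A minimal sketch of such a loop, assuming only the helpers defined above (grid, rows.finished, output.file, run.simulation) and the backend registered here, is:

# Hypothetical run loop (a sketch, not the vignette's own chunk): run only grid
# rows whose output files are missing, writing one .rds file per simulation.
dir.create('simulations', showWarnings = FALSE)  # assumed output directory
todo = which(!rows.finished())
invisible(foreach(ii = todo) %dopar% {
    saveRDS(run.simulation(grid[ii, ]), output.file(grid[ii, ]))
})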
# uncomment to run sims
# estimates = foreach(ii = which(rows.finished()), .combine=rbind) %do% {
#     readRDS(output.file(grid[ii,]))
# }
# estimates$simulation = factor(estimates$simulation, levels=names(simulators))
# estimates$estimator  = factor(estimates$estimator,  levels=names(estimators))
To share data, check that the simulations are complete and save the concatenated data frame as one file.
# uncomment to run sims
# stopifnot(all(rows.finished()))
# saveRDS(estimates, 'all-simulations.rds')
To use shared data, load the concatenated data frame from file.
estimates = readRDS('all-simulations.rds')
summaries = estimates %>%
    group_by(simulation, estimator) %>%
    summarize(rmse = sqrt(mean(estimate^2)),
              bias = mean(estimate),
              bootstrap.coverage = mean(abs(estimate/bootstrap) <= qnorm(.975), na.rm=TRUE),
              jackknife.coverage = mean(abs(estimate/jackknife) <= qnorm(.975), na.rm=TRUE),
              placebo.coverage   = mean(abs(estimate/placebo)   <= qnorm(.975), na.rm=TRUE),
              coverage.count = sum(!(is.na(placebo) | is.na(bootstrap) | is.na(jackknife))))
## `summarise()` has grouped output by 'simulation'. You can override using the `.groups` argument.
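Since these placebo designs have no treatment effect, the quantities computed by summarize above are, for each simulation/estimator pair over replications \(r = 1, \dots, R\),
\[
\mathrm{rmse} = \sqrt{\tfrac{1}{R}\textstyle\sum_{r} \hat\tau_r^2}, \qquad
\mathrm{bias} = \tfrac{1}{R}\textstyle\sum_{r} \hat\tau_r, \qquad
\mathrm{coverage} = \tfrac{1}{R}\textstyle\sum_{r} 1\{|\hat\tau_r / \widehat{\mathrm{se}}_r| \le z_{0.975}\},
\]
with each coverage average taken over the replications for which the corresponding standard error \(\widehat{\mathrm{se}}_r\) is defined.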
point.columns    = c('rmse', 'bias')
coverage.columns = c('bootstrap.coverage', 'jackknife.coverage', 'placebo.coverage')
Our standard error estimators can be undefined in some instances, returning NA. This happens to the bootstrap and jackknife if there is only one treated unit, and to the jackknife if there is only one control with nonzero weight. As we see in the table below, this happens in one replication of the urate simulation. We compute coverage over the replications for which each standard error estimator is defined, passing na.rm=TRUE to mean above.
summaries[!(summaries$coverage.count %in% c(0, coverage.replications)), c('simulation', 'estimator', 'coverage.count')]
## # A tibble: 0 × 3
## # Groups: simulation [0]
## # … with 3 variables: simulation <fct>, estimator <fct>, coverage.count <int>
In the paper, this is split into Tables 2 and 3, with CPS sims in the former and PENN sims in the latter.
point.summaries = summaries[, c('simulation', 'estimator', point.columns)] %>%
    pivot_wider(names_from=estimator, values_from=all_of(point.columns)) %>%
    column_to_rownames('simulation')
point.table = cbind(sim.info, point.summaries)
# display table, leaving out non-default regularization choices
round(point.table, digits=3)[, -c(11,12,18,19)]
## F_scale M_scale noise_scale ar_lag1 ar_lag2 rmse_did rmse_sc rmse_sdid rmse_difp rmse_mc bias_did bias_sc bias_sdid bias_difp bias_mc
## baseline 0.992 0.100 0.098 0.01 -0.06 0.049 0.037 0.028 0.032 0.035 0.021 0.020 0.010 0.007 0.015
## no.corr 0.992 0.100 0.098 0.00 0.00 0.049 0.038 0.028 0.032 0.035 0.021 0.020 0.010 0.007 0.015
## no.M 0.992 0.000 0.098 0.01 -0.06 0.014 0.018 0.016 0.016 0.014 0.001 0.004 0.001 0.001 0.001
## no.F 0.000 0.100 0.098 0.01 -0.06 0.049 0.023 0.028 0.032 0.035 0.021 0.004 0.010 0.007 0.015
## only.noise 0.000 0.000 0.098 0.01 -0.06 0.014 0.014 0.016 0.016 0.014 0.001 0.001 0.001 0.001 0.001
## no.noise 0.992 0.100 0.000 0.00 0.00 0.047 0.017 0.006 0.011 0.004 0.020 0.004 0.004 0.001 0.000
## gun.law 0.992 0.100 0.098 0.01 -0.06 0.047 0.027 0.026 0.030 0.035 0.015 -0.003 0.008 0.009 0.015
## abortion 0.992 0.100 0.098 0.01 -0.06 0.045 0.031 0.023 0.027 0.031 0.003 0.016 0.004 0.001 0.003
## random 0.992 0.100 0.098 0.01 -0.06 0.044 0.025 0.024 0.027 0.031 0.002 -0.001 0.001 0.000 0.001
## hours 0.789 0.402 0.575 0.06 0.00 0.206 0.203 0.190 0.197 0.185 0.085 -0.049 0.111 0.099 0.100
## urate 0.752 0.441 0.593 -0.02 -0.01 0.353 0.184 0.191 0.187 0.247 0.304 0.080 0.100 0.078 0.187
## T1.is.one 0.992 0.100 0.098 0.01 -0.06 0.070 0.059 0.050 0.054 0.051 0.038 0.017 0.019 0.012 0.021
## N1.is.one 0.992 0.100 0.098 0.01 -0.06 0.126 0.072 0.063 0.083 0.081 0.011 0.014 0.002 -0.002 0.004
## blocksize.is.one 0.992 0.100 0.098 0.01 -0.06 0.153 0.124 0.112 0.117 0.108 0.033 0.024 0.014 0.011 0.016
## resample.200 0.992 0.100 0.098 0.01 -0.06 0.029 0.017 0.015 0.018 0.018 -0.001 -0.002 0.000 0.000 -0.001
## resample.400 0.992 0.100 0.098 0.01 -0.06 0.020 0.014 0.010 0.015 0.013 -0.001 -0.004 0.000 0.000 0.000
## penn.democracy 0.972 0.229 0.070 0.91 -0.22 0.197 0.038 0.031 0.039 0.058 0.175 -0.004 -0.005 -0.007 0.043
## penn.education 0.972 0.229 0.070 0.91 -0.22 0.172 0.053 0.030 0.039 0.049 0.162 0.025 -0.003 -0.005 0.040
## penn.random 0.972 0.229 0.070 0.91 -0.22 0.129 0.046 0.037 0.045 0.063 -0.006 -0.011 -0.002 -0.004 -0.004
# display table comparing default and non-default regularization choices
round(point.table, digits=3)[, c(7,11,9,12)]
## rmse_sc rmse_sc_reg rmse_difp rmse_difp_reg
## baseline 0.037 0.078 0.032 0.036
## no.corr 0.038 0.079 0.032 0.036
## no.M 0.018 0.034 0.016 0.014
## no.F 0.023 0.025 0.032 0.036
## only.noise 0.014 0.012 0.016 0.014
## no.noise 0.017 0.034 0.011 0.020
## gun.law 0.027 0.034 0.030 0.040
## abortion 0.031 0.065 0.027 0.035
## random 0.025 0.031 0.027 0.035
## hours 0.203 0.329 0.197 0.191
## urate 0.184 0.259 0.187 0.288
## T1.is.one 0.059 0.065 0.054 0.050
## N1.is.one 0.072 0.085 0.083 0.087
## blocksize.is.one 0.124 0.124 0.117 0.112
## resample.200 0.017 0.016 0.018 0.018
## resample.400 0.014 0.011 0.015 0.012
## penn.democracy 0.038 0.035 0.039 0.031
## penn.education 0.053 0.062 0.039 0.029
## penn.random 0.046 0.047 0.045 0.046
A subset of these rows appears in Table 4 of the paper.
coverage.table = summaries[, c('simulation', 'estimator', coverage.columns)] %>%
    pivot_wider(names_from=estimator, values_from=all_of(coverage.columns)) %>%
    column_to_rownames('simulation')
keep = !is.na(coverage.table[1,])
round(coverage.table[rownames(coverage.table) %in% coverage.sims, keep], digits=2)
## bootstrap.coverage_did bootstrap.coverage_sc bootstrap.coverage_sdid bootstrap.coverage_difp jackknife.coverage_did jackknife.coverage_sc jackknife.coverage_sdid jackknife.coverage_difp placebo.coverage_did placebo.coverage_sc placebo.coverage_sdid placebo.coverage_difp
## baseline 0.89 0.93 0.96 0.94 0.92 0.95 0.96 0.97 0.96 0.88 0.95 0.94
## gun.law 0.92 0.96 0.97 0.96 0.93 0.99 0.97 0.99 0.93 0.95 0.94 0.94
## abortion 0.93 0.94 0.96 0.97 0.95 0.97 0.96 0.99 0.96 0.91 0.97 0.96
## random 0.92 0.96 0.96 0.95 0.94 0.98 0.97 0.98 0.94 0.96 0.96 0.96
## hours 0.94 0.96 0.92 0.92 0.95 0.98 0.94 0.97 0.96 0.89 0.91 0.93
## urate 0.58 0.90 0.91 0.91 0.64 0.96 0.92 0.96 0.62 0.88 0.88 0.89
## T1.is.one 0.84 0.94 0.93 0.91 0.88 0.97 0.95 0.97 0.92 0.90 0.92 0.92
## N1.is.one NaN NaN NaN NaN NaN NaN NaN NaN 0.96 0.95 0.97 0.96
## blocksize.is.one NaN NaN NaN NaN NaN NaN NaN NaN 0.94 0.94 0.95 0.94
## resample.200 0.92 0.96 0.94 0.96 0.93 1.00 0.95 1.00 0.94 0.94 0.96 0.94
## resample.400 0.96 0.92 0.95 0.94 0.95 1.00 0.95 1.00 0.96 0.90 0.96 0.93
## penn.democracy 0.55 0.96 0.93 0.96 0.59 0.99 0.95 0.99 0.79 0.97 0.98 0.96
## penn.education 0.30 0.95 0.95 0.95 0.34 0.96 0.97 1.00 0.94 0.90 0.99 0.96
## penn.random 0.89 0.95 0.93 0.96 0.91 0.99 0.94 1.00 0.91 0.94 0.95 0.95
We plot an estimate of the error density function for the baseline CPS setting (minimum wage assignment) and the random assignment variant. This is Figure 2.
grid = expand.grid(ss=1:length(simulators), ee=1:length(estimators))
error.densities = do.call(rbind, mapply(function(ss, ee) {
    errors = estimates[estimates$simulation == names(simulators)[ss] &
                       estimates$estimator  == names(estimators)[ee], 'estimate']
    density.estimate = lindsey_density_estimate(errors, K = 100, deg = 3)
    data.frame(x=density.estimate$centers, y=density.estimate$density,
               simulator=names(simulators)[ss], estimator=names(estimators)[ee])
}, grid$ss, grid$ee, SIMPLIFY=FALSE))
## Warning: glm.fit: fitted rates numerically 0 occurred
## Warning: glm.fit: fitted rates numerically 0 occurred
show.sims = error.densities$simulator %in% c('baseline', 'random')
show.estimators = error.densities$estimator %in% c('did', 'sc', 'sdid')
# reformat the information in $simulator and $estimator for display in the plot title and legend
error.densities$Title = ifelse(error.densities$simulator == 'baseline',
                               'Minimum Wage Assignment', 'Random Assignment')
error.densities$Estimator = toupper(error.densities$estimator)
ggplot(error.densities[show.sims & show.estimators, ]) +
    geom_line(aes(x=x, y=y, color=Estimator)) +
    geom_vline(xintercept=0, linetype=3) +
    facet_wrap(~Title) + xlab('error') + ylab('density') + theme_light() +
    theme(legend.position=c(.94,.88), legend.background=element_blank(),
          legend.title=element_blank())