############################################################################################ # PLEASE READ ############################################################################################ # This is the R code to generate simulations in # "Estimating Policy Impact in A Difference-in-Differences Hazard Model: A Simulation Study" # by David A Hsieh # # Rules for using this R script: # 1) You must cite the following article: # David A Hsieh (2025) "Estimating Policy Impact in A Difference-in-Differences Hazard Model: A Simulation Study." Risks # 2) You must place the following URL in a footnote to help others find this code: # https://people.duke.edu/~dah7/OnlineAppendices/OnlineAppendixOneriskDiD.html # 3) By downloading the R script, you confirm that you are assuming all risk for the use of this script ############################################################################################ library(data.table) library(doParallel) library(doRNG) dir_name = "D:/save_random_25k/" versions = c("na","nb","nc","nd","ga","gb","gc","gd","0a","0b","0c","0d") ncore = 10 # Register cluster cl <- makeCluster(ncore) registerDoParallel(cl) clusterExport(cl,c("dir_name"), envir=.GlobalEnv) set.seed(183) foreach (iversion = c(1:12), .inorder = FALSE, .packages=c("data.table")) %dorng% { sim_version = versions[iversion] kvec <- c(1:110) for (isim in kvec) { uh = substr(sim_version,1,1) bh = substr(sim_version,2,2) if (isim < 10) { fsimin <- paste0(dir_name,"onedid",uh,'_25kw',bh,'_random_00',isim,'.rda') } else if (isim < 100) { fsimin <- paste0(dir_name,"onedid",uh,'_25kw',bh,'_random_0',isim,'.rda') } else { fsimin <- paste0(dir_name,"onedid",uh,'_25kw',bh,'_random_',isim,'.rda') } setwd("D:/save_random_25k/") nloan <- 25000 maxper <- 24 nobs <- nloan*maxper numper <- maxper # set coef and baseline beta1 <- c(1.0,0.5,-0.5,-0.1) # parameters for x1,x2,x3,x4,x5 and d3 nz = length(beta1) if (bh=='a') { wshape <- 1.0 tau1 <- dweibull(c(1:numper)*2.5/numper,shape=wshape,scale=1) } else if (bh=='b') { wshape <- 1.5 tau1 <- dweibull(c(1:numper)*0.5/numper,shape=wshape,scale=1) } else if (bh=='c') { tau1 <- rep(0.4,numper) } else if (bh=='d') { wshape <- 5.0 tau1 <- dweibull(c(1:numper)*2.5/numper,shape=wshape,scale=1) } gamma1 <- -3.5+log(tau1) parm.true <- beta1 # draw heterogeneity if (uh=='0') { uhet1 = rep(0,nloan) } else if (uh=='n') { sig1<- 1.0 mu1 <- (-0.5*sig1^2) uhet1 <- rnorm(nloan,mu1,sig1) } else if (uh=='g') { uhet1 <- log(rgamma(nloan,shape=1,scale=1)) } # draw termination U1 <- runif(nloan,0,1) # draw xvar (time invariant) sigmat <- matrix(c(1,-0.2,-0.2,1),nrow=2,ncol=2,byrow=TRUE) tsigchol <- t(chol(sigmat)) z1 <- rnorm(nloan,0,1) z2 <- rnorm(nloan,0,1) x1 <- z1 * tsigchol[1,1] + z2 * tsigchol[1,2] x2 <- z1 * tsigchol[2,1] + z2 * tsigchol[2,2] x3 <- rnorm(nloan,0,1) x3 <- ifelse(x3>0,1,0) d3 <- rep(1,nloan) # treatment group dt <- data.table(id=c(1:nloan),x1=x1,x2=x2,x3=x3,d3=d3,U1=U1,u1=uhet1) # create loan ids tmp <- matrix(c(1:nloan),nrow=nloan,ncol=numper) tmp <- t(tmp) vecid<- as.vector(tmp) # create period (loan.age) tmp <- matrix(c(1:numper),nrow=nloan,ncol=numper,byrow=TRUE) tmp <- t(tmp) vect<- as.vector(tmp) dtlong <- data.table(id=vecid,period=vect) dtlong <- dt[dtlong,on='id'] d3tim = data.table(id=c(1:nloan)) d3tim$d3start = 6 + floor(runif(nloan,0,6.9999)) dtlong = d3tim[dtlong,on='id'] dtlong[, d3 := ifelse(period0,S1/exp(-h1),1)] dtlong[status %in% c('default','both'),t1x:=-log(U1/S1L)/h1+period-1] # fixed on 2022/08/11 dtlong[, status1 := 0] dtlong[, status2 := 0] dtlong[, status1 := ifelse(status=='default',1,0)] dtlong[, status3 := 0] dtlong[, status4 := 0] # find continuous time dtlong[, t := t1x] dtlong[, start := shift(t,type='lag',n=1L,fill=0), by='id' ] dtlong[, end := ceiling(t)] ndat <- dtlong[,.(id,period,status1,status2,status3,status4,x1,x2,x3,d3)] save(ndat,file=fsimin) } } stopCluster(cl)