############################################################################################
# PLEASE READ
############################################################################################
# This is the R code to generate simulations in
# "Estimating Policy Impact in A Difference-in-Differences Hazard Model: A Simulation Study"
# by David A Hsieh
#
# Rules for using this R script:
# 1) You must cite the following article:
# David A Hsieh (2025) "Estimating Policy Impact in A Difference-in-Differences Hazard Model: A Simulation Study." Risks
# 2) You must place the following URL in a footnote to help others find this code:
# https://people.duke.edu/~dah7/OnlineAppendices/OnlineAppendixOneriskDiD.html
# 3) By downloading the R script, you confirm that you are assuming all risk for the use of this script 
############################################################################################

library(data.table)
library(doParallel)
library(doRNG)

dir_name = "D:/save_random_25k/"

versions = c("na","nb","nc","nd","ga","gb","gc","gd","0a","0b","0c","0d")

ncore = 10
# Register cluster
cl <- makeCluster(ncore)
registerDoParallel(cl)
clusterExport(cl,c("dir_name"),
              envir=.GlobalEnv)
set.seed(183)

foreach (iversion = c(1:12),
         .inorder = FALSE,
         .packages=c("data.table")) %dorng% {
           sim_version = versions[iversion]
           kvec <- c(1:110)
           
           for (isim in kvec) {
             
             uh = substr(sim_version,1,1)
             bh = substr(sim_version,2,2)
             
             if (isim < 10) {
               fsimin <- paste0(dir_name,"onedid",uh,'_25kw',bh,'_random_00',isim,'.rda')
             } else if (isim < 100) {
               fsimin <- paste0(dir_name,"onedid",uh,'_25kw',bh,'_random_0',isim,'.rda')
             } else {
               fsimin <- paste0(dir_name,"onedid",uh,'_25kw',bh,'_random_',isim,'.rda')
             }
             
             setwd("D:/save_random_25k/")
             
             nloan  <- 25000
             maxper <- 24
             nobs   <- nloan*maxper
             numper <- maxper
             
             # set coef and baseline
             beta1 <- c(1.0,0.5,-0.5,-0.1)             # parameters for x1,x2,x3,x4,x5 and d3
             nz = length(beta1)
             
             if (bh=='a') {
               wshape <- 1.0
               tau1  <- dweibull(c(1:numper)*2.5/numper,shape=wshape,scale=1)
             } else if (bh=='b') {
               wshape <- 1.5
               tau1  <- dweibull(c(1:numper)*0.5/numper,shape=wshape,scale=1)
             } else if (bh=='c') {
               tau1  <- rep(0.4,numper)
             } else if (bh=='d') {
               wshape <- 5.0
               tau1  <- dweibull(c(1:numper)*2.5/numper,shape=wshape,scale=1)
             }
             
             gamma1 <- -3.5+log(tau1)
             parm.true <- beta1
             
             # draw heterogeneity 
             if (uh=='0') {
               uhet1 = rep(0,nloan)
             } else if (uh=='n') {
               sig1<- 1.0
               mu1 <- (-0.5*sig1^2)
               uhet1 <- rnorm(nloan,mu1,sig1)
             } else if (uh=='g') {
               uhet1 <- log(rgamma(nloan,shape=1,scale=1))
             }
             
             # draw termination
             U1   <- runif(nloan,0,1)
             
             # draw xvar (time invariant)
             sigmat <- matrix(c(1,-0.2,-0.2,1),nrow=2,ncol=2,byrow=TRUE)
             tsigchol  <- t(chol(sigmat))
             z1 <- rnorm(nloan,0,1)
             z2 <- rnorm(nloan,0,1)
             x1 <- z1 * tsigchol[1,1] + z2 * tsigchol[1,2] 
             x2 <- z1 * tsigchol[2,1] + z2 * tsigchol[2,2] 
             
             x3 <- rnorm(nloan,0,1)
             x3 <- ifelse(x3>0,1,0)
             d3 <- rep(1,nloan)          # treatment group
             
             dt <- data.table(id=c(1:nloan),x1=x1,x2=x2,x3=x3,d3=d3,U1=U1,u1=uhet1)
             
             # create loan ids
             tmp <- matrix(c(1:nloan),nrow=nloan,ncol=numper)
             tmp <- t(tmp)
             vecid<- as.vector(tmp)
             
             # create period (loan.age)
             tmp <- matrix(c(1:numper),nrow=nloan,ncol=numper,byrow=TRUE)
             tmp <- t(tmp)
             vect<- as.vector(tmp)
             
             dtlong    <- data.table(id=vecid,period=vect)
             dtlong    <- dt[dtlong,on='id']
             
             d3tim = data.table(id=c(1:nloan))
             d3tim$d3start = 6 + floor(runif(nloan,0,6.9999))
             dtlong = d3tim[dtlong,on='id']
             dtlong[, d3 := ifelse(period<d3start,0,d3)] 
             dtlong[, d3start := NULL]
             
             dtlong[, a1 := rep(gamma1,nloan)]
             
             dtlong[, h1 := exp(a1 + beta1[1]*x1 + beta1[2]*x2 + beta1[3]*x3 + beta1[4]*x3*d3 + u1)]
             dtlong[, H1 := cumsum(h1), by='id']
             dtlong[, S1 := exp(-H1)]
             dtlong[, t1 := S1-U1]
             T <- dtlong[t1<0, .(mint=min(period)), by='id']
             dtlong <- T[dtlong,on='id']
             dtlong[, mint := ifelse(is.na(mint),numper,mint)]
             dtlong <- dtlong[period<=mint]
             
             dtlong[, status := 'censored']
             dtlong[period==mint,status:=  ifelse(t1<0,'default','censored')]
             # resolve between risk 1 and risk 2 if termination in the same period
             dtlong[, t1x := as.numeric(period)]
             dtlong[, S1L := ifelse(S1>0,S1/exp(-h1),1)]
             dtlong[status %in% c('default','both'),t1x:=-log(U1/S1L)/h1+period-1]  # fixed on 2022/08/11
             dtlong[, status1 := 0]
             dtlong[, status2 := 0]
             dtlong[, status1 := ifelse(status=='default',1,0)]
             dtlong[, status3 := 0]
             dtlong[, status4 := 0]
             
             # find continuous time
             dtlong[, t       := t1x]
             dtlong[, start   := shift(t,type='lag',n=1L,fill=0), by='id'  ]
             dtlong[, end     := ceiling(t)]
             
             ndat <- dtlong[,.(id,period,status1,status2,status3,status4,x1,x2,x3,d3)]
             save(ndat,file=fsimin)
             
           }

}
stopCluster(cl)