/*****************************************************************************/
/*** DESCRIPTION: REPLICATION CODE FOR "DIVERSITY AND CONFLICT"            ***/
/*** AUTHORS:     EREN ARBATLI, QUAMRUL ASHRAF, ODED GALOR, AND MARC KLEMP ***/
/*****************************************************************************/

/***********************************************************************************************************************************************************************/
/* TABLE: Population Diversity and the Frequency of Civil Conflict Onset across Countries - Robustness to Accounting for Population Diversity as a Generated Regressor */
/***********************************************************************************************************************************************************************/

# delimit ;

/* From here on every command ends at a semicolon rather than at a line break, */
/* which is what allows the long estout calls below to span several lines.     */

/* Never pause the output for a --more-- prompt */
set more off ;

/* Begin from an empty session */
clear all ;

/*******************************************************************************/
/* Define programs for the two-step bootstrapped estimation of standard errors */
/*******************************************************************************/

capture program drop getbeta ;

/*----------------------------------------------------------------------------*/
/* getbeta: point estimates for one column of the table                       */
/*                                                                            */
/* Syntax:  getbeta confvar pdivvar [controls] if exp [, iv(varname)]         */
/*                                                                            */
/*   confvar   name of the outcome in levels. The regression itself uses the  */
/*             variable whose name is "ln" followed by this name.             */
/*   pdivvar   the diversity regressor of interest                            */
/*   controls  any remaining variables, entered as exogenous covariates       */
/*   if exp    required sample restriction                                    */
/*   iv()      optional instrument for pdivvar. When given, the model is      */
/*             fitted by ivregress 2sls, otherwise by regress.                */
/*                                                                            */
/* Returns in r():                                                            */
/*   r(beta)   row vector holding e(b) with one extra trailing element, the   */
/*             lincom estimate of                                             */
/*               (1 + mean of confvar) * (p90 - p10 of pdivvar) * b[pdivvar]  */
/*             where the mean and percentiles use the estimation sample       */
/*   r(r2_a)   e(r2_a) of the fitted model                                    */
/*   r(Nobs)   e(N) of the fitted model                                       */
/*----------------------------------------------------------------------------*/
program define getbeta, rclass ;
  version 14.2 ;
  syntax varlist if [, iv(varname numeric)] ;

  /* First token is the outcome, second the diversity measure, rest are controls */
  tokenize `varlist' ;
  local confvar `1' ;
  local pdivvar `2' ;
  macro shift 2 ;
  local ctlvars `*' ;

  marksample touse ;
  if "`iv'" == "" { ;
    qui regress ln`confvar' `pdivvar' `ctlvars' if `touse' ;
  } ;
  else { ;
    qui ivregress 2sls ln`confvar' `ctlvars' (`pdivvar' = `iv') if `touse' ;
  } ;

  /* Scaling terms are computed on the estimation sample only */
  qui sum `confvar' if e(sample) == 1 ;
  local avgconf = r(mean) ;
  _pctile `pdivvar' if e(sample) == 1, percentiles(10 90) ;
  local delta = r(r2) - r(r1) ;
  qui lincom ((1 + `avgconf') * `delta' * `pdivvar') ;

  /* Build the result in a temporary matrix so that a matrix called B that */
  /* the caller may already hold is not overwritten as a side effect       */
  tempname B ;
  matrix `B' = (e(b), r(estimate)) ;

  return matrix beta = `B' ;
  return scalar r2_a = e(r2_a) ;
  return scalar Nobs = e(N) ;
end ;

capture program drop tseboot ;

/*----------------------------------------------------------------------------*/
/* tseboot: one replication of the two-step bootstrap                         */
/*                                                                            */
/* Syntax:  tseboot confvar pdivvar [controls] if exp [, iv(varname)]         */
/*          (same arguments as getbeta)                                       */
/*                                                                            */
/* Step 1   Load hgdp.dta, resample it, and regress adiv on mdist.            */
/* Step 2   Load the temporary cross-country file, resample the rows that     */
/*          satisfy the if condition and are non-missing on the variable      */
/*          list, overwrite pdiv_aa with the step 1 prediction evaluated at   */
/*          mdist_addis_aa, and re-estimate the model (regress, or            */
/*          ivregress 2sls when iv() is given).                               */
/*                                                                            */
/* Leaves in e(): the coefficients of the step 2 model plus e(meff), the      */
/* lincom estimate of                                                         */
/*   (1 + mean of confvar) * (p90 - p10 of pdivvar) * b[pdivvar].             */
/*                                                                            */
/* The data in memory are replaced, so the caller must reload them.           */
/*                                                                            */
/* NOTE(review): the replace statement names pdiv_aa and mdist_addis_aa       */
/* literally, so the program is only meaningful when pdivvar is pdiv_aa.      */
/*----------------------------------------------------------------------------*/
program define tseboot, eclass ;
  version 14.2 ;
  syntax varlist if [, iv(varname numeric)] ;

  /* First token is the outcome, second the diversity measure, rest are controls */
  tokenize `varlist' ;
  local confvar `1' ;
  local pdivvar `2' ;
  macro shift 2 ;
  local ctlvars `*' ;

  /* Step 1: first-stage relationship on a resampled HGDP file.               */
  /* NOTE(review): bsample 1 with strata(country) requests a sample of size   */
  /* one within each country stratum - confirm that this is the intended      */
  /* resampling scheme.                                                       */
  use "../../data/hgdp.dta", clear ;
  bsample 1, strata(country);
  regress adiv mdist ;

  /* Step 2: the stored coefficients from step 1 survive loading a new file */
  use "../../data/aagk_xcountry_tmp.dta", clear ;
  marksample touse ;
  bsample if `touse' ;
  replace pdiv_aa = _b[_cons] + _b[mdist] * mdist_addis_aa ;
  if "`iv'" == "" { ;
    regress ln`confvar' `pdivvar' `ctlvars' ;
  } ;
  else { ;
    ivregress 2sls ln`confvar' `ctlvars' (`pdivvar' = `iv') ;
  } ;

  /* Compute the scaling terms on the estimation sample, exactly as getbeta  */
  /* does. This matters only when a resampled row is dropped by the          */
  /* estimator, for example because the regenerated pdiv_aa or the           */
  /* instrument is missing. Otherwise e(sample) covers every row in memory.  */
  sum `confvar' if e(sample) == 1 ;
  local avgconf = r(mean) ;
  _pctile `pdivvar' if e(sample) == 1, percentiles(10 90) ;
  local delta = r(r2) - r(r1) ;
  lincom ((1 + `avgconf') * `delta' * `pdivvar') ;

  ereturn scalar meff = r(estimate) ;
end ;

/*******************/
/* Open a log file */
/*******************/

/* Close any log left open by an earlier run, then start a fresh text log */
capture log close ;
log using "../../results/logs/xtab_boots.log", text replace ;

/*************************************************/
/* Read in the AAGK cross-country data set       */
/*************************************************/

use "../../data/aagk_xcountry.dta", clear ;

/*************************************************/
/* Outcome in logs: ln(1 + conflict onsets)      */
/*************************************************/

generate lnnum_ccst_60_17_avg_pri1 = ln(1 + num_ccst_60_17_avg_pri1) ;

/*********************************************************************************/
/* Rescale regressors whose coefficients would otherwise be reported as "0.000"  */
/*********************************************************************************/

/* Variables divided by 1000 */
local per1000 "abslat ruggavg elevavg elevrng distcr" ;
foreach v of varlist `per1000' { ;
  replace `v' = `v' / 1000 ;
} ;

/* Variable divided by 10 */
replace mdist_addis = mdist_addis / 10 ;

/*********************************************************************************/
/* Give shorter names to variables whose names are too long for bootstrap output */
/*********************************************************************************/

rename (xconst_1960_2017_avg_p4_v17 democ_1960_2017_shr_p4_v17 autoc_1960_2017_shr_p4_v17)
       (xconst_60_17_avg_p4         democ_60_17_shr_p4         autoc_60_17_shr_p4) ;

rename (lnpop_1960_2017_avg_wdi lngdppc_cu_usd_1960_2017_avg_wdi)
       (lnpop_60_17_avg_wdi     lngdppc_60_17_avg_wdi) ;

/*************************************************/
/* Covariate groups, stored as global macros     */
/*************************************************/

global ethfrac "efrac" ;
global ethpolr "des_pol15" ;
global geovars "abslat ruggavg elevavg elevrng suitavg suitrng distcr island" ;
global colhist "evercol_gbr evercol_fra evercol_oth" ;
global legalor "legor_uk legor_fr" ;
global exconst "xconst_60_17_avg_p4" ;
global polregs "democ_60_17_shr_p4 autoc_60_17_shr_p4" ;
global oilprod "anypetroleum_pet" ;
global popsize "lnpop_60_17_avg_wdi" ;
global ypercap "lngdppc_60_17_avg_wdi" ;
global contold "africa asia" ;
global contall "africa asia namerica samerica oceania" ;

/*********************************************************************************/
/* Sample indicators: equal to 1 when none of the listed variables is missing    */
/*   smpl_flag1 - outcome, diversity, geography and ethnic diversity controls    */
/*   smpl_flag2 - the above plus the institutional, oil, population and income   */
/*                controls                                                       */
/*********************************************************************************/

local need1 "lnnum_ccst_60_17_avg_pri1 pdiv_aa ${geovars} ${ethfrac} ${ethpolr}" ;
local need2 "`need1' ${colhist} ${legalor} ${exconst} ${polregs} ${oilprod} ${popsize} ${ypercap}" ;

forvalues s = 1/2 { ;
  quietly egen smpl_flag`s' = rowmiss(`need`s'') ;
  quietly replace smpl_flag`s' = (smpl_flag`s' == 0) ;
} ;

/*********************************************************************************/
/* Write the prepared data to a temporary file that the bootstrap program reads  */
/*********************************************************************************/

save "../../data/aagk_xcountry_tmp.dta", replace ;

/***********************/
/* Run the regressions */
/***********************/

/* ------------------------------------------------------------------------- */
/* COLUMNS 1-9                                                               */
/*                                                                           */
/* Every column follows the same recipe:                                     */
/*   1. reload the temporary data file, because simulate replaces the data   */
/*      in memory with its replication results                               */
/*   2. getbeta  - point estimates, adjusted R2 and number of observations   */
/*   3. simulate - bootstrap replications of tseboot, reseeded per column    */
/*   4. bstat    - combine the replications with the point estimates and     */
/*                 store the result as col1 ... col9                         */
/*   5. estadd   - attach the number of observations, and the adjusted R2    */
/*                 for the OLS columns only                                  */
/*                                                                           */
/* Only the regressor list, the sample restriction and the optional          */
/* instrument differ across columns. They are listed once below.             */
/* ------------------------------------------------------------------------- */

/* Settings shared by all columns */
local depvar  "num_ccst_60_17_avg_pri1" ;
local tmpdata "../../data/aagk_xcountry_tmp.dta" ;
local nreps   1000 ;
local rseed   12345 ;

/* Columns 1 to nols are OLS and report an adjusted R2. The rest are 2SLS. */
local ncols 9 ;
local nols  7 ;

/* The full set of non-continent controls used in columns 5, 7 and 9 */
local fullctl "${geovars} ${ethfrac} ${ethpolr} ${colhist} ${legalor} ${exconst} ${polregs} ${oilprod} ${popsize} ${ypercap}" ;

/* Column specifications: everything that follows the outcome name */
local spec1 "pdiv_aa if smpl_flag1 == 1" ;
local spec2 "pdiv_aa ${geovars} if smpl_flag1 == 1" ;
local spec3 "pdiv_aa ${geovars} ${contall} if smpl_flag1 == 1" ;
local spec4 "pdiv_aa ${geovars} ${ethfrac} ${ethpolr} ${contall} if smpl_flag1 == 1" ;
local spec5 "pdiv_aa `fullctl' ${contall} if smpl_flag2 == 1" ;
local spec6 "pdiv_aa ${geovars} ${contold} if smpl_flag1 == 1 & oldw == 1" ;
local spec7 "pdiv_aa `fullctl' ${contold} if smpl_flag2 == 1 & oldw == 1" ;
local spec8 "pdiv_aa ${geovars} ${contall} if smpl_flag1 == 1, iv(mdist_addis)" ;
local spec9 "pdiv_aa `fullctl' ${contall} if smpl_flag2 == 1, iv(mdist_addis)" ;

forvalues c = 1/`ncols' { ;

  use "`tmpdata'", clear ;

  /* Obtain the relevant point estimates */
  getbeta `depvar' `spec`c'' ;
  matrix beta = r(beta) ;

  /* Store the adjusted R2 (OLS only) and the number of observations */
  if `c' <= `nols' { ;
    scalar r2_a = r(r2_a) ;
  } ;
  scalar Nobs = r(Nobs) ;

  /* Obtain the bootstrapped standard error estimates */
  simulate _b meff = e(meff), reps(`nreps') seed(`rseed'): tseboot `depvar' `spec`c'' ;
  eststo col`c': bstat, stat(beta) ;

  /* Attach the adjusted R2 (OLS only) and the number of observations */
  if `c' <= `nols' { ;
    estadd scalar adjr2 = r2_a : col`c' ;
  } ;
  estadd scalar N_obs = Nobs : col`c' ;

} ;

/**************************************/
/* Print the results to a LaTeX table */
/**************************************/

/* First pass: create (replace) the .tex file with the table header and the   */
/* coefficient on _b_pdiv_aa with its standard error in parentheses. The      */
/* indicate() rows mark which groups of controls each column includes, and    */
/* stats() prints the N_obs and adjr2 scalars attached to each stored column. */
/* The tabular environment is left open for the second pass to close.         */
estout col1 col2 col3 col4 col5 col6 col7 col8 col9 using "../../results/tables/xtab_boots.tex", style(tex) replace 
  cells(b(nostar fmt(a2)) se(fmt(a2) par)) keep(_b_pdiv_aa, relax) 
  indicate("Continent dummies=_b_africa _b_asia _b_namerica _b_samerica _b_oceania" 
           "Controls for geography=_b_abslat _b_ruggavg _b_elevavg _b_elevrng _b_suitavg _b_suitrng _b_distcr _b_island" 
           "Controls for ethnic diversity=_b_efrac _b_des_pol15" 
           "Controls for institutions=_b_evercol_gbr _b_evercol_fra _b_evercol_oth _b_legor_uk _b_legor_fr _b_xconst_60_17_avg_p4 _b_democ_60_17_shr_p4 _b_autoc_60_17_shr_p4" 
           "Controls for oil, population, and income=_b_anypetroleum_pet _b_lnpop_60_17_avg_wdi _b_lngdppc_60_17_avg_wdi", labels("\$\times\$" " ")) 
  stats(N_obs adjr2, fmt(%9.0f a2) labels("Observations" "Adjusted \$R^2\$") layout(@ @)) 
  varwidth(44) msign("\$-\$") nolabel 
  prehead("\begin{tabular*}{645pt}{@{\extracolsep{\fill}}lccccccccc}" 
          "\toprule" 
          "Cross-country sample:&\multicolumn{5}{c}{Global}&\multicolumn{2}{c}{Old World}&\multicolumn{2}{c}{Global}\\" 
          "\cmidrule(r){2-6}\cmidrule(lr){7-8}\cmidrule(l){9-10}") 
  numbers mlabels("OLS" "OLS" "OLS" "OLS" "OLS" "OLS" "OLS" "2SLS" "2SLS") collabels(none) 
  posthead("\midrule" 
           "&\multicolumn{9}{c}{Log number of new PRIO25 civil conflict onsets per year, 1960--2017}\\" 
           "\cmidrule{2-10}") 
  varlabels(_b_pdiv_aa "Population diversity (ancestry adjusted)", elist(_b_pdiv_aa \addlinespace)) 
  prefoot("\midrule") postfoot("\addlinespace") ;

/* Second pass: append one further row to the same file holding _eq2_meff,   */
/* the bootstrapped effect statistic that tseboot returns as e(meff), then   */
/* write the bottom rule and close the tabular environment.                  */
estout col1 col2 col3 col4 col5 col6 col7 col8 col9 using "../../results/tables/xtab_boots.tex", style(tex) append 
  cells(b(nostar fmt(a2)) se(fmt(a2) par)) keep(_eq2_meff, relax) 
  varwidth(44) msign("\$-\$") nolabel 
  mlabels(none) collabels(none) 
  varlabels(_eq2_meff "Effect of 10th--90th \%ile move in diversity") 
  postfoot("\bottomrule\addlinespace" "\end{tabular*}") ;

/****************************************************************************************************************/
/* Erase the temporary data file from disk, clean-up stored estimates from memory, close the log file, and exit */
/****************************************************************************************************************/

/* Delete the temporary data file written earlier in this script */
erase "../../data/aagk_xcountry_tmp.dta" ;

/* Drop all stored estimation results */
estimates clear ;

/* Stop logging */
log close ;

/* End the do-file here */
exit ;
