Giter Club home page Giter Club logo

multidiff's People

Contributors

matthieustigler avatar

Stargazers

 avatar  avatar

Watchers

 avatar

multidiff's Issues

test_event fails when not enough pre period

library(multiDiff)
mdd_data <- sim_dat_common(Time=2, timing_treatment = 2, as_mdd = TRUE)
mdd_test_pre_trend_event(mdd_dat=mdd_data)
#> Error in `[<-`(`*tmp*`, i, which(which_before)[i], value = 1): subscript out of bounds
mdd_test_pre_trend_means(mdd_dat=mdd_data)
#> # A tibble: 2 × 5
#>   test       coefficient statistic p.value term                                 
#>   <chr>            <dbl>     <dbl>   <dbl> <chr>                                
#> 1 test_joint     NA         0.0198   0.888 " = period1:treatcontrol - period1:t…
#> 2 test_indiv      0.0149    0.0198   0.888 "- period1:treatcontrol + period1:tr…

Created on 2023-10-28 with reprex v2.0.2

`mdd_data_format` fails when tr is logical

See:

library(multiDiff)

dat_DiD_raw <- sim_dat_common()
dat_DiD_raw$tr2 <- dat_DiD_raw$tr==1

a <- mdd_data_format(dat_DiD_raw, treat = "tr2")
a
#> Warning in min(treated_periods_clean): no non-missing arguments to min;
#> returning Inf
#> Warning in max(treated_periods_clean): no non-missing arguments to max;
#> returning -Inf
#> Error in min(treated_periods_clean):max(treated_periods_clean): result would be too long a vector

Created on 2023-11-04 with reprex v2.0.2

multi period DD: lots of errors

Get a lot of errors: Error in chol2inv(ch) : element (1, 1) is zero, so the inverse cannot be computed

This happens even if min_obs_required is increased... maybe no variation in y? Cf pollution covid project.

Diagnostic: check variance of y!?
If no variance, treat same as no obs!?

anova?

feols() residuals is muuuch faster!

very important to make sure the vars are as factor!

library(multiDiff)

## sim dat
dat_did_df <- sim_dat_common(timing_treatment = 5, seed = 97) |> 
  dplyr::mutate(across(c(Time, unit), as.factor))
head(dat_did_df)
#> # A tibble: 6 × 5
#>   unit  Time  treat_group    tr      y
#>   <fct> <fct> <chr>       <dbl>  <dbl>
#> 1 1     1     treated         0 -0.746
#> 2 1     2     treated         0 -1.30 
#> 3 1     3     treated         0 -1.25 
#> 4 1     4     treated         0 -0.991
#> 5 1     5     treated         1 -1.06 
#> 6 1     6     treated         0 -1.27

## get total var
var(dat_did_df$y)
#> [1] 2.713643
v_full <- var(residuals(fixest::feols(y~1, data=dat_did_df)))
all.equal(v_full, var(dat_did_df$y))
#> [1] TRUE

## get conditional vars
## Uncentered second moment of a numeric vector: the sum of squares of `x`
## divided by its length (no mean is subtracted, and the divisor is n, not n-1).
## `drop()` turns the 1 x 1 matrix returned by `crossprod()` into a plain scalar.
var2 <- function(x) {
  n_obs <- length(x)
  drop(crossprod(x) / n_obs)
}
res_FE_T <- residuals(fixest::feols(y~1|Time, data=dat_did_df))
v_FE_T <- var(res_FE_T)
res_FE_N <- residuals(fixest::feols(y~1|unit, data=dat_did_df))
v_FE_N <- var(res_FE_N)
v_FE_NT <- var(residuals(fixest::feols(y~1|Time+unit, data=dat_did_df)))

v_FE_T_then_N <- var(residuals(fixest::feols(res~1|unit, data=dat_did_df |> 
                                               dplyr::mutate(res = res_FE_T))))
v_FE_N_then_T <- var(residuals(fixest::feols(res~1|Time, data=dat_did_df |> 
                                               dplyr::mutate(res = res_FE_N))))

ano_manu_lm <- c(v_full=v_full, v_FE_T=v_FE_T, v_FE_N=v_FE_N, v_FE_NT=v_FE_NT,
                 v_FE_T_then_N = v_FE_T_then_N, v_FE_N_then_T=v_FE_N_then_T)
ano_manu_lm
#>        v_full        v_FE_T        v_FE_N       v_FE_NT v_FE_T_then_N 
#>     2.7136432     2.0314706     1.5900935     0.9079209     0.9079209 
#> v_FE_N_then_T 
#>     0.9079209
ano_manu_lm/ano_manu_lm[1]
#>        v_full        v_FE_T        v_FE_N       v_FE_NT v_FE_T_then_N 
#>     1.0000000     0.7486137     0.5859626     0.3345764     0.3345764 
#> v_FE_N_then_T 
#>     0.3345764

all.equal(ano_manu_lm[["v_FE_T_then_N"]],
          ano_manu_lm[["v_FE_NT"]])
#> [1] TRUE


## anova total SSR
SSR_tot_manu <- crossprod(dat_did_df$y-mean(dat_did_df$y)) |> drop()
SSR_tot_anova <- aov(y~Time, data =dat_did_df) |> broom::tidy() |> 
  dplyr::pull(sumsq) |> sum()
all.equal(SSR_tot_anova, SSR_tot_manu)
#> [1] TRUE

##
## Summarise the sums of squares of a fitted model.
##
## aov: a fitted object that `broom::tidy()` can tidy into a table with
##      `term` and `sumsq` columns (the script passes `aov()` fits).
## Returns a table with one row per term: `term`, `sumsq`, and `perc`, the
## share (in percent) of each term in the total of all sums of squares.
get_ssr_aov <- function(aov) {
  ssr_by_term <- dplyr::select(broom::tidy(aov), term, sumsq)
  dplyr::mutate(ssr_by_term, perc = 100 * sumsq / sum(sumsq))
}

## by Y
AOV_T <- aov(y~Time, data =dat_did_df) |> get_ssr_aov()
AOV_T
#> # A tibble: 2 × 3
#>   term       sumsq  perc
#>   <chr>      <dbl> <dbl>
#> 1 Time       6821.  25.1
#> 2 Residuals 20313.  74.9
all.equal(AOV_T$perc[1],
          100*(1-v_FE_T/v_full))
#> [1] TRUE


AOV_N <- aov(y~unit, data =dat_did_df) |> get_ssr_aov()
AOV_N
#> # A tibble: 2 × 3
#>   term       sumsq  perc
#>   <chr>      <dbl> <dbl>
#> 1 unit      11234.  41.4
#> 2 Residuals 15899.  58.6
all.equal(AOV_N$perc[1], 100*(1-v_FE_N/v_full))
#> [1] TRUE

AOV_NT <- aov(y~unit+Time, data =dat_did_df) |> get_ssr_aov()
AOV_TN <- aov(y~Time +unit, data =dat_did_df) |> get_ssr_aov()
all.equal(AOV_NT$perc[1:2], AOV_TN$perc[2:1])
#> [1] TRUE
AOV_NT
#> # A tibble: 3 × 3
#>   term       sumsq  perc
#>   <chr>      <dbl> <dbl>
#> 1 unit      11234.  41.4
#> 2 Time       6821.  25.1
#> 3 Residuals  9078.  33.5
AOV_TN
#> # A tibble: 3 × 3
#>   term       sumsq  perc
#>   <chr>      <dbl> <dbl>
#> 1 Time       6821.  25.1
#> 2 unit      11234.  41.4
#> 3 Residuals  9078.  33.5
all.equal(sum(AOV_NT$perc[1:2]), 100*(1-v_FE_NT/v_full))
#> [1] TRUE

Created on 2023-10-15 with reprex v2.0.2

estimator: doubly robust?

Useful to implement too? But only works for 2 x 2 DiD...

Here is some code, possibly redundant with new mdd_CS:

## Run `drdid()` (loaded from the DRDID package below) on an `mdd_dat` object.
##
## mdd_dat: object from which `multiDiff:::intrnl_mdd_get_mdd_slot()` can
##          extract the mdd slot (variable names and periods).
## ...:     further arguments forwarded unchanged to `drdid()`.
##
## Stops with an error when the data has more than 2 periods.
## Returns whatever `drdid()` returns.
##
## NOTE(review): `%>%` and `mutate()` are not namespaced here, so they must be
## attached (e.g. via dplyr) before this function is called.
mdd_drdid <- function(mdd_dat, ...){
  
  ## variable names (unit, time, outcome, treatment) stored in the mdd slot
  mdd_slot <- multiDiff:::intrnl_mdd_get_mdd_slot(mdd_dat)
  mdd_vars <- mdd_slot$var_names
  mdd_var_unit <- mdd_vars$unit.index
  if(length(mdd_slot$periods)>2) stop("Only available for 2 time periods")
  
  ## NOTE(review): `require()` only returns FALSE when DRDID is missing, so the
  ## failure would surface later at the `drdid()` call; consider
  ## `requireNamespace()` plus an explicit error.
  require(DRDID)
  
  mdd_dat_df <- mdd_dat %>% 
    as.data.frame()
  
  ## make as numeric
  ## (only the unit id column is converted, and only when it is not numeric;
  ## `utl_char_to_num` is not defined in this snippet -- presumably a multiDiff
  ## internal helper, TODO confirm it is in scope)
  if(!is.numeric(mdd_dat_df[[mdd_vars$unit.index]])){
    mdd_dat_df <- mdd_dat_df %>% 
      mutate({{mdd_var_unit}} := utl_char_to_num(.[[mdd_vars$unit.index]]))
  }
  
  ## now run
  ## (column names are passed as the strings stored in the mdd slot)
  drdid(yname = mdd_vars$y_var, tname = mdd_vars$time.index,
        idname = mdd_vars$unit.index, 
        dname = mdd_vars$treat,
        data = as.data.frame(mdd_dat_df), ...)
}

## double rob
library(multiDiff)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
dat_full <- sim_dat_common(timing_treatment = 5:10, as_mdd = TRUE)
dat_T2 <- mdd_data_format(subset(dat_full, Time %in% c(5,6)))

mdd_drdid(dat_full)
#> Error in mdd_drdid(dat_full): Only available for 2 time periods
mdd_drdid(mdd_dat = dat_T2)
#> Loading required package: DRDID
#>  Call:
#> drdid(yname = mdd_vars$y_var, tname = mdd_vars$time.index, idname = mdd_vars$unit.index, 
#>     dname = mdd_vars$treat, data = as.data.frame(mdd_dat_df))
#> ------------------------------------------------------------------
#>  Further improved locally efficient DR DID estimator for the ATT:
#>  
#>    ATT     Std. Error  t value    Pr(>|t|)  [95% Conf. Interval] 
#>   0.1789     0.1005     1.7797     0.0751    -0.0181     0.3759  
#> ------------------------------------------------------------------
#>  Estimator based on panel data.
#>  Outcome regression est. method: weighted least squares.
#>  Propensity score est. method: inverse prob. tilting.
#>  Analytical standard error.
#> ------------------------------------------------------------------
#>  See Sant'Anna and Zhao (2020) for details.

Created on 2023-10-19 with reprex v2.0.2

estimator: MC?

Would be nice to implement the MC estimator!?

Resources:

Issues:

current code:

library(MCPanel)
library(multiDiff)
library(tidyverse)


## Reshape a long panel into a wide matrix of one variable.
##
## df:       long-format data frame.
## var_wide: variable whose values fill the matrix.
## unit_var: unit identifier; used to align rows, then dropped.
## time_var: time identifier; its values become the columns.
##
## Returns the result of `as.matrix()` on the spread table without the unit
## column, i.e. one column per value of `time_var`.
get_wide <- function(df, var_wide, unit_var, time_var){
  ## keep only the three columns needed so `spread()` keys rows on the unit
  long_slim <- select(df, {{unit_var}}, {{time_var}}, {{var_wide}})
  wide_tbl <- spread(long_slim, {{time_var}}, {{var_wide}})
  as.matrix(select(wide_tbl, -{{unit_var}}))
}

## Summarise prediction differences by the indicator `D`.
##
## pred_diff: differences (observed minus predicted), same shape as `D`.
## D:         indicator with values 1 and 0.
##
## Returns a named numeric vector:
##   beta: mean of `pred_diff` over the cells where D == 1
##   rmse: root mean squared `pred_diff` over the cells where D == 0
get_stats <- function(pred_diff, D) {
  in_group_1 <- D == 1
  in_group_0 <- D == 0
  c(
    beta = mean(pred_diff[in_group_1]),
    rmse = sqrt(mean(pred_diff[in_group_0]^2))
  )
}

## Estimate a treatment effect with `mcnnm_cv()` and compare it with the
## `DID()` benchmark (both functions come from the MCPanel package loaded
## above).
##
## mdd_dat: object from which `multiDiff:::intrnl_mdd_get_mdd_slot()` can
##          extract the variable names (outcome, treatment, unit, time).
## ...:     further arguments forwarded to `mcnnm_cv()`.
##
## Returns a list of class "MC_out" with:
##   params:      named vector "beta MC", "mse MC", "beta DID", "mse DID"
##   best_lambda, min_RMSE, lambda_L, Avg_RMSE: copied from the `mcnnm_cv()`
##                output.
## NOTE: the "mse ..." entries hold the `rmse` value returned by `get_stats()`
## (a root mean squared error); the labels are kept as-is so existing code that
## indexes `params` by name keeps working.
mdd_MC <- function(mdd_dat, ...){
  
  mdd_vars <- multiDiff:::intrnl_mdd_get_mdd_slot(mdd_dat)$var_names
  
  ## wide (unit x time) matrices of the outcome and of the treatment indicator
  Y <- get_wide(mdd_dat, var_wide = mdd_vars$y_var,
                unit_var = mdd_vars$unit.index, time_var = mdd_vars$time.index)
  D_treat <- get_wide(mdd_dat, var_wide = mdd_vars$treat,
                      unit_var = mdd_vars$unit.index, time_var = mdd_vars$time.index)
  ## mask passed to the estimators: 1 where the cell is not treated
  D_Y0_obs <- 1-D_treat
  
  ## DID
  Y_pred_DiD <- DID(M = Y, mask = D_Y0_obs)
  diff_DiD <- Y-Y_pred_DiD
  
  ## MC
  MC_out <- mcnnm_cv(Y, mask=D_Y0_obs, to_estimate_u = 1, to_estimate_v = 1, ...)
  
  ## construct Y hat: L[i, j] + u[i] + v[j]
  ## `outer()` builds the u[i] + v[j] matrix directly and, unlike the
  ## replicate()/t(replicate()) construction, also works when there is a single
  ## unit or a single period.
  Y_pred <- MC_out$L + outer(as.vector(MC_out$u), as.vector(MC_out$v), "+")
  
  ## get beta and errors
  diff_MC <- Y-Y_pred
  
  ## collect results: beta = mean difference on treated cells,
  ## error = root mean squared difference on untreated cells
  params <- c(get_stats(diff_MC, D_treat), get_stats(diff_DiD, D_treat))
  names(params) <- paste(rep(c("beta", "mse"), 2), rep(c("MC", "DID"), each=2))
  res <- list(params = params,
              best_lambda = MC_out$best_lambda,
              min_RMSE = MC_out$min_RMSE,
              lambda_L = MC_out$lambda_L,
              Avg_RMSE = MC_out$Avg_RMSE)
  class(res) <- "MC_out"
  res
}

## Print method for objects of class "MC_out".
##
## x:   an "MC_out" list; its `params` and `best_lambda` elements are shown.
## ...: accepted for compatibility with the `print()` generic; ignored.
##
## Returns `x` invisibly, as print methods conventionally do, so the object can
## be piped or assigned after printing.
print.MC_out <- function(x, ...){
  print(x$params)
  cat("\noptimal lambda:", x$best_lambda)
  ## end the line so following console output starts on a fresh line
  cat("\n")
  invisible(x)
}

## apply
dat_sim <- sim_dat_common(timing_treatment = 5:10, N = 100, as_mdd = TRUE, seed = 12342)
coef(mdd_DD_simple(dat_sim))
#>        tr 
#> 0.8600362
x <- mdd_MC(mdd_dat = dat_sim)
x
#>   beta MC    mse MC  beta DID   mse DID 
#> 0.8505727 0.8336386 0.8600362 0.2989928 
#> 
#> optimal lambda: 0.02080432

Created on 2023-10-20 with reprex v2.0.2

Using a time.index that leads to duplicated id leads to error

There should be an earlier warning/check, since otherwise it will take a long time before complaining:

library(multiDiff)

dat_sim <- sim_dat_common(N = 2, timing_treatment = 6:10,beta =1.2) 
dat_sim$pre_post <- ifelse(dat_sim$Time<=5, 0, 1)

dat_sim |> 
  mdd_data_format(time.index = "pre_post") |> 
  mdd_group_means()
#> Error in `spread()`:
#> ! Each row of output must be identified by a unique combination of keys.
#> ℹ Keys are shared for 20 rows
#> • 1, 2, 3, 4, 5
#> • 11, 12, 13, 14, 15
#> • 6, 7, 8, 9, 10
#> • 16, 17, 18, 19, 20
#> Backtrace:
#>      ▆
#>   1. ├─multiDiff::mdd_group_means(mdd_data_format(dat_sim, time.index = "pre_post"))
#>   2. ├─multiDiff::mdd_data_format(dat_sim, time.index = "pre_post")
#>   3. │ ├─... %>% rename(.group = "seq")
#>   4. │ └─multiDiff:::get_sequences(...)
#>   5. │   └─... %>% tidyr::unite("seq", -!!enquo(unit.index))
#>   6. ├─dplyr::rename(., .group = "seq")
#>   7. ├─tidyr::unite(., "seq", -!!enquo(unit.index))
#>   8. ├─tidyr::spread(., !!enquo(time.index), !!enquo(treat))
#>   9. └─tidyr:::spread.data.frame(., !!enquo(time.index), !!enquo(treat))
#>  10.   └─cli::cli_abort(...)
#>  11.     └─rlang::abort(...)

Created on 2023-08-19 with reprex v2.0.2

lag_group: converts integer years to double!?

Not sure why/when this happens!?

library(multiDiff)
library(tidyverse)

df_test <- data.frame(group = rep(c("a", "b"), each=6),
                      year = rep(2000:2005, 2),
                      value = (0:11) ^ 2,
                      value2 = rnorm(12)) %>% 
  as_tibble()
res <- lag_group(df_test, "group", time_var="year", value_var="value", lagamount = -1:2)

class(df_test$year)
#> [1] "integer"
class(res$year)
#> [1] "numeric"

Created on 2021-08-08 by the reprex package (v2.0.0)

Add texreg extract method?

Here's one possibility, should think about how to add the para test!?

library(multiDiff)
suppressPackageStartupMessages(library(texreg))


## texreg `extract` method for `mdd_DiD` model output.
##
## model:    fitted model, passed to `texreg:::extract.fixest()`.
## add_para: if TRUE, append the joint parallel-trend test to the
##           goodness-of-fit rows. This requires `model` to carry the elements
##           `para_test_joint_val` (statistic) and `para_test_joint_pval`
##           (p-value).
## ...:      forwarded to `texreg:::extract.fixest()`.
##
## Returns the texreg object from `extract.fixest()`, with two extra GOF rows
## when `add_para = TRUE`. Stops if `add_para = TRUE` and either element is
## missing from `model`.
extract_mdd_DiD <- function(model, add_para = FALSE, ...) {
  
  extr_out <- texreg:::extract.fixest(model, ...)
  
  if(add_para) {
    ## both the statistic and the p-value are needed below
    para_slots <- c("para_test_joint_val", "para_test_joint_pval")
    if(!all(para_slots %in% names(model))) {
      stop("Output should have added slots 'para_test_joint_val' and 'para_test_joint_pval'")
    }
    
    extr_out@gof.names <- c(extr_out@gof.names, "Parallel test: Wald stat", "Parallel test: p-val")
    extr_out@gof <- c(extr_out@gof, model[["para_test_joint_val"]], model[["para_test_joint_pval"]])
    ## one flag per added GOF row, so gof, gof.names and gof.decimal stay the same length
    extr_out@gof.decimal <- c(extr_out@gof.decimal, TRUE, TRUE)
    
  }
  extr_out
}

DID_dat <- mdd_data_format(sim_dat_common(timing_treatment = 5:10))
mdd_out <- mdd_DD_simple(DID_dat)
mdd_event <- mdd_test_pre_trend_event(DID_dat)
mdd_out$para_test_joint_pval <- subset(mdd_event, test =="test_joint")$p.value
mdd_out$para_test_joint_val <- subset(mdd_event, test =="test_joint")$statistic
setMethod("extract", signature = className("mdd_DiD", "multiDiff"), definition = extract_mdd_DiD)

screenreg(mdd_out, add_para=TRUE)
#> 
#> ======================================
#>                           Model 1     
#> --------------------------------------
#> tr                            1.08 ***
#>                              (0.05)   
#> --------------------------------------
#> Num. obs.                 10000       
#> Num. groups: unit          1000       
#> Num. groups: Time            10       
#> R^2 (full model)              0.70    
#> R^2 (proj model)              0.05    
#> Adj. R^2 (full model)         0.66    
#> Adj. R^2 (proj model)         0.05    
#> Parallel test: Wald stat      2.98    
#> Parallel test: p-val          0.40    
#> ======================================
#> *** p < 0.001; ** p < 0.01; * p < 0.05

Created on 2023-06-07 with reprex v2.0.2

Chaisemartin weights: extensions

Would be nice to extend to:

  • covariates
  • 2 SLS?

Also, I need to compute the sigma_fe statistics, for now I don't get same results as in STATA/paper

lag_group rlang problem

Argument time_var cannot be used as external variable!?

library(multiDiff)

df_test <- data.frame(group = rep(c("a", "b"), each=6),
                      year = rep(2000:2005, 2),
                      value = (0:11) ^ 2,
                      value2 = rnorm(12))
out <- lag_group(df_test, "group", time_var="year", value_var="value", lagamount = -1:2)
time_var_char <- "year"
lag_group(df_test, group_var = "group",
          time_var=time_var_char, value_var="value", lagamount = -1:2)
#> Error: Join columns must be present in data.
#> x Problem with `time_var_char`.

Created on 2020-06-29 by the reprex package (v0.3.0)

DD: reg_out column showing up?

Happens when for one period there's no single regression to run!!

Fix: a conditional check for that column

library(multiDiff)
library(tidyverse, warn.conflicts=FALSE)
data <- sim_dat(N=100)
data2 <- data %>% 
  mutate(tr = if_else(Time %in% c(1,2), 0L, tr))
DD_out <- DD(data=data2)
DD_out
#> # A tibble: 56 x 15
#>     time   DiD treat control n_treat n_control n_min miss_data reg_out estimate
#>    <int> <int> <chr> <chr>     <dbl>     <dbl> <dbl> <lgl>     <lgl>      <dbl>
#>  1     2     1 0_1   0_0           0       100     0 TRUE      NA        NA    
#>  2     2     2 0_1   1_1           0         0     0 TRUE      NA        NA    
#>  3     2     3 1_0   0_0           0       100     0 TRUE      NA        NA    
#>  4     2     4 1_0   1_1           0         0     0 TRUE      NA        NA    
#>  5     3     1 0_1   0_0          26        74    26 FALSE     NA         0.929
#>  6     3     2 0_1   1_1          26         0     0 TRUE      NA        NA    
#>  7     3     3 1_0   0_0           0        74     0 TRUE      NA        NA    
#>  8     3     4 1_0   1_1           0         0     0 TRUE      NA        NA    
#>  9     4     1 0_1   0_0          14        60    14 FALSE     NA         0.976
#> 10     4     2 0_1   1_1          14         5     5 FALSE     NA         1.46 
#> # … with 46 more rows, and 5 more variables: std.error <dbl>, statistic <dbl>,
#> #   p.value <dbl>, conf.low <dbl>, conf.high <dbl>

Created on 2020-04-17 by the reprex package (v0.3.0)

mdd_test_pre_trend_event: system is computationally singular

See reprex, from Indo/ZDC/3_4_diff_diff_conc_Han_by_Gaveau_rSdWeCc

library(multiDiff)

dat <- tibble::tribble(
  ~conc_code,          ~spott_2D, ~dfrt_year,               ~value, ~treat_dummy_vs_Low,
  "BPN0094",  0.566273047398526,      2001L,  0.00543669185791016,                   0,
  "BPN0094",  0.566273047398526,      2002L,    0.154952919810576,                   0,
  "BPN0094",  0.566273047398526,      2003L,     0.25220622599571,                   0,
  "BPN0094",  0.566273047398526,      2004L,    0.315746200394933,                   0,
  "BPN0094",  0.566273047398526,      2005L,    0.180029643119543,                   0,
  "BPN0094",  0.566273047398526,      2006L,   0.0207953660780963,                   0,
  "BPN0094",  0.566273047398526,      2007L,    0.384359542766736,                   0,
  "BPN0094",  0.566273047398526,      2008L,     0.02779195348044,                   0,
  "BPN0094",  0.566273047398526,      2009L,    0.466887524299651,                   0,
  "BPN0094",  0.566273047398526,      2010L,    0.265588824567728,                   0,
  "BPN0094",  0.566273047398526,      2011L,    0.205191417162128,                   0,
  "BPN0094",  0.566273047398526,      2012L,    0.131933282165527,                   0,
  "BPN0094",  0.566273047398526,      2018L,    0.397275783422373,                   1,
  "BPN0094",  0.566273047398526,      2019L,    0.290129819592285,                   1,
  "BPN0094",  0.566273047398526,      2020L,     1.31209680395197,                   1,
  "BPN0346",  0.368222534455677,      2001L,     1.13305377082137,                   0,
  "BPN0346",  0.368222534455677,      2002L,      1.3453870783385,                   0,
  "BPN0346",  0.368222534455677,      2003L,    0.633215003553443,                   0,
  "BPN0346",  0.368222534455677,      2004L,     1.16776754014893,                   0,
  "BPN0346",  0.368222534455677,      2005L,    0.793520328454351,                   0,
  "BPN0346",  0.368222534455677,      2006L,     1.44589679970823,                   0,
  "BPN0346",  0.368222534455677,      2007L,    0.241681943444346,                   0,
  "BPN0346",  0.368222534455677,      2008L,    0.680613592480948,                   0,
  "BPN0346",  0.368222534455677,      2009L,    0.476324787054563,                   0,
  "BPN0346",  0.368222534455677,      2010L,    0.543399985161755,                   0,
  "BPN0346",  0.368222534455677,      2011L,     1.18186498097259,                   0,
  "BPN0346",  0.368222534455677,      2012L,    0.392517909344363,                   0,
  "BPN0346",  0.368222534455677,      2018L,     0.79324921717242,                   0,
  "BPN0346",  0.368222534455677,      2019L,     1.61538542833491,                   0,
  "BPN0346",  0.368222534455677,      2020L,    0.695718280317718,                   0,
  "BPN0457",  0.251929652516488,      2001L,   0.0784614347069834,                   0,
  "BPN0457",  0.251929652516488,      2002L,     0.30932281510656,                   0,
  "BPN0457",  0.251929652516488,      2003L,    0.111745521789551,                   0,
  "BPN0457",  0.251929652516488,      2004L,     1.01505123693704,                   0,
  "BPN0457",  0.251929652516488,      2005L,      0.2069475367446,                   0,
  "BPN0457",  0.251929652516488,      2006L,    0.721351229225789,                   0,
  "BPN0457",  0.251929652516488,      2007L,    0.533027676591222,                   0,
  "BPN0457",  0.251929652516488,      2008L,    0.135161774291992,                   0,
  "BPN0457",  0.251929652516488,      2009L,    0.793244776468672,                   0,
  "BPN0457",  0.251929652516488,      2010L,    0.967185140781537,                   0,
  "BPN0457",  0.251929652516488,      2011L,    0.496725012133789,                   0,
  "BPN0457",  0.251929652516488,      2012L,     1.14639059801552,                   0,
  "BPN0457",  0.251929652516488,      2018L,    0.131480254230813,                   0,
  "BPN0457",  0.251929652516488,      2019L,    0.222580464046942,                   0,
  "BPN0457",  0.251929652516488,      2020L,    0.689336785214413,                   0,
  "BPN0492",  0.110169163642794,      2001L,    0.160388734756769,                   0,
  "BPN0492",  0.110169163642794,      2002L,   0.0142008914435892,                   0,
  "BPN0492",  0.110169163642794,      2003L,   0.0118555207744524,                   0,
  "BPN0492",  0.110169163642794,      2004L,   0.0255097491213331,                   0,
  "BPN0492",  0.110169163642794,      2005L,  0.00855857706131281,                   0,
  "BPN0492",  0.110169163642794,      2006L,   0.0378714713472254,                   0,
  "BPN0492",  0.110169163642794,      2007L,     0.02341088689276,                   0,
  "BPN0492",  0.110169163642794,      2008L,    0.622882192426613,                   0,
  "BPN0492",  0.110169163642794,      2009L,    0.585626616019216,                   0,
  "BPN0492",  0.110169163642794,      2010L,    0.072957689891142,                   0,
  "BPN0492",  0.110169163642794,      2011L,   0.0102898817543179,                   0,
  "BPN0492",  0.110169163642794,      2012L,   0.0975056053859337,                   0,
  "BPN0492",  0.110169163642794,      2018L,   0.0281700942938113,                   0,
  "BPN0492",  0.110169163642794,      2019L,  0.00720479454130285,                   0,
  "BPN0492",  0.110169163642794,      2020L,   0.0258329786893957,                   0,
  "BPN0862", 0.0367990418332638,      2001L,                    0,                   0,
  "BPN0862", 0.0367990418332638,      2002L,  0.00260029174804687,                   0,
  "BPN0862", 0.0367990418332638,      2003L,                    0,                   0,
  "BPN0862", 0.0367990418332638,      2004L,   0.0580177041025199,                   0,
  "BPN0862", 0.0367990418332638,      2005L,  0.00259937353515625,                   0,
  "BPN0862", 0.0367990418332638,      2006L,   0.0425533268911324,                   0,
  "BPN0862", 0.0367990418332638,      2007L,   0.0554715089111328,                   0,
  "BPN0862", 0.0367990418332638,      2008L,   0.0760603857598997,                   0,
  "BPN0862", 0.0367990418332638,      2009L,   0.0290339904541016,                   0,
  "BPN0862", 0.0367990418332638,      2010L,   0.0273428244717467,                   0,
  "BPN0862", 0.0367990418332638,      2011L,   0.0192360107110715,                   0,
  "BPN0862", 0.0367990418332638,      2012L,    0.382847344033395,                   0,
  "BPN0862", 0.0367990418332638,      2018L,  0.00173357684326172,                   0,
  "BPN0862", 0.0367990418332638,      2019L,   0.0155887941736558,                   0,
  "BPN0862", 0.0367990418332638,      2020L,   0.0562961668366077,                   0,
  "GPC0289",  0.474758361695789,      2001L,    0.262239373413086,                   0,
  "GPC0289",  0.474758361695789,      2002L,    0.136763531924977,                   0,
  "GPC0289",  0.474758361695789,      2003L,    0.125454442006549,                   0,
  "GPC0289",  0.474758361695789,      2004L,    0.210493823756319,                   0,
  "GPC0289",  0.474758361695789,      2005L,   0.0162494674050724,                   0,
  "GPC0289",  0.474758361695789,      2006L,   0.0113243975219727,                   0,
  "GPC0289",  0.474758361695789,      2007L,    0.164942772095445,                   0,
  "GPC0289",  0.474758361695789,      2008L,    0.860796221710804,                   0,
  "GPC0289",  0.474758361695789,      2009L,     1.41099162853573,                   0,
  "GPC0289",  0.474758361695789,      2010L,   0.0653400502929687,                   0,
  "GPC0289",  0.474758361695789,      2011L,     0.10454949810791,                   0,
  "GPC0289",  0.474758361695789,      2012L,    0.613592416445983,                   0,
  "GPC0289",  0.474758361695789,      2018L,    0.288197625913373,                   0,
  "GPC0289",  0.474758361695789,      2019L,    0.568458455811264,                   0,
  "GPC0289",  0.474758361695789,      2020L,    0.264009170564539,                   0,
  "GPC0579",  0.110169163642794,      2001L,    0.277744137578747,                   0,
  "GPC0579",  0.110169163642794,      2002L,    0.531491778507725,                   0,
  "GPC0579",  0.110169163642794,      2003L,    0.662143395236146,                   0,
  "GPC0579",  0.110169163642794,      2004L,     0.68972221359145,                   0,
  "GPC0579",  0.110169163642794,      2005L,    0.587857206413718,                   0,
  "GPC0579",  0.110169163642794,      2006L,    0.757148620915671,                   0,
  "GPC0579",  0.110169163642794,      2007L,    0.253449473517923,                   0,
  "GPC0579",  0.110169163642794,      2008L,    0.128805976102702,                   0,
  "GPC0579",  0.110169163642794,      2009L,     1.49804019674623,                   0,
  "GPC0579",  0.110169163642794,      2010L,    0.455303483190439,                   0,
  "GPC0579",  0.110169163642794,      2011L,     0.99147353292811,                   0,
  "GPC0579",  0.110169163642794,      2012L,     1.33657659514854,                   0,
  "GPC0579",  0.110169163642794,      2018L,    0.133220993566416,                   0,
  "GPC0579",  0.110169163642794,      2019L,   0.0842421197741939,                   0,
  "GPC0579",  0.110169163642794,      2020L,   0.0405333830362956,                   0,
  "GPC0863",  0.383243675993051,      2001L,   0.0271046865234375,                   0,
  "GPC0863",  0.383243675993051,      2002L,    0.332774439979463,                   0,
  "GPC0863",  0.383243675993051,      2003L,    0.174051236588542,                   0,
  "GPC0863",  0.383243675993051,      2004L,    0.270385926310939,                   0,
  "GPC0863",  0.383243675993051,      2005L,    0.233477186279297,                   0,
  "GPC0863",  0.383243675993051,      2006L,     0.26746463915513,                   0,
  "GPC0863",  0.383243675993051,      2007L,   0.0865898879296396,                   0,
  "GPC0863",  0.383243675993051,      2008L,  0.00349633685302734,                   0,
  "GPC0863",  0.383243675993051,      2009L,    0.348548097385302,                   0,
  "GPC0863",  0.383243675993051,      2010L,   0.0577083878173828,                   0,
  "GPC0863",  0.383243675993051,      2011L,   0.0848166971435547,                   0,
  "GPC0863",  0.383243675993051,      2012L,     2.40003533505334,                   0,
  "GPC0863",  0.383243675993051,      2018L,   0.0834559284301758,                   0,
  "GPC0863",  0.383243675993051,      2019L,    0.113660851137169,                   0,
  "GPC0863",  0.383243675993051,      2020L,   0.0521243060510972,                   0,
  "GPC0864",  0.383243675993051,      2001L,    0.190003904052734,                   0,
  "GPC0864",  0.383243675993051,      2002L,    0.440765878322467,                   0,
  "GPC0864",  0.383243675993051,      2003L,     0.30419754265759,                   0,
  "GPC0864",  0.383243675993051,      2004L,     1.05182007931172,                   0,
  "GPC0864",  0.383243675993051,      2005L,    0.644986196769924,                   0,
  "GPC0864",  0.383243675993051,      2006L,     0.68958050794989,                   0,
  "GPC0864",  0.383243675993051,      2007L,    0.467900706172928,                   0,
  "GPC0864",  0.383243675993051,      2008L,    0.116762469238281,                   0,
  "GPC0864",  0.383243675993051,      2009L,     2.22643830122573,                   0,
  "GPC0864",  0.383243675993051,      2010L,    0.176228899863329,                   0,
  "GPC0864",  0.383243675993051,      2011L,    0.305804677684829,                   0,
  "GPC0864",  0.383243675993051,      2012L,     1.77858370625766,                   0,
  "GPC0864",  0.383243675993051,      2018L,    0.375146618177945,                   0,
  "GPC0864",  0.383243675993051,      2019L,    0.512858270555922,                   0,
  "GPC0864",  0.383243675993051,      2020L,    0.467502245133225,                   0,
  "GPC0865",  0.383243675993051,      2001L,   0.0208965475463867,                   0,
  "GPC0865",  0.383243675993051,      2002L,    0.062822830598719,                   0,
  "GPC0865",  0.383243675993051,      2003L,     0.29579998631903,                   0,
  "GPC0865",  0.383243675993051,      2004L,    0.272376023067938,                   0,
  "GPC0865",  0.383243675993051,      2005L,   0.0256870812808766,                   0,
  "GPC0865",  0.383243675993051,      2006L,    0.026060984055463,                   0,
  "GPC0865",  0.383243675993051,      2007L,   0.0148021076660156,                   0,
  "GPC0865",  0.383243675993051,      2008L,    0.183653233812758,                   0,
  "GPC0865",  0.383243675993051,      2009L,     0.15448628186825,                   0,
  "GPC0865",  0.383243675993051,      2010L,   0.0466292853881836,                   0,
  "GPC0865",  0.383243675993051,      2011L,    0.123644021789551,                   0,
  "GPC0865",  0.383243675993051,      2012L,     1.60555653680396,                   0,
  "GPC0865",  0.383243675993051,      2018L,     1.06293804834487,                   0,
  "GPC0865",  0.383243675993051,      2019L,    0.425658271461397,                   0,
  "GPC0865",  0.383243675993051,      2020L,    0.454896897400142,                   0,
  "GPC0971",  0.383243675993051,      2001L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2002L, 0.000881309265136719,                   0,
  "GPC0971",  0.383243675993051,      2003L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2004L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2005L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2006L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2007L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2008L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2009L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2010L,  0.00254376197150735,                   0,
  "GPC0971",  0.383243675993051,      2011L,  0.00352524682617187,                   0,
  "GPC0971",  0.383243675993051,      2012L, 0.000881362976074219,                   0,
  "GPC0971",  0.383243675993051,      2018L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2019L,                    0,                   0,
  "GPC0971",  0.383243675993051,      2020L,                    0,                   0,
  "M0112",  0.765460169372274,      2001L,   0.0055585267712163,                   0,
  "M0112",  0.765460169372274,      2002L,   0.0026099345703125,                   0,
  "M0112",  0.765460169372274,      2003L,                    0,                   0,
  "M0112",  0.765460169372274,      2004L,                    0,                   0,
  "M0112",  0.765460169372274,      2005L, 0.000218401784620098,                   0,
  "M0112",  0.765460169372274,      2006L,                    0,                   0,
  "M0112",  0.765460169372274,      2007L,                    0,                   0,
  "M0112",  0.765460169372274,      2008L,                    0,                   0,
  "M0112",  0.765460169372274,      2009L,                    0,                   0,
  "M0112",  0.765460169372274,      2010L,  0.00178460512886795,                   0,
  "M0112",  0.765460169372274,      2011L,                    0,                   0,
  "M0112",  0.765460169372274,      2012L,   0.0114464961251053,                   0,
  "M0112",  0.765460169372274,      2018L,  0.00171900321332146,                   1,
  "M0112",  0.765460169372274,      2019L,  0.00352727472665824,                   1,
  "M0112",  0.765460169372274,      2020L,  0.00957119470214844,                   1
)


dat_reg_H_vs_L <- mdd_data_format(dat, y_var = "value", treat = "treat_dummy_vs_Low", 
                                  time.index ="dfrt_year", unit.index = "conc_code")  

mdd_DD_simple(dat_reg_H_vs_L)
#> OLS estimation, Dep. Var.: value
#> Observations: 180 
#> Fixed-effects: conc_code: 12,  dfrt_year: 15
#> Standard-errors: Clustered (conc_code) 
#>                    Estimate Std. Error t value Pr(>|t|) 
#> treat_dummy_vs_Low 0.315623   0.200063 1.57762  0.14296 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.316483     Adj. R2: 0.419741
#>                  Within R2: 0.021624
mdd_test_pre_trend_event(dat_reg_H_vs_L)
#> Error in solve.default(vcov.hyp): system is computationally singular: reciprocal condition number = 7.02236e-18

Created on 2022-12-21 with reprex v2.0.2

warning with irregular years: longer object length is not a multiple of shorter object length

The code currently assumes regularly spaced years (no gaps in the time index); otherwise it throws a warning:

#> Warning in treated_periods_clean == expected_seq: longer object length is not a
#> multiple of shorter object length

library(multiDiff)
dat_DiD_raw <- sim_dat_common()
dat_DiD <- mdd_data_format(subset(dat_DiD_raw, Time !=3))
dat_DiD
#> Warning in treated_periods_clean == expected_seq: longer object length is not a
#> multiple of shorter object length
#> ### MDD data
#>  -Design type:  classical 
#>  -Reversible treatment:  FALSE 
#>  -N units: 1000. Treated: 250
#>  -N treatment sequences:  2 
#>  -T periods: 9. Treated: 8
#>  -Treated periods:  2 4 5 6 7 8 9 10

Created on 2023-07-16 with reprex v2.0.2

Better warning for impossible pre-trend test

`mdd_test_pre_trend_event()` should check whether there are enough pre-treatment periods and fail with an informative error message if not.

library(multiDiff)
dat_DiD_raw <- sim_dat_common(Time = 2)
dat_DiD <- mdd_data_format(dat_DiD_raw)

DD <- mdd_DD_simple(dat_DiD)
event <- mdd_event_study(dat_DiD)
pre_test <- mdd_test_pre_trend_event(event)
#> Error in `[<-`(`*tmp*`, i, which(which_before)[i], value = 1): subscript out of bounds

Created on 2023-07-16 with reprex v2.0.2

DD why n/2 first round?

I got that:

CNT %>%
  distinct(time, Tot)
# A tibble: 4 x 2
   time    Tot
1     2 100000
2     3 200000
3     4 200000
4     5 200000

mdd_group_means: add arg to compute just before/after!?

For now, `mdd_group_means` computes the means for every period before and after treatment; it would be nice to have an argument to compute just the pre/post means. Note that this only makes sense for the "common" design.

Current code:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(multiDiff)

set.seed(123)
dat_sim <- sim_dat_common(N = 10, timing_treatment = 6:10,beta =1.2) 
dat_sim$pre_post <- ifelse(dat_sim$Time<=5, 0, 1)

dat_sim |> 
  group_by(pre_post, treat_group) |> 
  summarise(y=mean(y))
#> `summarise()` has grouped output by 'pre_post'. You can override using the
#> `.groups` argument.
#> # A tibble: 4 × 3
#> # Groups:   pre_post [2]
#>   pre_post treat_group       y
#>      <dbl> <chr>         <dbl>
#> 1        0 control      0.0124
#> 2        0 treated     -0.446 
#> 3        1 control      0.600 
#> 4        1 treated      1.37

dat_sim |> 
  mdd_data_format() |> 
  mdd_group_means() |> 
  mutate(pre_post = ifelse(Time<=5, 0, 1)) |> 
  group_by(pre_post, .group) |> 
  summarise(y=mean(y))
#> `summarise()` has grouped output by 'pre_post'. You can override using the
#> `.groups` argument.
#> # A tibble: 4 × 3
#> # Groups:   pre_post [2]
#>   pre_post .group        y
#>      <dbl> <chr>     <dbl>
#> 1        0 control  0.0124
#> 2        0 treated -0.446 
#> 3        1 control  0.600 
#> 4        1 treated  1.37

Created on 2023-08-19 with reprex v2.0.2

DD_manu: should take values of subperiod as ordered

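With a character `subperiod` (first call), the rows are shown as post then pre (apparently sorted alphabetically), which inverts the order and flips the sign of the "Diff Time" row; passing a factor with explicit levels (second call) gives the expected result.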

library(multiDiff)
library(tidyverse)

dat_reprex <- sim_dat_staggered(Time=10, N = 100, timing_treatment=6, perc_treat=0.5, perc_never=0.5,
                               gamma=0.2)

DD_manu(dat_reprex,
        control_gr = "0_0_0_0_0_0_0_0_0_0",
        treat_gr = "0_0_0_0_0_1_1_1_1_1",
        subperiod= rep(c("pre", "post"), each=5))
#> # A tibble: 3 x 5
#>   .period   group_0_0_0_0_0_0_0_0_0_0 group_0_0_0_0_0_1_1_1_1…   diff diff_check
#>   <chr>                         <dbl>                    <dbl>  <dbl>      <dbl>
#> 1 post                         -1.81                    -0.460  1.35       1.35 
#> 2 pre                          -0.801                   -1.07  -0.267     -0.267
#> 3 Diff Time                     1.01                    -0.608 -1.61      -1.61

DD_manu(dat_reprex,
        control_gr = "0_0_0_0_0_0_0_0_0_0",
        treat_gr = "0_0_0_0_0_1_1_1_1_1",
        subperiod= factor(rep(c("pre", "post"), each=5),
                          levels=c("pre", "post")))
#> # A tibble: 3 x 5
#>   .period   group_0_0_0_0_0_0_0_0_0_0 group_0_0_0_0_0_1_1_1_1…   diff diff_check
#>   <chr>                         <dbl>                    <dbl>  <dbl>      <dbl>
#> 1 pre                          -0.801                   -1.07  -0.267     -0.267
#> 2 post                         -1.81                    -0.460  1.35       1.35 
#> 3 Diff Time                    -1.01                     0.608  1.61       1.61

Created on 2020-06-10 by the reprex package (v0.3.0)

cross-sectional data: mdd_group_means will not work

related to #19

library(dplyr, warn.conflicts = FALSE)
library(multiDiff)

df_raw <- sim_dat_common(Time=4, timing_treatment = 3, perc_treat = 0.5, seed = 45)

df_cross <- df_raw |>
  mutate(keep = rep(c(TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE), times =500)) |>
  filter(keep) |>
  select(-keep)

mdd_FE <- mdd_data_format(df_cross)
mdd_cr <- mdd_data_format(df_cross, unit.index = "treat_group")

mdd_group_means(mdd_FE)
#> # A tibble: 6 × 3
#>   .group     Time      y
#>   <chr>     <int>  <dbl>
#> 1 0_NA_NA_0     1  1.45 
#> 2 0_NA_NA_0     4 -0.787
#> 3 NA_0_0_NA     2  0.550
#> 4 NA_0_0_NA     3 -0.304
#> 5 NA_0_1_NA     2  0.401
#> 6 NA_0_1_NA     3  0.560
mdd_group_means(mdd_cr)
#> Error in `spread()`:
#> ! Each row of output must be identified by a unique combination of keys.
#> ℹ Keys are shared for 2000 rows
#> • 1001, 1005, 1009, 1013, 1017, 1021, 1025, 1029, 1033, 1037, 1041, 1045, 1049,
#>   1053, 1057, 1061, 1065, 1069, 1073, 1077, 1081, 1085, 1089, 1093, 1097, 1101,
#>   1105, 1109, 1113, 1117, 1121, 1125, 1129, 1133, 1137, 1141, 1145, 1149, 1153,
#>   1157, 1161, 1165, 1169, 1173, 1177, 1181, 1185, 1189, 1193, 1197, 1201, 1205,
#>   1209, 1213, 1217, 1221, 1225, 1229, 1233, 1237, 1241, 1245, 1249, 1253, 1257,
#>   1261, 1265, 1269, 1273, 1277, 1281, 1285, 1289, 1293, 1297, 1301, 1305, 1309,
#>   1313, 1317, 1321, 1325, 1329, 1333, 1337, 1341, 1345, 1349, 1353, 1357, 1361,
#>   1365, 1369, 1373, 1377, 1381, 1385, 1389, 1393, 1397, 1401, 1405, 1409, 1413,
#>   1417, 1421, 1425, 1429, 1433, 1437, 1441, 1445, 1449, 1453, 1457, 1461, 1465,
#>   1469, 1473, 1477, 1481, 1485, 1489, 1493, 1497, 1501, 1505, 1509, 1513, 1517,
#>   1521, 1525, 1529, 1533, 1537, 1541, 1545, 1549, 1553, 1557, 1561, 1565, 1569,
#>   1573, 1577, 1581, 1585, 1589, 1593, 1597, 1601, 1605, 1609, 1613, 1617, 1621,
#>   1625, 1629, 1633, 1637, 1641, 1645, 1649, 1653, 1657, 1661, 1665, 1669, 1673,
#>   1677, 1681, 1685, 1689, 1693, 1697, 1701, 1705, 1709, 1713, 1717, 1721, 1725,
#>   1729, 1733, 1737, 1741, 1745, 1749, 1753, 1757, 1761, 1765, 1769, 1773, 1777,
#>   1781, 1785, 1789, 1793, 1797, 1801, 1805, 1809, 1813, 1817, 1821, 1825, 1829,
#>   1833, 1837, 1841, 1845, 1849, 1853, 1857, 1861, 1865, 1869, 1873, 1877, 1881,
#>   1885, 1889, 1893, 1897, 1901, 1905, 1909, 1913, 1917, 1921, 1925, 1929, 1933,
#>   1937, 1941, 1945, 1949, 1953, 1957, 1961, 1965, 1969, 1973, 1977, 1981, 1985,
#>   1989, 1993, 1997
#> • 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61, 65, 69, 73, 77,
#>   81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125, 129, 133, 137, 141,
#>   145, 149, 153, 157, 161, 165, 169, 173, 177, 181, 185, 189, 193, 197, 201,
#>   205, 209, 213, 217, 221, 225, 229, 233, 237, 241, 245, 249, 253, 257, 261,
#>   265, 269, 273, 277, 281, 285, 289, 293, 297, 301, 305, 309, 313, 317, 321,
#>   325, 329, 333, 337, 341, 345, 349, 353, 357, 361, 365, 369, 373, 377, 381,
#>   385, 389, 393, 397, 401, 405, 409, 413, 417, 421, 425, 429, 433, 437, 441,
#>   445, 449, 453, 457, 461, 465, 469, 473, 477, 481, 485, 489, 493, 497, 501,
#>   505, 509, 513, 517, 521, 525, 529, 533, 537, 541, 545, 549, 553, 557, 561,
#>   565, 569, 573, 577, 581, 585, 589, 593, 597, 601, 605, 609, 613, 617, 621,
#>   625, 629, 633, 637, 641, 645, 649, 653, 657, 661, 665, 669, 673, 677, 681,
#>   685, 689, 693, 697, 701, 705, 709, 713, 717, 721, 725, 729, 733, 737, 741,
#>   745, 749, 753, 757, 761, 765, 769, 773, 777, 781, 785, 789, 793, 797, 801,
#>   805, 809, 813, 817, 821, 825, 829, 833, 837, 841, 845, 849, 853, 857, 861,
#>   865, 869, 873, 877, 881, 885, 889, 893, 897, 901, 905, 909, 913, 917, 921,
#>   925, 929, 933, 937, 941, 945, 949, 953, 957, 961, 965, 969, 973, 977, 981,
#>   985, 989, 993, 997
#> • 1003, 1007, 1011, 1015, 1019, 1023, 1027, 1031, 1035, 1039, 1043, 1047, 1051,
#>   1055, 1059, 1063, 1067, 1071, 1075, 1079, 1083, 1087, 1091, 1095, 1099, 1103,
#>   1107, 1111, 1115, 1119, 1123, 1127, 1131, 1135, 1139, 1143, 1147, 1151, 1155,
#>   1159, 1163, 1167, 1171, 1175, 1179, 1183, 1187, 1191, 1195, 1199, 1203, 1207,
#>   1211, 1215, 1219, 1223, 1227, 1231, 1235, 1239, 1243, 1247, 1251, 1255, 1259,
#>   1263, 1267, 1271, 1275, 1279, 1283, 1287, 1291, 1295, 1299, 1303, 1307, 1311,
#>   1315, 1319, 1323, 1327, 1331, 1335, 1339, 1343, 1347, 1351, 1355, 1359, 1363,
#>   1367, 1371, 1375, 1379, 1383, 1387, 1391, 1395, 1399, 1403, 1407, 1411, 1415,
#>   1419, 1423, 1427, 1431, 1435, 1439, 1443, 1447, 1451, 1455, 1459, 1463, 1467,
#>   1471, 1475, 1479, 1483, 1487, 1491, 1495, 1499, 1503, 1507, 1511, 1515, 1519,
#>   1523, 1527, 1531, 1535, 1539, 1543, 1547, 1551, 1555, 1559, 1563, 1567, 1571,
#>   1575, 1579, 1583, 1587, 1591, 1595, 1599, 1603, 1607, 1611, 1615, 1619, 1623,
#>   1627, 1631, 1635, 1639, 1643, 1647, 1651, 1655, 1659, 1663, 1667, 1671, 1675,
#>   1679, 1683, 1687, 1691, 1695, 1699, 1703, 1707, 1711, 1715, 1719, 1723, 1727,
#>   1731, 1735, 1739, 1743, 1747, 1751, 1755, 1759, 1763, 1767, 1771, 1775, 1779,
#>   1783, 1787, 1791, 1795, 1799, 1803, 1807, 1811, 1815, 1819, 1823, 1827, 1831,
#>   1835, 1839, 1843, 1847, 1851, 1855, 1859, 1863, 1867, 1871, 1875, 1879, 1883,
#>   1887, 1891, 1895, 1899, 1903, 1907, 1911, 1915, 1919, 1923, 1927, 1931, 1935,
#>   1939, 1943, 1947, 1951, 1955, 1959, 1963, 1967, 1971, 1975, 1979, 1983, 1987,
#>   1991, 1995, 1999
#> • 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63, 67, 71, 75, 79,
#>   83, 87, 91, 95, 99, 103, 107, 111, 115, 119, 123, 127, 131, 135, 139, 143,
#>   147, 151, 155, 159, 163, 167, 171, 175, 179, 183, 187, 191, 195, 199, 203,
#>   207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255, 259, 263,
#>   267, 271, 275, 279, 283, 287, 291, 295, 299, 303, 307, 311, 315, 319, 323,
#>   327, 331, 335, 339, 343, 347, 351, 355, 359, 363, 367, 371, 375, 379, 383,
#>   387, 391, 395, 399, 403, 407, 411, 415, 419, 423, 427, 431, 435, 439, 443,
#>   447, 451, 455, 459, 463, 467, 471, 475, 479, 483, 487, 491, 495, 499, 503,
#>   507, 511, 515, 519, 523, 527, 531, 535, 539, 543, 547, 551, 555, 559, 563,
#>   567, 571, 575, 579, 583, 587, 591, 595, 599, 603, 607, 611, 615, 619, 623,
#>   627, 631, 635, 639, 643, 647, 651, 655, 659, 663, 667, 671, 675, 679, 683,
#>   687, 691, 695, 699, 703, 707, 711, 715, 719, 723, 727, 731, 735, 739, 743,
#>   747, 751, 755, 759, 763, 767, 771, 775, 779, 783, 787, 791, 795, 799, 803,
#>   807, 811, 815, 819, 823, 827, 831, 835, 839, 843, 847, 851, 855, 859, 863,
#>   867, 871, 875, 879, 883, 887, 891, 895, 899, 903, 907, 911, 915, 919, 923,
#>   927, 931, 935, 939, 943, 947, 951, 955, 959, 963, 967, 971, 975, 979, 983,
#>   987, 991, 995, 999
#> • 1004, 1008, 1012, 1016, 1020, 1024, 1028, 1032, 1036, 1040, 1044, 1048, 1052,
#>   1056, 1060, 1064, 1068, 1072, 1076, 1080, 1084, 1088, 1092, 1096, 1100, 1104,
#>   1108, 1112, 1116, 1120, 1124, 1128, 1132, 1136, 1140, 1144, 1148, 1152, 1156,
#>   1160, 1164, 1168, 1172, 1176, 1180, 1184, 1188, 1192, 1196, 1200, 1204, 1208,
#>   1212, 1216, 1220, 1224, 1228, 1232, 1236, 1240, 1244, 1248, 1252, 1256, 1260,
#>   1264, 1268, 1272, 1276, 1280, 1284, 1288, 1292, 1296, 1300, 1304, 1308, 1312,
#>   1316, 1320, 1324, 1328, 1332, 1336, 1340, 1344, 1348, 1352, 1356, 1360, 1364,
#>   1368, 1372, 1376, 1380, 1384, 1388, 1392, 1396, 1400, 1404, 1408, 1412, 1416,
#>   1420, 1424, 1428, 1432, 1436, 1440, 1444, 1448, 1452, 1456, 1460, 1464, 1468,
#>   1472, 1476, 1480, 1484, 1488, 1492, 1496, 1500, 1504, 1508, 1512, 1516, 1520,
#>   1524, 1528, 1532, 1536, 1540, 1544, 1548, 1552, 1556, 1560, 1564, 1568, 1572,
#>   1576, 1580, 1584, 1588, 1592, 1596, 1600, 1604, 1608, 1612, 1616, 1620, 1624,
#>   1628, 1632, 1636, 1640, 1644, 1648, 1652, 1656, 1660, 1664, 1668, 1672, 1676,
#>   1680, 1684, 1688, 1692, 1696, 1700, 1704, 1708, 1712, 1716, 1720, 1724, 1728,
#>   1732, 1736, 1740, 1744, 1748, 1752, 1756, 1760, 1764, 1768, 1772, 1776, 1780,
#>   1784, 1788, 1792, 1796, 1800, 1804, 1808, 1812, 1816, 1820, 1824, 1828, 1832,
#>   1836, 1840, 1844, 1848, 1852, 1856, 1860, 1864, 1868, 1872, 1876, 1880, 1884,
#>   1888, 1892, 1896, 1900, 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936,
#>   1940, 1944, 1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988,
#>   1992, 1996, 2000
#> • 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80,
#>   84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
#>   148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
#>   208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264,
#>   268, 272, 276, 280, 284, 288, 292, 296, 300, 304, 308, 312, 316, 320, 324,
#>   328, 332, 336, 340, 344, 348, 352, 356, 360, 364, 368, 372, 376, 380, 384,
#>   388, 392, 396, 400, 404, 408, 412, 416, 420, 424, 428, 432, 436, 440, 444,
#>   448, 452, 456, 460, 464, 468, 472, 476, 480, 484, 488, 492, 496, 500, 504,
#>   508, 512, 516, 520, 524, 528, 532, 536, 540, 544, 548, 552, 556, 560, 564,
#>   568, 572, 576, 580, 584, 588, 592, 596, 600, 604, 608, 612, 616, 620, 624,
#>   628, 632, 636, 640, 644, 648, 652, 656, 660, 664, 668, 672, 676, 680, 684,
#>   688, 692, 696, 700, 704, 708, 712, 716, 720, 724, 728, 732, 736, 740, 744,
#>   748, 752, 756, 760, 764, 768, 772, 776, 780, 784, 788, 792, 796, 800, 804,
#>   808, 812, 816, 820, 824, 828, 832, 836, 840, 844, 848, 852, 856, 860, 864,
#>   868, 872, 876, 880, 884, 888, 892, 896, 900, 904, 908, 912, 916, 920, 924,
#>   928, 932, 936, 940, 944, 948, 952, 956, 960, 964, 968, 972, 976, 980, 984,
#>   988, 992, 996, 1000
#> • 1002, 1006, 1010, 1014, 1018, 1022, 1026, 1030, 1034, 1038, 1042, 1046, 1050,
#>   1054, 1058, 1062, 1066, 1070, 1074, 1078, 1082, 1086, 1090, 1094, 1098, 1102,
#>   1106, 1110, 1114, 1118, 1122, 1126, 1130, 1134, 1138, 1142, 1146, 1150, 1154,
#>   1158, 1162, 1166, 1170, 1174, 1178, 1182, 1186, 1190, 1194, 1198, 1202, 1206,
#>   1210, 1214, 1218, 1222, 1226, 1230, 1234, 1238, 1242, 1246, 1250, 1254, 1258,
#>   1262, 1266, 1270, 1274, 1278, 1282, 1286, 1290, 1294, 1298, 1302, 1306, 1310,
#>   1314, 1318, 1322, 1326, 1330, 1334, 1338, 1342, 1346, 1350, 1354, 1358, 1362,
#>   1366, 1370, 1374, 1378, 1382, 1386, 1390, 1394, 1398, 1402, 1406, 1410, 1414,
#>   1418, 1422, 1426, 1430, 1434, 1438, 1442, 1446, 1450, 1454, 1458, 1462, 1466,
#>   1470, 1474, 1478, 1482, 1486, 1490, 1494, 1498, 1502, 1506, 1510, 1514, 1518,
#>   1522, 1526, 1530, 1534, 1538, 1542, 1546, 1550, 1554, 1558, 1562, 1566, 1570,
#>   1574, 1578, 1582, 1586, 1590, 1594, 1598, 1602, 1606, 1610, 1614, 1618, 1622,
#>   1626, 1630, 1634, 1638, 1642, 1646, 1650, 1654, 1658, 1662, 1666, 1670, 1674,
#>   1678, 1682, 1686, 1690, 1694, 1698, 1702, 1706, 1710, 1714, 1718, 1722, 1726,
#>   1730, 1734, 1738, 1742, 1746, 1750, 1754, 1758, 1762, 1766, 1770, 1774, 1778,
#>   1782, 1786, 1790, 1794, 1798, 1802, 1806, 1810, 1814, 1818, 1822, 1826, 1830,
#>   1834, 1838, 1842, 1846, 1850, 1854, 1858, 1862, 1866, 1870, 1874, 1878, 1882,
#>   1886, 1890, 1894, 1898, 1902, 1906, 1910, 1914, 1918, 1922, 1926, 1930, 1934,
#>   1938, 1942, 1946, 1950, 1954, 1958, 1962, 1966, 1970, 1974, 1978, 1982, 1986,
#>   1990, 1994, 1998
#> • 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, 66, 70, 74, 78,
#>   82, 86, 90, 94, 98, 102, 106, 110, 114, 118, 122, 126, 130, 134, 138, 142,
#>   146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190, 194, 198, 202,
#>   206, 210, 214, 218, 222, 226, 230, 234, 238, 242, 246, 250, 254, 258, 262,
#>   266, 270, 274, 278, 282, 286, 290, 294, 298, 302, 306, 310, 314, 318, 322,
#>   326, 330, 334, 338, 342, 346, 350, 354, 358, 362, 366, 370, 374, 378, 382,
#>   386, 390, 394, 398, 402, 406, 410, 414, 418, 422, 426, 430, 434, 438, 442,
#>   446, 450, 454, 458, 462, 466, 470, 474, 478, 482, 486, 490, 494, 498, 502,
#>   506, 510, 514, 518, 522, 526, 530, 534, 538, 542, 546, 550, 554, 558, 562,
#>   566, 570, 574, 578, 582, 586, 590, 594, 598, 602, 606, 610, 614, 618, 622,
#>   626, 630, 634, 638, 642, 646, 650, 654, 658, 662, 666, 670, 674, 678, 682,
#>   686, 690, 694, 698, 702, 706, 710, 714, 718, 722, 726, 730, 734, 738, 742,
#>   746, 750, 754, 758, 762, 766, 770, 774, 778, 782, 786, 790, 794, 798, 802,
#>   806, 810, 814, 818, 822, 826, 830, 834, 838, 842, 846, 850, 854, 858, 862,
#>   866, 870, 874, 878, 882, 886, 890, 894, 898, 902, 906, 910, 914, 918, 922,
#>   926, 930, 934, 938, 942, 946, 950, 954, 958, 962, 966, 970, 974, 978, 982,
#>   986, 990, 994, 998
#> Backtrace:
#>      ▆
#>   1. ├─multiDiff::mdd_group_means(mdd_cr)
#>   2. │ └─mdd_dat %>% ...
#>   3. ├─multiDiff:::add_group(...)
#>   4. │ ├─... %>% rename(.group = "seq")
#>   5. │ └─multiDiff:::get_sequences(...)
#>   6. │   └─... %>% tidyr::unite("seq", -!!enquo(unit.index))
#>   7. ├─dplyr::rename(., .group = "seq")
#>   8. ├─tidyr::unite(., "seq", -!!enquo(unit.index))
#>   9. ├─tidyr::spread(., !!enquo(time.index), !!enquo(treat))
#>  10. └─tidyr:::spread.data.frame(., !!enquo(time.index), !!enquo(treat))
#>  11.   └─cli::cli_abort(...)
#>  12.     └─rlang::abort(...)

Created on 2023-10-15 with reprex v2.0.2

lag_group: 2 grouping variables?

Having two grouping variables fails:

library(multiDiff)
df_test <- data.frame(group = rep(c("a", "b"), each=6),
                      group2 = rep(c("a", "b"), each=6),
                      year = rep(2000:2005, 2),
                      value = (0:11) ^ 2,
                      value2 = rnorm(12))

lag_group(df_test, "group", time_var="year", value_var="value", lagamount = -1:2)
#>    group year group2 value       value2 value_lead1 value_lag0 value_lag1
#> 1      a 2000      a     0 -0.067586638           1          0         NA
#> 2      a 2001      a     1 -2.456295641           4          1          0
#> 3      a 2002      a     4  1.068701111           9          4          1
#> 4      a 2003      a     9  1.929611081          16          9          4
#> 5      a 2004      a    16  0.344507011          25         16          9
#> 6      a 2005      a    25  1.073819125          NA         25         16
#> 7      b 2000      b    36  0.875790870          49         36         NA
#> 8      b 2001      b    49  0.806462000          64         49         36
#> 9      b 2002      b    64 -1.421379692          81         64         49
#> 10     b 2003      b    81  0.950644879         100         81         64
#> 11     b 2004      b   100  0.986921242         121        100         81
#> 12     b 2005      b   121 -0.004699788          NA        121        100
#>    value_lag2
#> 1          NA
#> 2          NA
#> 3           0
#> 4           1
#> 5           4
#> 6           9
#> 7          NA
#> 8          NA
#> 9          36
#> 10         49
#> 11         64
#> 12         81
lag_group(df_test, c("group","group2"), time_var="year", value_var="value", lagamount = -1:2)
#> Error in `rlang::sym()`:
#> ! Can't convert a character vector to a symbol.

Created on 2022-08-12 by the reprex package (v2.0.1)

cross-sectional data: event study will not work

See:

library(dplyr, warn.conflicts = FALSE)
library(multiDiff)

df_raw <- sim_dat_common(Time=2, timing_treatment = 2, perc_treat = 0.5, seed = 45)

## create cross-section of regressions
df_cross <- df_raw |>
  mutate(keep = rep(c(TRUE, FALSE, FALSE, TRUE), times =500)) |>
  filter(keep) |>
  select(-keep)

df_cross |> 
  count(treat_group, tr, Time)
#> # A tibble: 4 × 4
#>   treat_group    tr  Time     n
#>   <chr>       <dbl> <int> <int>
#> 1 control         0     1   250
#> 2 control         0     2   250
#> 3 treated         0     1   250
#> 4 treated         1     2   250

mdd <- mdd_data_format(df_cross, unit.index = "treat_group")
mdd_event_study(mdd_dat = mdd, time.omit = -1)
#> OLS estimation, Dep. Var.: y_var
#> Observations: 1,000 
#> Fixed-effects: unit.index: 2,  time.index: 2
#> Standard-errors: Clustered (unit.index) 
#>                   Estimate Std. Error      t value   Pr(>|t|)    
#> timing_to_treat0  0.892546   2.95e-11  30279356117 2.1025e-11 ***
#> timing_to_treat1 -0.736917   2.95e-11 -25007668148 2.5457e-11 ***
#> timing_to_treat2  0.346675   2.95e-11  11737172289 5.4240e-11 ***
#> timing_to_treat3 -0.997029   2.95e-11 -33833189331 1.8816e-11 ***
#> timing_to_treat4  0.451085   2.95e-11  15289621268 4.1637e-11 ***
#> timing_to_treat5 -0.542856   2.94e-11 -18437396546 3.4529e-11 ***
#> timing_to_treat6  1.390562   2.96e-11  46957457662 1.3557e-11 ***
#> timing_to_treat7  2.557051   2.97e-11  86128174444 7.3915e-12 ***
#> ... 491 coefficients remaining (display them with summary() or use argument n)
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 1.0743     Adj. R2: 0.057629
#>                Within R2: 0.503085

Created on 2023-10-15 with reprex v2.0.2

DD: count units, not obs

Code from server 8_...

cnt_units <- df %>% 
      distinct(plot_id, seq_4Y) %>% 
      count(seq_4Y, name = "n_units") %>% 
      complete(seq_4Y= all_seq, fill = list(n_units=0)) 
    
    cnt <- count(df, seq_4Y) %>% 
      complete(seq_4Y= all_seq, fill = list(n=0)) %>% 
      left_join(cnt_units, by = "seq_4Y")

    
    ## table of metadata
    DiD_tab_here <- DiD_tab_CS %>%
      left_join(cnt %>%
                  rename(treat=seq_4Y, n_treat=n, n_treat_units = n_units), by = c("treat")) %>%
      left_join(cnt %>%
                  rename(control=seq_4Y, n_control=n, n_control_units = n_units), by = c("control")) %>%

better error for `mdd_event_study()` when `trim_low=-1`

Should do some simple argument checking!

library(multiDiff)
DID_dat <- sim_dat_common(timing_treatment = 5:10, as_mdd = TRUE)

mdd_event_study(DID_dat, trim_low=-1)
#> OLS estimation, Dep. Var.: y_var
#> Observations: 10,000 
#> Fixed-effects: unit.index: 1,000,  time.index: 10
#> Standard-errors: Clustered (unit.index) 
#>                  Estimate Std. Error t value  Pr(>|t|)    
#> timing_to_treat0 1.075190   0.082824 12.9817 < 2.2e-16 ***
#> timing_to_treat1 1.055766   0.078156 13.5084 < 2.2e-16 ***
#> timing_to_treat2 1.055685   0.081123 13.0133 < 2.2e-16 ***
#> timing_to_treat3 0.981621   0.085285 11.5099 < 2.2e-16 ***
#> timing_to_treat4 1.084160   0.079797 13.5866 < 2.2e-16 ***
#> timing_to_treat5 0.913641   0.084137 10.8590 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> RMSE: 0.953208     Adj. R2: 0.798443
#>                  Within R2: 0.050119
mdd_event_study(DID_dat, trim_low=0)
#> Error in `mutate()`:
#> ℹ In argument: `timing_to_treat = relevel(factor(.data$timing_to_treat),
#>   as.character(time.omit))`.
#> Caused by error in `relevel.factor()`:
#> ! 'ref' must be an existing level
#> Backtrace:
#>      ▆
#>   1. ├─multiDiff::mdd_event_study(DID_dat, trim_low = 0)
#>   2. │ └─... %>% ...
#>   3. ├─dplyr::mutate(...)
#>   4. ├─dplyr:::mutate.data.frame(...)
#>   5. │ └─dplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#>   6. │   ├─base::withCallingHandlers(...)
#>   7. │   └─dplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#>   8. │     └─mask$eval_all_mutate(quo)
#>   9. │       └─dplyr (local) eval()
#>  10. ├─stats::relevel(factor(.data$timing_to_treat), as.character(time.omit))
#>  11. ├─stats:::relevel.factor(factor(.data$timing_to_treat), as.character(time.omit))
#>  12. │ └─base::stop("'ref' must be an existing level")
#>  13. └─base::.handleSimpleError(...)
#>  14.   └─dplyr (local) h(simpleError(msg, call))
#>  15.     └─rlang::abort(message, class = error_class, parent = parent, call = error_call)

Created on 2023-11-09 with reprex v2.0.2

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, making everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.