## ----setup, include=FALSE---------------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, results = "asis", eval = FALSE)
options(width = 90)

## ---------------------------------------------------------------------------------------
#  # Illustration of the FWL theorem's magic
#  
#  # We use the `iris` data set
#  base = setNames(iris, c("y", "x", "z1", "z2", "species"))
#  
#  library(fixest)
#  # The main estimation, we're only interested in `x`'s coefficient
#  est = feols(y ~ x + z1 + z2, base)
#  
#  # We estimate both `y` and `x` on the other explanatory variables
#  #  and get the matrix of residuals
#  resids = feols(c(y, x) ~ z1 + z2, base) |> resid()
#  # We estimate y's residuals on x's residuals
#  est_fwl = feols.fit(resids[, 1], resids[, 2])
#  
#  # We compare the estimates: they are identical
#  # The standards errors are also the same, modulo a constant factor
#  etable(est, est_fwl, order = "x|resid")
#  #>                                 est            est_fwl
#  #> Dependent Var.:                   y         resids[,1]
#  #>
#  #> x                0.6508*** (0.0667)
#  #> resids[,2]                          0.6508*** (0.0660)
#  #> Constant          1.856*** (0.2508)
#  #> z1               0.7091*** (0.0567)
#  #> z2              -0.5565*** (0.1275)
#  #> _______________ ___________________ __________________
#  #> S.E. type                       IID                IID
#  #> Observations                    150                150
#  #> R2                          0.85861            0.39104
#  #> Adj. R2                     0.85571            0.39104
#  #> ---
#  #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## ---------------------------------------------------------------------------------------
#  # We generate the data
#  n = 1e6
#  n_half = n / 2
#  df = data.frame(x = rep(0, n))
#  df$x[1:n_half] = 1
#  df$y = df$x + rnorm(n)
#  
#  # we estimate y on x for various translations of x
#  all_trans = c(0, 10 ** (1:5))
#  all_results = list()
#  for(i in seq_along(all_trans)){
#    trans = all_trans[i]
#    all_results[[i]] = feols(y ~ I(x + trans), df)
#  }
#  
#  # we display the results
#  etable(all_results)
#  #>                            model 1            model 2            model 3
#  #> Dependent Var.:                  y                  y                  y
#  #>
#  #> Constant           0.0013 (0.0014) -9.974*** (0.0210) -99.75*** (0.2009)
#  #> I(x+0)          0.9975*** (0.0020)
#  #> I(x+10)                            0.9975*** (0.0020)
#  #> I(x+100)                                              0.9975*** (0.0020)
#  #> I(x+1000)
#  #> I(x+10000)
#  #> I(x+1e+05)
#  #> _______________ __________________ __________________ __________________
#  #> S.E. type                      IID                IID                IID
#  #> Observations             1,000,000          1,000,000          1,000,000
#  #> R2                         0.19936            0.19936            0.19936
#  #> Adj. R2                    0.19936            0.19936            0.19936
#  #>
#  #>                            model 4             model 5              model 6
#  #> Dependent Var.:                  y                   y                    y
#  #>
#  #> Constant         -997.5*** (2.000) -9,974.9*** (19.99) -99,749.2*** (199.9)
#  #> I(x+0)
#  #> I(x+10)
#  #> I(x+100)
#  #> I(x+1000)       0.9975*** (0.0020)
#  #> I(x+10000)                          0.9975*** (0.0020)
#  #> I(x+1e+05)                                               0.9975*** (0.0020)
#  #> _______________ __________________ ___________________ ____________________
#  #> S.E. type                      IID                 IID                  IID
#  #> Observations             1,000,000           1,000,000            1,000,000
#  #> R2                         0.19936             0.19936              0.19936
#  #> Adj. R2                    0.19936             0.19936              0.19936
#  #> ---
#  #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## ---------------------------------------------------------------------------------------
#  # we add 1,000,000 to x
#  feols(y ~ I(x + 1e6), df)
#  #> The variable 'I(x + 1e+06)' has been removed because of collinearity (see $collin.var).
#  #> OLS estimation, Dep. Var.: y
#  #> Observations: 1,000,000
#  #> Standard-errors: IID
#  #>             Estimate Std. Error t value  Pr(>|t|)
#  #> (Intercept) 0.500031   0.001117 447.653 < 2.2e-16 ***
#  #> ... 1 variable was removed because of collinearity (I(x + 1e+06))
#  #> ---
#  #> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#  #> RMSE: 1.11701

## ---------------------------------------------------------------------------------------
#  lm(y ~ I(x + 1e6), df) |> coef()
#  #>   (Intercept)  I(x + 1e+06)
#  #> -9.974923e+05  9.974923e-01
#  lm(y ~ I(x + 1e7), df) |> coef()
#  #> (Intercept) I(x + 1e+07)
#  #>    0.500031           NA

## ---------------------------------------------------------------------------------------
#  data(base_pub, package = "fixest")
#  
#  ## The model:
#  feols(nb_pub ~ age + i(author_id) + i(affil_id), base_pub)
#  #> The variables 'affil_id::6902469', 'affil_id::9217761', 'affil_id::27504731',
#  #> 'affil_id::39965400', 'affil_id::43522216', 'affil_id::47301684' and 45 others have been
#  #> removed because of collinearity (see $collin.var).
#  #> OLS estimation, Dep. Var.: nb_pub
#  #> Observations: 4,024
#  #> Standard-errors: IID
#  #>                       Estimate Std. Error   t value   Pr(>|t|)
#  #> (Intercept)          -4.700489   2.396759 -1.961185 4.9934e-02 *
#  #> age                   0.047252   0.006213  7.605218 3.6032e-14 ***
#  #> author_id::90561406  -1.458487   0.902767 -1.615574 1.0627e-01
#  #> author_id::94862465  -3.390346   1.862776 -1.820050 6.8834e-02 .
#  #> author_id::168896994  0.473991   2.447235  0.193684 8.4643e-01
#  #> author_id::217986139 -0.133319   1.734549 -0.076861 9.3874e-01
#  #> author_id::226108609  0.179560   2.021085  0.088843 9.2921e-01
#  #> author_id::231631639  2.799524   3.110143  0.900127 3.6811e-01
#  #> ... 397 coefficients remaining (display them with summary() or use argument n)
#  #> ... 51 variables were removed because of collinearity (affil_id::6902469,
#  #> affil_id::9217761 and 49 others [full set in $collin.var])
#  #> ---
#  #> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#  #> RMSE: 2.21108   Adj. R2: 0.685792

## ---------------------------------------------------------------------------------------
#  feols(nb_pub ~ age | author_id + affil_id, base_pub, vcov = "iid")
#  #> OLS estimation, Dep. Var.: nb_pub
#  #> Observations: 4,024
#  #> Fixed-effects: author_id: 200,  affil_id: 256
#  #> Standard-errors: IID
#  #>     Estimate Std. Error t value   Pr(>|t|)
#  #> age 0.047252   0.006257 7.55144 5.4359e-14 ***
#  #> ---
#  #> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#  #> RMSE: 2.21108     Adj. R2: 0.681301
#  #>                 Within R2: 0.015731

## ---------------------------------------------------------------------------------------
#  feols(nb_pub ~ is_woman + age + i(author_id) + i(year), base_pub)
#  #> The variables 'author_id::2747123765' and 'year::2000' have been removed because of
#  #> collinearity (see $collin.var).
#  #> OLS estimation, Dep. Var.: nb_pub
#  #> Observations: 4,024
#  #> Standard-errors: IID
#  #>                       Estimate Std. Error   t value Pr(>|t|)
#  #> (Intercept)           3.224328   2.203459  1.463303  0.14347
#  #> is_woman             -0.673406   1.624295 -0.414583  0.67847
#  #> age                   0.046843   0.045423  1.031271  0.30248
#  #> author_id::90561406  -1.028373   1.093804 -0.940180  0.34719
#  #> author_id::94862465  -1.953734   0.985021 -1.983444  0.04739 *
#  #> author_id::168896994 -1.449938   0.914733 -1.585094  0.11303
#  #> author_id::217986139 -1.576761   0.923925 -1.706591  0.08798 .
#  #> author_id::226108609 -0.568410   1.171480 -0.485207  0.62756
#  #> ... 242 coefficients remaining (display them with summary() or use argument n)
#  #> ... 2 variables were removed because of collinearity (author_id::2747123765 and
#  #> year::2000)
#  #> ---
#  #> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#  #> RMSE: 2.96683   Adj. R2: 0.457524

## ---------------------------------------------------------------------------------------
#  # same estimation as above
#  est_num = feols(nb_pub ~ is_woman + age + i(author_id) + i(year), base_pub)
#  #> The variables 'author_id::2747123765' and 'year::2000' have been removed because of
#  #> collinearity (see $collin.var).
#  
#  # we create `author_id_char`: same as `author_id` but in character form
#  base_pub$author_id_char = as.character(base_pub$author_id)
#  
#  # replacing `author_id` with `author_id_char`: both variables contain the same information
#  est_char = feols(nb_pub ~ is_woman + age + i(author_id_char) + i(year), base_pub)
#  #> The variables 'author_id_char::731914895' and 'year::2000' have been removed because of
#  #> collinearity (see $collin.var).
#  
#  etable(est_num, est_char, keep = "woman|age")
#  #>                         est_num        est_char
#  #> Dependent Var.:          nb_pub          nb_pub
#  #>
#  #> is_woman        -0.6734 (1.624)   1.729 (3.174)
#  #> age             0.0468 (0.0454) 0.0468 (0.0454)
#  #> _______________ _______________ _______________
#  #> S.E. type                   IID             IID
#  #> Observations              4,024           4,024
#  #> R2                      0.49110         0.49110
#  #> Adj. R2                 0.45752         0.45752
#  #> ---
#  #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## ---------------------------------------------------------------------------------------
#  est_last = feols(nb_pub ~ i(author_id) + i(year) + is_woman + age, base_pub)
#  #> The variables 'is_woman' and 'age' have been removed because of collinearity (see
#  #> $collin.var).

## ---------------------------------------------------------------------------------------
#  feols(nb_pub ~ is_woman + age | author_id + year, base_pub)
#  #> Error: in feols(nb_pub ~ is_woman + age | author_id + year,...:
#  #> All variables, 'is_woman' and 'age', are collinear with the fixed effects. Without
#  #> doubt, your model is misspecified.

