Fitting Models *Without* parsnip • tyecon

library(tidymodels)
library(broom.mixed)
library(tyecon)
# Load the `ames` data set; its Sale_Price, Longitude and Latitude columns are
# used by the models fitted below.
data(ames)

Fitting Models Without parsnip

This vignette shows how we can achieve the same (if not better) consistency when using multiple models together, without needing packages such as parsnip or broom. The original article, which uses these two packages, can be found in the tidymodels book, here.

The linear functions family

To build this family, we simply wrap the three functions lm, stan_glm, and glmnet together using tyecon::convoke:

library(rstanarm)
library(glmnet)

# Fix the RNG seed so the random 80/20 split below is the same on every run.
# The seed value itself is arbitrary.
set.seed(501)
ames_split <- initial_split(ames, prop = 0.80)
# Keep the training portion; the models further down are fitted on it.
ames_train <- training(ames_split)

First, we unify the models:

# Bundle three fitting functions behind a single formula/data interface using
# tyecon::convoke. As the printed summary shows, the resulting function takes
# `formula`, `data`, `interface` (default: lm) and `interface.args`.
# The outer parentheses print the result of the assignment.
(linear_model <-
  convoke(
    # Unified argument list that every interface below is expressed in.
    list(formula, data),
    lm(formula = formula, data = data),
    # NOTE(review): mean_PPD = FALSE presumably turns off the mean posterior
    # predictive summary -- confirm against the rstanarm documentation.
    stan_glm(formula = formula, data = data, mean_PPD = FALSE),
    # glmnet is called with x/y instead of a formula: the model frame is built
    # from formula/data, its first column is passed as y (the response of a
    # two-sided formula) and the remaining columns are passed as x.
    glmnet(
      x = model.frame(formula, data = data)[-1],
      y = model.frame(formula, data = data)[[1]]
    )
  ))
#> convoke function
#>   interfaces: lm(), stan_glm(), glmnet()
#>   args: formula, data, interface = lm, interface.args

# Fit the same formula once per interface of `linear_model`, then collect the
# fits in a row-wise tibble: one row per interface, with the interface name in
# `modelname` and the fitted object in the `model` list-column.
model_results <- names(linear_model) %>%
  set_names() %>%
  map(function(iface) {
    linear_model(
      Sale_Price ~ Longitude + Latitude,
      ames_train,
      interface = iface
    )
  }) %>%
  enframe(name = "modelname", value = "model") %>%
  rowwise()

# Store each model's variance-covariance matrix in a list-column.
# `possibly()` substitutes NULL for any model on which `vcov()` errors.
model_results <- model_results %>%
  mutate(vcov_val = list(possibly(vcov, otherwise = NULL)(model)))
model_results
#> # A tibble: 3 × 3
#> # Rowwise: 
#>   modelname model     vcov_val     
#>   <chr>     <list>    <list>       
#> 1 lm        <lm>      <dbl [3 × 3]>
#> 2 stan_glm  <stanreg> <dbl [3 × 3]>
#> 3 glmnet    <elnet>   <NULL>

Next, we want to see the coefficients, possibly alongside some other information. In fact, we may want to know a lot more about each model as well.

# Store each model's coefficients in a list-column.
# `possibly()` substitutes NULL for any model on which `coef()` errors.
model_results <- model_results %>%
  mutate(coef_val = list(possibly(coef, otherwise = NULL)(model)))
model_results
#> # A tibble: 3 × 4
#> # Rowwise: 
#>   modelname model     vcov_val      coef_val       
#>   <chr>     <list>    <list>        <list>         
#> 1 lm        <lm>      <dbl [3 × 3]> <dbl [3]>      
#> 2 stan_glm  <stanreg> <dbl [3 × 3]> <dbl [3]>      
#> 3 glmnet    <elnet>   <NULL>        <dgCMatrx[,56]>

We can also store the tidy variant:

# Store the `tidy()` summary of each model in a list-column.
# `possibly()` substitutes NULL for any model on which `tidy()` errors.
model_results <- model_results %>%
  mutate(tidy_val = list(possibly(tidy, otherwise = NULL)(model)))
model_results
#> # A tibble: 3 × 5
#> # Rowwise: 
#>   modelname model     vcov_val      coef_val        tidy_val          
#>   <chr>     <list>    <list>        <list>          <list>            
#> 1 lm        <lm>      <dbl [3 × 3]> <dbl [3]>       <tibble [3 × 5]>  
#> 2 stan_glm  <stanreg> <dbl [3 × 3]> <dbl [3]>       <tibble [3 × 3]>  
#> 3 glmnet    <elnet>   <NULL>        <dgCMatrx[,56]> <tibble [164 × 5]>

We can also plot the results rather easily. First, let us make a unified plot function using conflate, as plot is simply a generic function:

# Wrap the `plot` generic with tyecon::conflate to get a single plotting
# function, then print it to show its arguments.
conflated_plot <- conflate(plot(x))
conflated_plot
#> conflate function for plot
#>  args: x, object.args

# Draw the plots on a 2 x 2 grid. `par()` returns the previous settings, which
# are kept so the graphics state can be restored once plotting is done.
old_par <- par(mfrow = c(2, 2))
# Plot every fitted model through the unified plot function.
# NOTE(review): the class-prefixed arguments (lm.*, elnet.*, stanreg.*) are
# presumably forwarded only to the plot method of the matching class --
# confirm against the tyecon::conflate documentation.
walk(
  pull(model_results, model),
  ~ conflated_plot(.,
    lm.ask = FALSE, lm.which = 1,
    elnet.xvar = "dev", stanreg.plotfun = "hist"
  )
)
# Restore the graphical parameters that were in effect before this chunk.
par(old_par)