This vignette gives fast examples of the Poisson Super Learner workflow after the refactoring. It focuses on:
Superlearner() with one learner library shared
by all causes;Superlearner() with one learner library per
cause;summary() for the fitted super learner;predictRisk() for the same model selectors.The examples use small simulated data, two folds, and simple
glmnet learners with fixed lambda values so
the vignette remains quick to run during package checks.
We simulate a small competing-risks data set. The observed follow-up
time is stored in time and the event indicator in
event, where 0 denotes censoring,
1 denotes cardiovascular disease, and 2
denotes death without prior cardiovascular disease.
d <- simulateStenoT1(
n = 45,
scenario = "alpha",
competing_risks = TRUE,
seed = 1
)
d <- d[, .(
id,
time,
event,
sex,
age,
diabetes_duration,
value_LDL,
value_Smoking
)]
head(d)
#> id time event sex age diabetes_duration value_LDL
#> <int> <num> <int> <fctr> <num> <num> <num>
#> 1: 1 0.1338473 2 1 36.03106 16.92239 3.9841195
#> 2: 2 0.1943785 1 0 42.33831 17.85834 1.5088089
#> 3: 3 0.5511647 1 0 43.04253 19.88147 0.9438390
#> 4: 4 0.6826765 2 0 45.93286 21.94420 2.9557458
#> 5: 5 1.1848861 1 1 45.95141 21.45525 4.6480322
#> 6: 6 1.2395837 1 0 39.59863 17.54727 0.8408968
#> value_Smoking
#> <fctr>
#> 1: 0
#> 2: 0
#> 3: 1
#> 4: 1
#> 5: 0
#> 6: 0A learner library is a list of initialized learner objects. If a single library is supplied in a competing-risks analysis, the same library is used for all causes.
shared_library <- list(
simple = Learner_glmnet(
covariates = c("sex", "diabetes_duration"),
cross_validation = FALSE,
lambda = 0
),
shrink = Learner_glmnet(
covariates = c("sex", "age", "value_LDL"),
cross_validation = FALSE,
lambda = 0.05,
alpha = 1
)
)
fit_shared <- Superlearner(
data = d,
id = "id",
status = "event",
event_time = "time",
learners = shared_library,
number_of_nodes = 3,
nfold = 2
)summary() gives a compact overview of the fitted super
learner, including the number of causes, retained learner labels,
cross-validated deviances, and meta-learner coefficients when a
meta-learner was fitted.
summary(fit_shared)
#> Call:
#> Superlearner(..., metalearner = glmnet::glmnet)
#>
#> Fitted object:
#> Class: poisson_superlearner
#> Number of competing risks: 2
#> Number of folds: 2
#> Maximum follow-up: 22.43464
#> Number of nodes: 5
#>
#> Retained learners by cause:
#> cause 1: simple, shrink
#> cause 2: simple, shrink
#>
#> Cross-validation deviance:
#> cause_index cause learner_index learner deviance
#> <int> <char> <int> <char> <num>
#> 1: 1 cause_1 1 simple 110.34291
#> 2: 1 cause_1 2 shrink 97.91293
#> 3: 2 cause_2 1 simple 318.70522
#> 4: 2 cause_2 2 shrink 53.93232
#>
#> Meta-learner coefficients:
#> cause 1:
#> (Intercept) simple shrink
#> 0.0000000 0.2277135 0.7138146
#>
#> cause 2:
#> (Intercept) simple shrink
#> 0.0000000 0.1989269 0.8821621predictRisk() returns one row per subject and one column
per requested prediction time. The model argument uses the
same selectors: "sl" for the stacked ensemble,
"discrete_sl" for the best cross-validated base learner per
cause, or learner labels such as "simple" and
"shrink" for models stored in the ensemble.
newdata <- d[1:2]
times <- c(1, 2)
risk_shared_sl <- predictRisk(
fit_shared, newdata = newdata, times = times, cause = 1, model = "sl"
)
risk_shared_discrete <- predictRisk(
fit_shared, newdata = newdata, times = times, cause = 1, model = "discrete_sl"
)
risk_shared_simple <- predictRisk(
fit_shared, newdata = newdata, times = times, cause = 1, model = "simple"
)
risk_shared_shrink <- predictRisk(
fit_shared, newdata = newdata, times = times, cause = 1, model = "shrink"
)
list(
sl = risk_shared_sl,
discrete_sl = risk_shared_discrete,
simple = risk_shared_simple,
shrink = risk_shared_shrink
)
#> $sl
#> [,1] [,2]
#> [1,] 0.03834563 0.07789423
#> [2,] 0.06632733 0.13279164
#>
#> $discrete_sl
#> [,1] [,2]
#> [1,] 0.04304514 0.08340671
#> [2,] 0.05698820 0.10963701
#>
#> $simple
#> [,1] [,2]
#> [1,] 0.01419019 0.02941751
#> [2,] 0.06819054 0.13891238
#>
#> $shrink
#> [,1] [,2]
#> [1,] 0.04304514 0.08340671
#> [2,] 0.05698820 0.10963701For competing risks, learners can also be a list with
one learner library per cause. This allows different covariates, tuning
parameters, or labels for each cause.
libraries_by_cause <- list(
cvd = list(
cvd_simple = Learner_glmnet(
covariates = c("sex", "diabetes_duration"),
cross_validation = FALSE,
lambda = 0
),
cvd_shrink = Learner_glmnet(
covariates = c("age", "value_LDL"),
cross_validation = FALSE,
lambda = 0.05,
alpha = 1
)
),
death = list(
death_simple = Learner_glmnet(
covariates = c("sex", "age"),
cross_validation = FALSE,
lambda = 0
),
death_shrink = Learner_glmnet(
covariates = c("diabetes_duration", "value_Smoking"),
cross_validation = FALSE,
lambda = 0.05,
alpha = 1
)
)
)
fit_by_cause <- Superlearner(
data = d,
id = "id",
status = "event",
event_time = "time",
learners = libraries_by_cause,
number_of_nodes = 3,
nfold = 2
)The fitted object can be summarized in the same way.
summary(fit_by_cause)
#> Call:
#> Superlearner(..., metalearner = glmnet::glmnet)
#>
#> Fitted object:
#> Class: poisson_superlearner
#> Number of competing risks: 2
#> Number of folds: 2
#> Maximum follow-up: 22.43464
#> Number of nodes: 5
#>
#> Retained learners by cause:
#> cause 1: cvd_simple, cvd_shrink
#> cause 2: death_simple, death_shrink
#>
#> Cross-validation deviance:
#> cause_index cause learner_index learner deviance
#> <int> <char> <int> <char> <num>
#> 1: 1 cvd 1 cvd_simple 140.62531
#> 2: 1 cvd 2 cvd_shrink 100.14039
#> 3: 2 death 1 death_simple 156.00476
#> 4: 2 death 2 death_shrink 68.68571
#>
#> Meta-learner coefficients:
#> cause 1:
#> (Intercept) cvd_simple cvd_shrink
#> 0.0000000 -0.0914743 1.0845723
#>
#> cause 2:
#> (Intercept) death_simple death_shrink
#> 0.0000000 -0.1778133 1.3399726The stacked and discrete super learner selectors still work as scalar model selectors for prediction.
risk_by_cause_sl <- predictRisk(
fit_by_cause, newdata = newdata, times = times, cause = 1, model = "sl"
)
risk_by_cause_discrete <- predictRisk(
fit_by_cause, newdata = newdata, times = times, cause = 1, model = "discrete_sl"
)
list(
sl = risk_by_cause_sl,
discrete_sl = risk_by_cause_discrete
)
#> $sl
#> [,1] [,2]
#> [1,] 0.05189313 0.09760984
#> [2,] 0.05963114 0.11169165
#>
#> $discrete_sl
#> [,1] [,2]
#> [1,] 0.04359312 0.08444487
#> [2,] 0.05638837 0.10851651When selecting learners from cause-specific libraries, provide one selector per cause. The first entry selects the learner for cause 1 and the second entry selects the learner for cause 2.
cause_specific_model <- c("cvd_simple", "death_shrink")
cause_specific_model_alt <- c("cvd_shrink", "death_simple")
risk_by_cause_selected <- predictRisk(
fit_by_cause, newdata = newdata, times = times, cause = 1,
model = cause_specific_model
)
risk_by_cause_selected_alt <- predictRisk(
fit_by_cause, newdata = newdata, times = times, cause = 1,
model = cause_specific_model_alt
)
list(
selected_learners = risk_by_cause_selected,
selected_learners_alt = risk_by_cause_selected_alt
)
#> $selected_learners
#> [,1] [,2]
#> [1,] 0.01505978 0.03181764
#> [2,] 0.06979854 0.14278607
#>
#> $selected_learners_alt
#> [,1] [,2]
#> [1,] 0.04175019 0.079684
#> [2,] 0.05531995 0.106062Integer selectors are also supported. For cause-specific libraries, an integer vector can choose different retained learner positions for different causes.