## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options for this document: merge each chunk's source and
# output into one block and prefix printed output lines with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
# Minimal example data: a numeric column `x` and a factor column `z`, both
# containing NA entries.
succinct_dat <- data.frame(
  x = c(1, NA, 3, 4, NA),
  z = factor(
    c("a", "b", NA, "a", "b"),
    levels = c("a", "b")
  )
)

# Run the "knn" imputer on the small example with m = 2, maxit = 2 and a fixed
# seed. Calls are namespace-qualified; no library(mimar) call precedes this
# point in the script.
i <- mimar::impute(
  succinct_dat,
  imputer = "knn",
  m = 2,
  maxit = 2,
  seed = 1
)

# complete() is called twice on the fit: once with 1 and once with "all"
# (presumably one completed data set vs. all of them -- confirm in the docs).
mimar::complete(i, 1)
mimar::complete(i, "all")

## -----------------------------------------------------------------------------
library(mimar)

# Simulate a 120-row example data set. The seed is fixed and the columns are
# drawn in a fixed order, so the generated values are reproducible.
set.seed(1)
dat <- data.frame(
  age    = rnorm(n = 120, mean = 50, sd = 10),
  bmi    = rnorm(n = 120, mean = 25, sd = 4),
  sex    = factor(sample(x = c("F", "M"), size = 120, replace = TRUE)),
  group  = factor(sample(x = c("A", "B", "C"), size = 120, replace = TRUE)),
  smoker = sample(x = c(TRUE, FALSE), size = 120, replace = TRUE)
)

# Preview the first rows.
head(dat)

## -----------------------------------------------------------------------------
# Build a description object for the simulated data, auto-print it, then
# print its summary.
d <- describe(dat)
d
summary(d)

## ----fig.width=7, fig.height=4------------------------------------------------
# Plot the description object; the chunk header above fixes the figure size.
plot(d)

## -----------------------------------------------------------------------------
# Auto-print whatever imputer_registry() returns
# (presumably the set of available imputers -- confirm in the package docs).
imputer_registry()

## -----------------------------------------------------------------------------
# describe() is called here with the string "imputers" rather than a data set.
describe("imputers")

## -----------------------------------------------------------------------------
# Call ampute() on `dat` (presumably introducing missing values): prop = 0.25
# with a "MAR" mechanism, `bmi` and `group` as targets, `age` and `sex` passed
# as `by`, and a fixed seed.
# NOTE(review): presumably `by` names the variables that drive missingness --
# confirm against the ampute() documentation.
a <- ampute(
  dat,
  prop      = 0.25,
  mechanism = "MAR",
  target    = c("bmi", "group"),
  by        = c("age", "sex"),
  seed      = 1
)

# Auto-print the result and its summary.
a
summary(a)

## ----fig.width=7, fig.height=4, fig.show='hold'-------------------------------
# Fit the "knn" imputer to `a` (m = 3, maxit = 3, fixed seed), then draw two
# plot types from the fit: a "density" plot for `bmi` and an "xy" plot using
# the formula bmi ~ age | sex.
i_knn <- impute(
  a,
  imputer = "knn",
  m = 3,
  maxit = 3,
  seed = 1
)
plot(
  i_knn,
  type = "density",
  variable = "bmi"
)
plot(
  i_knn,
  type = "xy",
  formula = bmi ~ age | sex
)

## -----------------------------------------------------------------------------
# Fit the "knn" imputer to `a` (m = 3, maxit = 3, fixed seed).
i_knn <- impute(
  a,
  imputer = "knn",
  m = 3,
  maxit = 3,
  seed = 1
)

# Inspect the fit: auto-print it, print its summary, and call complete() with
# index 1.
i_knn
summary(i_knn)
complete(i_knn, 1)

## -----------------------------------------------------------------------------
# Two fits on the same data that differ only in the imputer name
# ("knn" vs. "hotdeck"); both use m = 1, maxit = 2 and the same seed.
i_knn_small <- impute(
  a,
  imputer = "knn",
  m = 1,
  maxit = 2,
  seed = 1
)
i_hotdeck <- impute(
  a,
  imputer = "hotdeck",
  m = 1,
  maxit = 2,
  seed = 1
)

# Print both summaries one after the other for comparison.
summary(i_knn_small)
summary(i_hotdeck)

## -----------------------------------------------------------------------------
# Imputer specifications that carry extra arguments alongside the imputer
# name. NOTE(review): `rf_spec` and `xgb_spec` are created here but are not
# passed to impute() anywhere in the visible script.
rf_spec <- imputer(
  "rf",
  num.trees = 500
)
xgb_spec <- imputer(
  "xgboost",
  nrounds = 100,
  max_depth = 3
)

# Auto-print a description of the amputed object.
describe(a)

# Refit "knn", this time passing an extra `donors = 10` argument directly to
# impute(); this overwrites any earlier `i_knn`.
i_knn <- impute(
  a,
  imputer = "knn",
  m = 3,
  maxit = 3,
  seed = 1,
  donors = 10
)

summary(i_knn)

## -----------------------------------------------------------------------------
# Build a "superlearner" imputer specification with a three-entry library
# ("pmm", "knn", "rpart"), folds = 3 and the "inverse_loss" metalearner.
sl_spec <- imputer(
  "superlearner",
  library = c("pmm", "knn", "rpart"),
  folds = 3,
  metalearner = "inverse_loss"
)

# Pass the specification object (rather than a name string) as `imputer`,
# then print the fit's summary.
i_sl <- impute(a, imputer = sl_spec, m = 2, maxit = 2, seed = 1)
summary(i_sl)

## -----------------------------------------------------------------------------
# Evaluate the current `i_knn` fit, auto-print the result and describe it.
e <- evaluate(i_knn)
e
describe(e)
# Show the first rows of the `recovery_by_imputation` element of the result.
head(e$recovery_by_imputation)

## ----fig.width=7, fig.height=4------------------------------------------------
# Plot the fit with no `type` argument, then with type = "missing" and
# type = "density".
plot(i_knn)
plot(i_knn, type = "missing")
plot(i_knn, type = "density")
# Plot the evaluation object.
plot(e)

## -----------------------------------------------------------------------------
# Show the first rows of the trace stored under `diagnostics` in the fit.
head(i_knn$diagnostics$trace)

## ----fig.width=7, fig.height=4------------------------------------------------
# "trace" plot type, requesting the "mean" statistic.
plot(i_knn, type = "trace", statistic = "mean")

## ----fig.width=7, fig.height=4------------------------------------------------
# "density" plot type for `bmi`.
plot(i_knn, type = "density", variable = "bmi")

## ----fig.width=7, fig.height=4------------------------------------------------
# "boxplot" plot type for `bmi`.
plot(i_knn, type = "boxplot", variable = "bmi")

## ----fig.width=7, fig.height=4------------------------------------------------
# "strip" plot type for `bmi`.
plot(i_knn, type = "strip", variable = "bmi")

## ----fig.width=7, fig.height=4------------------------------------------------
# "xy" plot type driven by a formula: bmi against age, conditioned on sex.
plot(i_knn, type = "xy", formula = bmi ~ age | sex)

## ----fig.width=7, fig.height=4------------------------------------------------
# "proportion" plot type for the factor `group`: first selected by variable
# name, then through a formula that relates it to `sex`.
plot(i_knn, type = "proportion", variable = "group")
plot(i_knn, type = "proportion", formula = group ~ sex)

## -----------------------------------------------------------------------------
# Pool three scalar estimates with their standard errors; the result is
# labelled via name = "age" and auto-printed.
pool(c(0.10, 0.11, 0.09), std.error = c(0.04, 0.05, 0.04), name = "age")

## -----------------------------------------------------------------------------
# Three coefficient vectors, each with elements named "age" and "bmi".
betas <- lapply(
  list(
    c(0.10, 0.30),
    c(0.11, 0.32),
    c(0.09, 0.29)
  ),
  function(values) setNames(values, c("age", "bmi"))
)

# Matching 2 x 2 covariance matrices: diagonal, with the squared values of
# each input vector on the diagonal.
covariances <- lapply(
  list(
    c(0.04, 0.08),
    c(0.05, 0.09),
    c(0.04, 0.08)
  ),
  function(std_errors) diag(std_errors^2)
)

# Pool the list of coefficient vectors, supplying the matching covariance
# matrices, then inspect the pooled object and its `estimate` and `variance`
# elements.
pooled_betas <- pool(betas, covariance = covariances)
pooled_betas
pooled_betas$estimate
pooled_betas$variance

## -----------------------------------------------------------------------------
# Three 2 x 2 matrices, each filled column-wise from four values.
survival_probabilities <- lapply(
  list(
    c(0.90, 0.80, 0.70, 0.60),
    c(0.91, 0.79, 0.72, 0.61),
    c(0.89, 0.81, 0.71, 0.59)
  ),
  matrix,
  nrow = 2
)

# Pool the list of matrices (no variance information is supplied here), then
# inspect the pooled object and its `estimate` element.
pooled_survival <- pool(survival_probabilities)
pooled_survival
pooled_survival$estimate

## -----------------------------------------------------------------------------
# Long-format results table: one row per term and imputation, holding an
# estimate and its standard error.
external_results <- data.frame(
  term       = rep(c("age", "bmi"), times = c(3, 3)),
  estimate   = c(0.10, 0.11, 0.09, 0.30, 0.32, 0.29),
  std.error  = c(0.04, 0.05, 0.04, 0.08, 0.09, 0.08),
  imputation = c(1:3, 1:3)
)

# Pool directly from the long-format data frame of per-imputation results and
# auto-print the pooled object.
p <- pool(external_results)
p

