% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks-resume.R
\name{luz_callback_auto_resume}
\alias{luz_callback_auto_resume}
\title{Resume training callback}
\usage{
luz_callback_auto_resume(path = "./state.pt")
}
\arguments{
\item{path}{Path to the file where the training state is saved.}
}
\description{
This callback allows you to resume training a model.
}
\details{
When using it, model weights and optimizer state are serialized at the end of
each epoch. If something fails during training, simply re-running the same
script will restart the model training from the epoch right after the last
epoch that was serialized.
}
\note{
In general, you will want to add this callback as the last one in the callbacks
list. This way, the serialized state is likely to contain all possible changes
that other callbacks could have made at \code{'on_epoch_end'}. The default \code{weight}
attribute of this callback is \code{Inf}.

Read the checkpointing article on the pkgdown website for more
information.
}
\section{Customizing serialization}{


By default, the model, optimizer state and records are serialized. Callbacks can
be used to customize serialization by implementing the \code{state_dict()} and
\code{load_state_dict()} methods.
If those methods are implemented, then \code{state_dict()} is called at the end of
each epoch and \code{load_state_dict()} is called when the model is resumed.
}

\examples{
if (torch::torch_is_installed()) {
  library(torch)
  library(luz)

  # Random regression data: 1000 observations, 10 inputs and 1 target.
  inputs <- torch_randn(1000, 10)
  targets <- torch_randn(1000, 1)
  train_data <- list(inputs, targets)

  # Linear module set up with SGD and a mean squared error loss.
  module <- setup(nn_linear, optimizer = optim_sgd, loss = nnf_mse_loss)
  module <- set_hparams(module, in_features = 10, out_features = 1)
  model <- set_opt_hparams(module, lr = 0.01)

  # Callback that raises an error at the end of epoch 5, but only the first
  # time it gets there: the `failed` flag is flipped before the error is
  # raised, so the same callback instance lets the next run go through.
  fail_once <- luz_callback(
    "interrupt",
    failed = FALSE,
    on_epoch_end = function() {
      if (self$failed || ctx$epoch != 5) {
        return(invisible(NULL))
      }
      self$failed <- TRUE
      stop("Error on epoch 5")
    }
  )

  # Both fits share the same state file and the same callback instances.
  state_path <- tempfile()
  resume_cb <- luz_callback_auto_resume(path = state_path)
  failing_cb <- fail_once()
  fit_callbacks <- list(resume_cb, failing_cb)

  # First attempt: training is interrupted by the simulated error.
  try({
    results <- fit(
      model,
      train_data,
      callbacks = fit_callbacks,
      verbose = FALSE
    )
  })

  # Second attempt: training is resumed from the saved state and completes.
  results <- fit(
    model,
    train_data,
    callbacks = fit_callbacks,
    verbose = FALSE
  )

  get_metrics(results)

}
}
\seealso{
Other luz_callbacks: 
\code{\link{luz_callback}()},
\code{\link{luz_callback_csv_logger}()},
\code{\link{luz_callback_early_stopping}()},
\code{\link{luz_callback_interrupt}()},
\code{\link{luz_callback_keep_best_model}()},
\code{\link{luz_callback_lr_scheduler}()},
\code{\link{luz_callback_metrics}()},
\code{\link{luz_callback_mixed_precision}()},
\code{\link{luz_callback_mixup}()},
\code{\link{luz_callback_model_checkpoint}()},
\code{\link{luz_callback_profile}()},
\code{\link{luz_callback_progress}()},
\code{\link{luz_callback_resume_from_checkpoint}()},
\code{\link{luz_callback_train_valid}()}
}
\concept{luz_callbacks}
