% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/BIOMOD_Modeling.R
\name{BIOMOD_Modeling}
\alias{BIOMOD_Modeling}
\title{Run a range of species distribution models}
\usage{
BIOMOD_Modeling(
  bm.format,
  modeling.id = as.character(format(Sys.time(), "\%s")),
  models = c("ANN", "CTA", "FDA", "GAM", "GBM", "GLM", "MARS", "MAXENT", "MAXNET", "RF",
    "SRE", "XGBOOST"),
  models.pa = NULL,
  CV.strategy = "random",
  CV.nb.rep = 1,
  CV.perc = NULL,
  CV.k = NULL,
  CV.balance = NULL,
  CV.env.var = NULL,
  CV.strat = NULL,
  CV.user.table = NULL,
  CV.do.full.models = TRUE,
  OPT.data.type = "binary",
  OPT.strategy = "default",
  OPT.user.val = NULL,
  OPT.user.base = "bigboss",
  OPT.user = NULL,
  bm.options,
  nb.rep,
  data.split.perc,
  data.split.table,
  do.full.models,
  weights = NULL,
  prevalence = NULL,
  metric.eval = c("KAPPA", "TSS", "ROC"),
  var.import = 0,
  scale.models = FALSE,
  nb.cpu = 1,
  seed.val = NULL,
  do.progress = TRUE
)
}
\arguments{
\item{bm.format}{a \code{\link{BIOMOD.formated.data}} or \code{\link{BIOMOD.formated.data.PA}} 
object returned by the \code{\link{BIOMOD_FormatingData}} function}

\item{modeling.id}{a \code{character} corresponding to the name (ID) of the simulation set 
(\emph{a random number by default})}

\item{models}{a \code{vector} containing model names to be computed, must be among 
\code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS}, 
\code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd}, \code{SRE}, \code{XGBOOST}}

\item{models.pa}{(\emph{optional, default} \code{NULL}) \cr 
A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets 
are to be used, must be among \code{colnames(bm.format@PA.table)}}

\item{CV.strategy}{a \code{character} corresponding to the cross-validation selection strategy, 
must be among \code{random}, \code{kfold}, \code{block}, \code{strat}, \code{env} or 
\code{user.defined}}

\item{CV.nb.rep}{(\emph{optional, default} \code{1}) \cr
If \code{CV.strategy = 'random'} or \code{CV.strategy = 'kfold'}, an \code{integer} corresponding 
to the number of sets (repetitions) of cross-validation points that will be drawn}

\item{CV.perc}{(\emph{optional, default} \code{NULL}) \cr
If \code{CV.strategy = 'random'}, a \code{numeric} between \code{0} and \code{1} defining the 
percentage of data that will be kept for calibration}

\item{CV.k}{(\emph{optional, default} \code{NULL}) \cr
If \code{CV.strategy = 'kfold'} or \code{CV.strategy = 'strat'} or \code{CV.strategy = 'env'}, an 
\code{integer} corresponding to the number of partitions}

\item{CV.balance}{(\emph{optional, default} \code{'presences'}) \cr
If \code{CV.strategy = 'strat'} or \code{CV.strategy = 'env'}, a \code{character} corresponding 
to how data will be balanced between partitions, must be either \code{presences} or
\code{absences}}

\item{CV.env.var}{(\emph{optional}) \cr If \code{CV.strategy = 'env'}, a \code{character} 
corresponding to the environmental variables used to build the partition. \code{CV.k} partitions 
will be built for each environmental variable. By default the function uses all 
environmental variables available.}

\item{CV.strat}{(\emph{optional, default} \code{'both'}) \cr
If \code{CV.strategy = 'env'}, a \code{character} corresponding to how data will be partitioned 
along gradient, must be among \code{x}, \code{y}, \code{both}}

\item{CV.user.table}{(\emph{optional, default} \code{NULL}) \cr
If \code{CV.strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} defining for each 
repetition (in columns) which observation lines should be used for model calibration 
(\code{TRUE}) and validation (\code{FALSE})}

\item{CV.do.full.models}{(\emph{optional, default} \code{TRUE}) \cr
A \code{logical} value defining whether models should be also calibrated and validated over 
the whole dataset (and pseudo-absence datasets) or not}

\item{OPT.data.type}{a \code{character} corresponding to the data type to be used, must be 
among \code{binary}, \code{binary.PA}, \code{abundance}, \code{compositional}}

\item{OPT.strategy}{a \code{character} corresponding to the method to select models' 
parameters values, must be among \code{default}, \code{bigboss}, \code{user.defined}, 
\code{tuned}}

\item{OPT.user.val}{(\emph{optional, default} \code{NULL}) \cr
A \code{list} containing parameters values for some (all) models}

\item{OPT.user.base}{(\emph{optional, default} \code{bigboss}) \cr A character, 
\code{default} or \code{bigboss} used when \code{OPT.strategy = 'user.defined'}. 
It sets the bases of parameters to be modified by user defined values.}

\item{OPT.user}{(\emph{optional, default} \code{NULL}) \cr
A \code{\link{BIOMOD.models.options}} object returned by the \code{\link{bm_ModelingOptions}} 
function}

\item{bm.options}{\emph{deprecated}, now called \code{OPT.user}}

\item{nb.rep}{\emph{deprecated}, now called \code{CV.nb.rep}}

\item{data.split.perc}{\emph{deprecated}, now called \code{CV.perc}}

\item{data.split.table}{\emph{deprecated}, now called \code{CV.user.table}}

\item{do.full.models}{\emph{deprecated}, now called \code{CV.do.full.models}}

\item{weights}{(\emph{optional, default} \code{NULL}) \cr 
A \code{vector} of \code{numeric} values corresponding to observation weights (one per 
observation, see Details)}

\item{prevalence}{(\emph{optional, default} \code{NULL}) \cr 
A \code{numeric} between \code{0} and \code{1} corresponding to the species prevalence to 
build '\emph{weighted response weights}' (see Details)}

\item{metric.eval}{a \code{vector} containing evaluation metric names to be used, must 
be among \code{POD}, \code{FAR}, \code{POFD}, \code{SR}, \code{ACCURACY}, \code{BIAS}, 
\code{ROC}, \code{TSS}, \code{KAPPA}, \code{OR}, \code{ORSS}, \code{CSI}, \code{ETS}, 
\code{BOYCE}, \code{MPA}}

\item{var.import}{(\emph{optional, default} \code{0}) \cr 
An \code{integer} corresponding to the number of permutations to be done for each variable to 
estimate variable importance}

\item{scale.models}{(\emph{optional, default} \code{FALSE}) \cr 
A \code{logical} value defining whether all models predictions should be scaled with a 
binomial GLM or not}

\item{nb.cpu}{(\emph{optional, default} \code{1}) \cr 
An \code{integer} value corresponding to the number of computing resources to be used to 
parallelize the single models computation}

\item{seed.val}{(\emph{optional, default} \code{NULL}) \cr 
An \code{integer} value corresponding to the new seed value to be set}

\item{do.progress}{(\emph{optional, default} \code{TRUE}) \cr 
A \code{logical} value defining whether the progress bar is to be rendered or not}
}
\value{
A \code{\link{BIOMOD.models.out}} object containing models outputs, or links to saved outputs. \cr
Models outputs are stored out of \R (for memory storage reasons) in 2 different folders 
created in the current working directory :
\enumerate{
  \item a \emph{models} folder, named after the \code{resp.name} argument of 
  \code{\link{BIOMOD_FormatingData}}, and containing all calibrated models for each 
  repetition and pseudo-absence run
  \item a \emph{hidden} folder, named \code{.BIOMOD_DATA}, and containing outputs related 
  files (original dataset, calibration lines, pseudo-absences selected, predictions, 
  variables importance, evaluation values...), that can be retrieved with 
  \href{https://biomodhub.github.io/biomod2/reference/getters.out.html}{\code{get_[...]}} 
  or \code{\link{load}} functions, and used by other \pkg{biomod2} functions, like 
  \code{\link{BIOMOD_Projection}} or \code{\link{BIOMOD_EnsembleModeling}}
}
}
\description{
This function calibrates and evaluates a range of modeling techniques 
for a given species distribution. The dataset can be split up into calibration/validation parts,
and the predictive power of the different models can be estimated using a range of evaluation 
metrics (see Details).
}
\details{
\describe{
  \item{bm.format}{If pseudo-absences have been added to the original dataset (see 
  \code{\link{BIOMOD_FormatingData}}), \cr \code{PA.nb.rep * (CV.nb.rep + 1)} models will be 
  created.}
  
  \item{models}{The set of models to be calibrated on the data. 13 modeling techniques 
  are currently available :
  \itemize{
    \item \code{ANN} : Artificial Neural Network (\code{\link[nnet]{nnet}})
    \item \code{CTA} : Classification Tree Analysis (\code{\link[rpart]{rpart}})
    \item \code{FDA} : Flexible Discriminant Analysis (\code{\link[mda]{fda}})
    \item \code{GAM} : Generalized Additive Model (\code{\link[gam]{gam}}, \code{\link[mgcv]{gam}} 
    or \code{\link[mgcv]{bam}}) \cr 
    (see \code{\link{bm_ModelingOptions}} for details on algorithm selection)
    \item \code{GBM} : Generalized Boosting Model, or usually called Boosted Regression Trees 
    (\code{\link[gbm]{gbm}})
    \item \code{GLM} : Generalized Linear Model (\code{\link[stats]{glm}})
    \item \code{MARS} : Multivariate Adaptive Regression Splines (\code{\link[earth]{earth}})
    \item \code{MAXENT} : Maximum Entropy 
    (\href{https://biodiversityinformatics.amnh.org/open_source/maxent/}{see Maxent website})
    \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}})
    \item \code{RF} : Random Forest (\code{\link[randomForest]{randomForest}})
    \item \code{RFd} : Random Forest downsampled (\code{\link[randomForest]{randomForest}})
    \item \code{SRE} : Surface Range Envelope or usually called BIOCLIM (\code{\link{bm_SRE}})
    \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}})
  }}
  
  \item{models.pa}{Different models might respond differently to different numbers of 
  pseudo-absences. It is possible to create sets of pseudo-absences with different numbers 
  of points (see \code{\link{BIOMOD_FormatingData}}) and to assign only some of these 
  datasets to each single model.
  }
  
  \item{CV.[...] parameters}{Different methods are available to calibrate/validate the 
  single models (see \code{\link{bm_CrossValidation}}).}
  
  \item{OPT.[...] parameters}{Different methods are available to parameterize the 
  single models (see \code{\link{bm_ModelingOptions}} and 
  \code{\link{BIOMOD.options.dataset}}). Note that only \code{binary} data type is 
  allowed currently.
  \itemize{
    \item \code{default} : only the default parameter values of the single models' 
    functions are retrieved. Nothing is changed so it might not give good results.
    \item \code{bigboss} : uses parameters pre-defined by \pkg{biomod2} team and that are 
    available in the dataset \code{\link{OptionsBigboss}}. \cr 
    \emph{to be optimized in near future}
    \item \code{user.defined} : updates default or bigboss parameters with some parameters 
    values defined by the user (but matching the format of a 
    \code{\link{BIOMOD.models.options}} object)
    \item \code{tuned} : calls the \code{\link{bm_Tuning}} function to try and optimize 
    some default values
  }
  }
  
  \item{weights & prevalence}{More or less weight can be given to some specific observations.
  \itemize{
    \item If \code{weights = prevalence = NULL}, each observation (presence or absence) will 
    have the same weight, no matter the total number of presences and absences.
    \item If \code{prevalence = 0.5}, presences and absences will be weighted equally 
    (\emph{i.e. the weighted sum of presences equals the weighted sum of absences}). 
    \item If \code{prevalence} is set below (\emph{above}) \code{0.5}, more weight will be 
    given to absences (\emph{presences}).
    \item If \code{weights} is defined, \code{prevalence} argument will be ignored, and each 
    observation will have its own weight.
    \item If pseudo-absences have been generated (\code{PA.nb.rep > 0} in 
    \code{\link{BIOMOD_FormatingData}}), weights are by default calculated such that 
    \code{prevalence = 0.5}. \emph{Automatically created \code{weights} will be \code{integer} 
    values to prevent some modeling issues.}
    \item \emph{NOTE THAT \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{RFd} and \code{SRE} 
    models do not take weights into account.}
  }}

  \item{metric.eval}{
  \describe{
    \item{simple}{
    \itemize{
      \item \code{POD} : Probability of detection (hit rate)
      \item \code{FAR} : False alarm ratio
      \item \code{POFD} : Probability of false detection (fall-out)
      \item \code{SR} : Success ratio
      \item \code{ACCURACY} : Accuracy (fraction correct)
      \item \code{BIAS} : Bias score (frequency bias)
    }
    }
    \item{complex}{
    \itemize{
      \item \code{ROC} : Relative operating characteristic
      \item \code{TSS} : True skill statistic (Hanssen and Kuipers discriminant, Peirce's 
      skill score)
      \item \code{KAPPA} : Cohen's Kappa (Heidke skill score)
      \item \code{OR} : Odds Ratio
      \item \code{ORSS} : Odds ratio skill score (Yule's Q)
      \item \code{CSI} : Critical success index (threat score)
      \item \code{ETS} : Equitable threat score (Gilbert skill score)
    }
    }
    \item{presence-only}{
    \itemize{
      \item \code{BOYCE} : Boyce index
      \item \code{MPA} : Minimal predicted area (cutoff optimizing MPA to predict 90\% of 
      presences)
    }
    }
  }
  Optimal value of each method can be obtained with the \code{\link{get_optim_value}} 
  function. Several evaluation metrics can be selected. \emph{Please refer to the 
  \href{https://www.cawcr.gov.au/projects/verification/}{CAWCR website (section "Methods for 
  dichotomous forecasts")} to get detailed description of each metric.}
  Results after modeling can be obtained through the \code{\link{get_evaluations}} function. \cr 
  Evaluation metrics are calculated on the calibrating data (column \code{calibration}), on 
  the cross-validation data (column \code{validation}) or on the evaluation data 
  (column \code{evaluation}). \cr \emph{For cross-validation data, see \code{CV.[...]} 
  parameters in \code{\link{BIOMOD_Modeling}} function ; for evaluation data, see 
  \code{eval.[...]} parameters in \code{\link{BIOMOD_FormatingData}}.}
  }
  
  \item{var.import}{A value characterizing how much each variable has an impact on each model's 
  predictions can be calculated by randomizing the variable of interest and computing the 
  correlation between original and shuffled variables (see \code{\link{bm_VariablesImportance}}).}
  
  \item{scale.models}{\bold{This parameter is quite experimental and it is recommended 
  not to use it. It may lead to reduction in projection scale amplitude.} Some categorical 
  models always have to be scaled (\code{FDA}, \code{ANN}), but it may be interesting to 
  scale all computed models to ensure comparable predictions (\code{0-1000} range). It might 
  be particularly useful when doing ensemble forecasting to remove the scale prediction effect 
  (\emph{the more extended projections are, the more they influence ensemble forecasting 
  results}).
  }
}
}
\examples{
# Worked example: format the input data, calibrate two single models,
# then inspect their evaluation scores and variables importance.
library(terra)

# Load species occurrences (6 species available)
data(DataSpecies)
head(DataSpecies)

# Select the name of the studied species
# (it is used below both as a column name of DataSpecies and as resp.name)
myRespName <- 'GuloGulo'

# Get corresponding presence/absence data, converted to a numeric vector
myResp <- as.numeric(DataSpecies[, myRespName])

# Get corresponding XY coordinates
myRespXY <- DataSpecies[, c('X_WGS84', 'Y_WGS84')]

# Load environmental variables extracted from BIOCLIM (bio_3, bio_4, bio_7, bio_11 & bio_12)
# and convert them into a terra raster object
data(bioclim_current)
myExpl <- terra::rast(bioclim_current)

\dontshow{
# This block is run but not displayed in the rendered help page.
# Crop the environmental layers to the extent defined just below
# (presumably to keep the example fast - TODO confirm)
myExtent <- terra::ext(0,30,45,70)
myExpl <- terra::crop(myExpl, myExtent)
}

# ---------------------------------------------------------------------------- #
# Format Data with true absences
# (no pseudo-absence argument is given to the formatting function)
myBiomodData <- BIOMOD_FormatingData(resp.var = myResp,
                                     expl.var = myExpl,
                                     resp.xy = myRespXY,
                                     resp.name = myRespName)


# ---------------------------------------------------------------------------- #
# Model single models :
#   - 2 algorithms (RF and GLM)
#   - random cross-validation, 2 repetitions, 0.8 of the data kept for calibration
#   - bigboss set of modeling options
#   - evaluation with TSS and ROC
#   - 2 permutations per variable to estimate variables importance
#   - fixed seed value
myBiomodModelOut <- BIOMOD_Modeling(bm.format = myBiomodData,
                                    modeling.id = 'AllModels',
                                    models = c('RF', 'GLM'),
                                    CV.strategy = 'random',
                                    CV.nb.rep = 2,
                                    CV.perc = 0.8,
                                    OPT.strategy = 'bigboss',
                                    metric.eval = c('TSS','ROC'),
                                    var.import = 2,
                                    seed.val = 42)
# Display the returned modeling output object
myBiomodModelOut

# Get evaluation scores & variables importance
get_evaluations(myBiomodModelOut)
get_variables_importance(myBiomodModelOut)

# Represent evaluation scores, on calibration data then on validation data,
# and as boxplots grouped by algorithm and run
bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'calibration')
bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'validation')
bm_PlotEvalBoxplot(bm.out = myBiomodModelOut, group.by = c('algo', 'run'))

# # Represent variables importance 
# bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('expl.var', 'algo', 'algo'))
# bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('expl.var', 'algo', 'run'))
# bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('algo', 'expl.var', 'run'))

# # Represent response curves 
# mods <- get_built_models(myBiomodModelOut, run = 'RUN1')
# bm_PlotResponseCurves(bm.out = myBiomodModelOut, 
#                       models.chosen = mods,
#                       fixed.var = 'median')
# bm_PlotResponseCurves(bm.out = myBiomodModelOut, 
#                       models.chosen = mods,
#                       fixed.var = 'min')
# mods <- get_built_models(myBiomodModelOut, full.name = 'GuloGulo_allData_RUN2_RF')
# bm_PlotResponseCurves(bm.out = myBiomodModelOut, 
#                       models.chosen = mods,
#                       fixed.var = 'median',
#                       do.bivariate = TRUE)


}
\seealso{
\code{\link[stats]{glm}}, \code{\link[gam]{gam}},
  \code{\link[mgcv]{gam}}, \code{\link[mgcv]{bam}}, \code{\link[gbm]{gbm}},
  \code{\link[rpart]{rpart}}, \code{\link[nnet]{nnet}},
  \code{\link[mda]{fda}}, \code{\link[earth]{earth}},
  \code{\link[randomForest]{randomForest}}, \code{\link[maxnet]{maxnet}},
  \code{\link[xgboost]{xgboost}}, \code{\link{BIOMOD_FormatingData}},
  \code{\link{bm_ModelingOptions}}, \code{\link{bm_Tuning}}, 
  \code{\link{bm_CrossValidation}},
  \code{\link{bm_VariablesImportance}}, \code{\link{BIOMOD_Projection}},
  \code{\link{BIOMOD_EnsembleModeling}}, \code{\link{bm_PlotEvalMean}},
  \code{\link{bm_PlotEvalBoxplot}}, \code{\link{bm_PlotVarImpBoxplot}},
  \code{\link{bm_PlotResponseCurves}}

Other Main functions: 
\code{\link{BIOMOD_EnsembleForecasting}()},
\code{\link{BIOMOD_EnsembleModeling}()},
\code{\link{BIOMOD_FormatingData}()},
\code{\link{BIOMOD_LoadModels}()},
\code{\link{BIOMOD_Projection}()},
\code{\link{BIOMOD_RangeSize}()}
}
\author{
Wilfried Thuiller, Damien Georges, Robin Engler
}
\concept{Main functions}
\keyword{models}
\keyword{multivariate}
\keyword{nonlinear}
\keyword{nonparametric}
\keyword{regression}
\keyword{tree}
