# refresh -----------------------

#' Refresh sourcoise caches by executing the selected sources
#'
#' All scripts (passed to `sourcoise_refresh()`) are executed with logging enabled.
#'
#' The function returns the list of scripts executed, but its main effect is a side effect: scripts are executed and their caches are updated accordingly.
#' Note also that log files reflect execution and track possible errors.
#' Because of logging the execution comes with a loss in performance, which is not an issue if scripts are long to execute.
#'
#' It is possible to execute `sourcoise_refresh()` without execution forcing (`force_exec=FALSE`) or with it.
#' Forced execution means that the script is executed even if the cache is valid.
#' In the case of non-forced execution, execution is triggered by the other cache invalidation tests (change in the source file, lapse or tracked files).
#'
#' When scripts are linked to qmds (i.e. when run in a quarto project), it is possible to unfreeze and uncache those qmds with the option `unfreeze=TRUE`.
#' This allows refreshing the cache and then rendering the qmds using the new data.
#'
#' It is possible to pass to refresh a function that will be executed before every script. This allows to load packages and declare global variables that can be used in each script.
#' If packages are loaded inside the script, then this is not needed.
#'
#' Parameters registered in `sourcoise_status()` such as `wd` or `args` are used to execute the script.
#'
#' Defining a `priority` in `sourcoise()` will change the order of execution of the refresh. Priorities can also be set automatically using the `priotirize` option:
#' after a refresh has been executed, a higher priority is given to the most used files.
#'
#' @param what (tibble) a tibble as generated by `sourcoise_status()`, possibly filtered (defaults to `sourcoise_status()`). `what` can also be a vector of strings used to filter source files by name.
#' @param force_exec (boolean) (default `TRUE`) if `TRUE` code is executed, no matter what is cached
#' @param unfreeze (boolean) (default `TRUE`) when possible, unfreeze and uncache .qmd files in a quarto project when data used by those .qmd has been refreshed
#' @param quiet (boolean) (default `FALSE`) no message if TRUE
#' @param init_fn (function) (default `NULL`) execute a function before sourcing to allow initialization
#' @param root (default `NULL`) force root to be set, instead of letting the function finding the root, for advanced uses
#' @param log (character) (default `"INFO"`) log levels as in `logger::log_threshold()` (c("OFF", "INFO", ...)), comes with a small performance cost
#' @param priotirize (boolean) (default `TRUE`) will set priority based on pattern of execution
#' @param .progress (boolean) (default `TRUE`) displays a progression bar based on previous execution timings
#'
#' @family sourcoise
#'
#' @importFrom rlang .data
#' @return a list of r scripts (characters) executed, with timing and success and a side effect on caches
#' @export
#' @examples
#' dir <- tempdir()
#' set_sourcoise_root(dir)
#' fs::file_copy(
#'    fs::path_package("sourcoise", "some_data.R"),
#'    dir,
#'    overwrite = TRUE)
#' # Force execution
#' data <- sourcoise("some_data.R", force_exec = TRUE)
#' # we then refresh all caches
#' sourcoise_refresh()

sourcoise_refresh <- function(
    what = NULL,
    force_exec = TRUE,
    unfreeze = TRUE,
    quiet = FALSE,
    init_fn = getOption("sourcoise.init_fn"),
    root = getOption("sourcoise.root"),
    priotirize = TRUE,
    log = "INFO",
    .progress = TRUE) {

  # Re-runs the selected cached sources through `sourcoise_()` and reports the
  # outcome of each one. Returns (invisibly) a list with elements `src`, `ok`,
  # `timing` and `size`, each holding one entry per refreshed source.

  refresh_start <- Sys.time()

  root_root <- try_find_root(root, src_in = "project")
  startup_log2("INFO", root_root)
  ww <- sourcoise_status(short = FALSE, prune = TRUE, root = root, quiet = quiet)
  # Size of the unfiltered status, used below to detect a "full" refresh.
  n_sources <- nrow(ww)

  if (!is.null(what)) {
    if (is.character(what)) {
      # Keep the sources whose name matches at least one pattern. `.init`
      # makes an empty `what` select nothing instead of raising an error.
      ww <- ww |>
        dplyr::filter(
          purrr::map(what, \(pattern) stringr::str_detect(ww$src, pattern)) |>
            purrr::reduce(`|`, .init = rep(FALSE, nrow(ww))))
    }
    if ("json_file" %in% names(what)) {
      # `what` is a (possibly filtered) status table: keep the matching caches.
      ww <- ww |>
        dplyr::semi_join(what, dplyr::join_by(json_file))
    }
  }

  # Only sources flagged as existing can be executed.
  what <- ww |> dplyr::filter(.data$exists)

  if (nrow(what) == 0) {
    if (!quiet)
      cli::cli_alert_warning("No source files to refresh")
    return(invisible(list()))
  }

  if (!force_exec) {
    # Without forcing, skip every source that has at least one valid cache.
    what <- what |>
      dplyr::group_by(.data$src) |>
      dplyr::filter(!any(.data$valid)) |>
      dplyr::ungroup()
  }

  # Keep the most recent entry per (src, args), then sort by priority.
  what <- what |>
    dplyr::group_by(.data$src, .data$args) |>
    dplyr::arrange(dplyr::desc(.data$date)) |>
    dplyr::slice(1) |>
    dplyr::ungroup() |>
    dplyr::arrange(dplyr::desc(.data$priority))

  if (nrow(what) == 0) {
    if (!quiet)
      cli::cli_alert_warning("No source files to refresh")
    return(invisible(list()))
  }

  if (force_exec) {
    # Publish the refresh state through options and make sure it is reset when
    # the function exits, whatever happens in between.
    on.exit(
      options(
        sourcoise.refreshing = FALSE,
        sourcoise.refreshing.2do = list(),
        sourcoise.refreshing.done = list(),
        sourcoise.refreshing.hit = list()),
      add = TRUE)
    options(
      sourcoise.refreshing = TRUE,
      sourcoise.refreshing.2do = what[["src"]],
      sourcoise.refreshing.done = list(),
      sourcoise.refreshing.hit = list())
  }

  logger::log_info("Refreshing {nrow(what)} source files")
  if (!quiet)
    cli::cli_alert_info("Refreshing {nrow(what)} source files")

  if (rlang::is_function(init_fn)) {
    init_fn()
    logger::log_info("Initializing with init_fn()")
    if (!quiet)
      cli::cli_alert_info("Initializing with init_fn()")
  }

  # The progress bar advances with the timings recorded for previous runs.
  total_time <- ceiling(sum(what$timing, na.rm = TRUE))
  # Reference directory used to display source paths in messages.
  cwd <- if (is.null(root)) getwd() |> path_abs() else root
  if (.progress)
    idpgr <- cli::cli_progress_bar("refreshing", total = total_time)

  res <- purrr::pmap(
    what,
    function(src, wd, lapse, args, root, track, qmd_file, src_in, timing,
             log_file, data_date, ...) {
      # NOTE: `root` is here the per-source column of `what`; it shadows the
      # `root` argument of `sourcoise_refresh()`.
      src_todo <- fs::path_join(c(root, src)) |> fs::path_ext_remove()
      # Was this source already listed as done earlier in this refresh?
      done <- src_todo %in% getOption("sourcoise.refreshing.done")

      src_data <- sourcoise_(
        path = src,
        force_exec = force_exec,
        track = track,
        args = args |> as.list(),
        wd = wd,
        lapse = lapse,
        metadata = TRUE,
        quiet = TRUE,
        src_in = src_in,
        root = root,
        log = log)

      if (.progress) {
        # A missing previous timing must not poison the progress counter.
        step <- if (isTRUE(is.na(timing))) 0 else timing
        cli::cli_progress_update(inc = step, id = idpgr)
      }

      # `isTRUE()` keeps a NULL/NA status from aborting the whole refresh.
      executed <- isTRUE(src_data$ok == "exec")

      if (!quiet) {
        msrc <- fs::path_join(c(root, src)) |> fs::path_rel(cwd)
        if (executed || done) {
          # Data is "new" only when its date is strictly later than the cached
          # one; an unknown date is reported as "same data".
          new <- isTRUE(src_data$data_date > data_date)
          data_size <- glue::glue("{scales::label_bytes()(src_data$size)}")
          msg <- glue::glue(
            "{msrc} executed in {round(src_data$timing)} s. {ifelse(done, 'cached during refresh', '' )}")
          if (new) {
            cli::cli_alert_success(
              "{msg}, {.strong new data generated} ({data_size})")
          } else {
            cli::cli_alert_success(
              "{msg}, same data ({data_size})")
          }
        } else {
          cli::cli_alert_danger(
            "{msrc} failed (see log {.file {src_data$log_file}})")
          cli::cli_alert(src_data$error |> errorCondition())
        }
      }

      # `unfreeze` is both the logical argument and the name of the helper:
      # R skips non-function bindings when resolving a call, so `unfreeze(...)`
      # below reaches the helper.
      if (unfreeze && executed) {
        purrr::walk(src_data$qmd_file, \(qmd) {
          unfreeze(qmd, root, quiet = TRUE)
          uncache(qmd, root, quiet = TRUE)
        })
      }

      if (!is.null(src_data$error)) {
        list(src = fs::path_join(c(root, src)), ok = "error", timing = NA, size = NA)
      } else {
        list(src = fs::path_join(c(root, src)),
             ok = src_data$ok,
             timing = src_data$timing,
             size = src_data$size)
      }
    }
  )

  # Progress bars created with cli_progress_bar() are closed with
  # cli_progress_done(); cli_process_done() targets cli_process_start().
  if (.progress)
    cli::cli_progress_done(id = idpgr)

  # From one record per source to one element per field.
  res <- purrr::transpose(res)
  dt <- difftime(Sys.time(), refresh_start, units = "secs") |> as.numeric() |> round()
  tsize <- res$size |> unlist() |> sum(na.rm = TRUE)
  if (!quiet)
    cli::cli_alert_info("Total refresh in {dt} seconds for {scales::label_bytes()(tsize)} of data")

  # Priorities are only recomputed when every known source has been refreshed.
  if (isTRUE(priotirize) && nrow(what) == n_sources) {
    allsrcs <- res$src |> unlist() |> fs::path_ext_remove()
    # Number of hits recorded per source in the `sourcoise.refreshing.hit` option.
    hits <- getOption("sourcoise.refreshing.hit") |> unlist() |> table()
    nohits <- setdiff(allsrcs, names(hits))
    srcs <- c(hits, rlang::set_names(rep(0, length(nohits)), nohits))
    # Only successfully executed sources get a new priority.
    was_executed <- purrr::map_lgl(res$ok, \(ok) isTRUE(ok == "exec"))
    srcs <- srcs[names(srcs) %in% allsrcs[was_executed]]
    purrr::iwalk(srcs, \(n_hits, src) sourcoise_priority(src, 10 + n_hits))
  }

  invisible(res)
}
