## ----setup, include = FALSE---------------------------------------------------
# Document-wide chunk defaults: collapse = TRUE and "#>" as the prefix that
# knitr puts in front of printed output.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(rSRD)

## ----load-data----------------------------------------------------------------
# Locate the example data set shipped with the package. `mustWork = TRUE`
# turns a missing file into an immediate error; without it system.file()
# returns "" and read.csv("") would try to read from stdin instead.
path   <- system.file("extdata", "movies1994.csv", package = "rSRD",
                      mustWork = TRUE)
# Semicolon-separated file with a header row; the first column supplies the
# row names and check.names = FALSE keeps the column headers as written.
movies <- read.csv(path, header = TRUE, sep = ";", row.names = 1,
                   check.names = FALSE)
# Auto-print the table so it is shown in the rendered vignette.
movies

## ----preprocess---------------------------------------------------------------
# Rescale the raw table with the "range_scale" method.
movies_scaled <- utilsPreprocessDF(movies, method = "range_scale")
# Show the first six rows, rounded to three decimals for display only.
round(head(movies_scaled, n = 6L), digits = 3)

## ----create-reference, eval = FALSE-------------------------------------------
# # Example: use the row-wise median as the reference
# movies_with_ref <- utilsCreateReference(movies_scaled[, -ncol(movies_scaled)],
#                                         method = "median")

## ----srd-values---------------------------------------------------------------
# Compute the SRD values for the scaled table. output_to_file = FALSE is
# passed so the result is used in-session (presumably nothing is written to
# disk -- confirm against the rSRD documentation).
srd <- calculateSRDValues(movies_scaled, output_to_file = FALSE)
# Auto-print the result so it appears in the rendered vignette.
srd

## ----srd-bar, fig.width = 6, fig.height = 4, fig.alt = "Bar chart of normalised SRD values for each scoring system"----
# Horizontal bar chart of the SRD values in ascending order; las = 1 keeps
# the axis labels horizontal.
barplot(
  height = sort(srd),
  horiz = TRUE, las = 1,
  col = "steelblue",
  main = "SRD values — movies1994",
  xlab = "Normalised SRD value"
)

## ----srd-dist, eval = FALSE---------------------------------------------------
# sim <- calculateSRDDistribution(movies_scaled, seed = 42)
# 
# cat("XX1  (5% threshold):", sim$xx1,    "\n")
# cat("Median:             ", sim$median,  "\n")
# cat("XX19 (95% threshold):", sim$xx19,  "\n")

## ----srd-dist-hidden, echo = FALSE--------------------------------------------
# Run with a fixed seed so the vignette output is reproducible
sim <- calculateSRDDistribution(movies_scaled, seed = 42)
# Print three summary fields of the result (xx1, median, xx19), one per line.
# The labels are kept identical to the ones in the displayed (eval = FALSE)
# version of this chunk so the shown code matches the shown output.
cat("XX1  (5% threshold):", sim$xx1,    "\n")
cat("Median:             ", sim$median,  "\n")
cat("XX19 (95% threshold):", sim$xx19,  "\n")

## ----perm-plot, eval = FALSE, fig.width = 7, fig.height = 5, fig.alt = "Permutation test plot showing SRD distribution and solution positions"----
# plotPermTest(movies_scaled, sim)

## ----perm-plot-hidden, echo = FALSE, fig.width = 7, fig.height = 5, fig.alt = "Permutation test plot showing SRD distribution and solution positions"----
# Draw the permutation-test plot from the scaled data and the seeded
# distribution `sim` computed earlier in this script.
plotPermTest(movies_scaled, sim)

## ----cv-hidden, echo = FALSE--------------------------------------------------
# Cross-validation on the scaled data: 7 folds, "Wilcoxon" method, fixed seed
# for a reproducible run, and output_to_file = FALSE.
cv <- calculateCrossValidation(
  movies_scaled,
  method = "Wilcoxon",
  number_of_folds = 7,
  seed = 42,
  output_to_file = FALSE
)

## ----cv, eval = FALSE---------------------------------------------------------
# cv <- calculateCrossValidation(movies_scaled,
#                                method          = "Wilcoxon",
#                                number_of_folds = 7,
#                                output_to_file  = FALSE,
#                                seed            = 42)
# 
# cv$statistical_significance

## ----cv-plot, eval = FALSE, fig.width = 7, fig.height = 5, fig.alt = "Box-whisker plot of cross-validation SRD values by solution"----
# plotCrossValidation(cv)

## ----cv-plot-hidden, echo = FALSE, fig.width = 7, fig.height = 5, fig.alt = "Box-whisker plot of cross-validation SRD values by solution"----
# Plot the cross-validation result `cv` computed earlier in this script.
plotCrossValidation(cv)

## ----heatmap, eval = FALSE----------------------------------------------------
# plotHeatmapSRD(movies_scaled)

## ----heatmap-hidden, echo = FALSE, fig.width = 7, fig.height = 6, fig.alt = "Heatmap of pairwise SRD distances between scoring systems"----
# Draw the SRD heatmap directly from the scaled data table.
plotHeatmapSRD(movies_scaled)

## ----repro-demo, eval = FALSE-------------------------------------------------
# # Two unseeded runs -- XX1 and XX19 may differ slightly between runs
# sim_a <- calculateSRDDistribution(movies_scaled)
# sim_b <- calculateSRDDistribution(movies_scaled)
# cat("Run A -- XX1:", sim_a$xx1, "  XX19:", sim_a$xx19, "\n")
# cat("Run B -- XX1:", sim_b$xx1, "  XX19:", sim_b$xx19, "\n")
# 
# # Two seeded runs -- results are identical
# sim_1 <- calculateSRDDistribution(movies_scaled, seed = 42)
# sim_2 <- calculateSRDDistribution(movies_scaled, seed = 42)
# cat("Seed 42, run 1 -- XX1:", sim_1$xx1, "  XX19:", sim_1$xx19, "\n")
# cat("Seed 42, run 2 -- XX1:", sim_2$xx1, "  XX19:", sim_2$xx19, "\n")

## ----workflow-summary, eval = FALSE-------------------------------------------
# # 1. Load data (last column = reference)
# path   <- system.file("extdata", "movies1994.csv", package = "rSRD")
# movies <- read.csv(path, header = TRUE, sep = ";", row.names = 1,
#                    check.names = FALSE)
# 
# # 2. Preprocess
# movies_scaled <- utilsPreprocessDF(movies, method = "range_scale")
# 
# # 3. Compute SRD values
# srd <- calculateSRDValues(movies_scaled, output_to_file = FALSE)
# 
# # 4. Permutation test (set seed for reproducibility)
# sim <- calculateSRDDistribution(movies_scaled, seed = 42)
# plotPermTest(movies_scaled, sim)
# 
# # 5. Cross-validation
# cv <- calculateCrossValidation(movies_scaled,
#                                method          = "Wilcoxon",
#                                number_of_folds = 7,
#                                output_to_file  = FALSE,
#                                seed            = 42)
# plotCrossValidation(cv)