#!/bin/sh
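# Configure script for sd2R
# 1. Download vocabulary header files needed for compilation
# 2. Resolve ggmlR paths
# 3. Resolve OpenMP flags from R's Makeconf
# 4. Auto-detect Vulkan support
# 5. Detect ggml meta backend support in libggml.a
# 6. Generate src/Makevars from src/Makevars.in
# 7. Patch vendored sd/ sources for R compatibility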

# ---- Find R and Rscript (needed early for the download fallback) ----

# Resolve R_HOME unless the environment already provides a non-empty value.
# The expansion is quoted so the output of `R RHOME` is never word-split or
# glob-expanded, and ':=' (rather than '=') also covers an R_HOME that is set
# but empty.
: "${R_HOME:=$(R RHOME 2>/dev/null)}"
if [ -n "$R_HOME" ]; then
  # Use the Rscript that belongs to the R installation being configured.
  RSCRIPT="${R_HOME}/bin/Rscript"
else
  # R could not be located; rely on whatever Rscript is on PATH.
  RSCRIPT="Rscript"
fi

# ---- Download vocabulary files from GitHub Releases ----

# Release that hosts the vocabulary headers, and where they live in the tree.
VOCAB_BASE_URL="https://github.com/Zabis13/sd2R/releases/download/assets-v2"
VOCAB_DIR="src/sd/tokenizers/vocab"

# Tokenizer vocab/merges headers embedded at compile time (~64 MB in total):
#   clip_merges.hpp  — CLIP  (SD1/2/SDXL, Flux clip_l/clip_g)
#   t5.hpp           — T5    (Flux, SD3)
#   umt5.hpp         — UMT5  (Wan video)
#   qwen_merges.hpp  — Qwen2 (Qwen-Image)
VOCAB_FILES="clip_merges.hpp t5.hpp umt5.hpp qwen_merges.hpp"

# The same release also carries vocabs for further text encoders. They are
# NOT fetched by default (they would add ~178 MB). To use them, define
# SD2R_FULL_VOCAB in src/Makevars.in and append the files to VOCAB_FILES:
#   mistral_vocab.hpp mistral_merges.hpp  — Mistral (Chroma-Radiance, Ernie-Image)
#   gemma_vocab.hpp   gemma_merges.hpp    — Gemma   (LTX audio/video)
#   gemma2_vocab.hpp  gemma2_merges.hpp   — Gemma2  (PID)
#   gpt_oss_vocab.hpp gpt_oss_merges.hpp  — GPT-OSS (Lens)

# Set to 1 as soon as any required file cannot be obtained.
DOWNLOAD_FAILED=0

# Pick the first external download tool found on PATH, preferring curl over
# wget. DOWNLOADER stays empty when neither exists; R's download.file() is
# then the only transfer method used by download_one.
DOWNLOADER=""
for _dl_tool in curl wget; do
  if command -v "$_dl_tool" >/dev/null 2>&1; then
    DOWNLOADER="$_dl_tool"
    break
  fi
done

# download_one URL DEST
#
# Fetch URL into DEST with the tool named by $DOWNLOADER ("curl", "wget" or
# empty), then fall back to R's utils::download.file() (run through $RSCRIPT)
# whenever that leaves no usable file.
#
# Arguments: $1 - URL to fetch
#            $2 - destination path; its parent directory is created if missing
# Globals:   DOWNLOADER, RSCRIPT (read)
#            _url, _dest, _err, _rc (written; POSIX sh has no 'local')
# Outputs:   diagnostics of every transfer attempt that failed, indented by
#            four spaces, on stdout; nothing when the first attempt succeeds
# Returns:   0 when DEST exists and is non-empty afterwards, 1 otherwise
download_one() {
  _url="$1"; _dest="$2"; _err=""; _rc=0

  # Every downloader needs the target directory to exist before it can write.
  mkdir -p "$(dirname "${_dest}")" || return 1

  if [ "$DOWNLOADER" = "curl" ]; then
    # -s silences the progress meter, -S still reports real errors.
    _err=$(curl -fsSL --retry 3 -o "${_dest}" "${_url}" 2>&1) || _rc=$?
  elif [ "$DOWNLOADER" = "wget" ]; then
    _err=$(wget --tries=3 -O "${_dest}" "${_url}" 2>&1) || _rc=$?
  fi
  # A failed transfer can leave a truncated (or empty) file behind; remove it
  # so it is never mistaken for a complete download.
  if [ "$_rc" -ne 0 ]; then
    rm -f "${_dest}"
  fi

  if [ ! -s "${_dest}" ]; then
    # Report why the external tool failed before its message is replaced by
    # the fallback's output.
    if [ -n "$_err" ]; then
      printf '%s\n' "    ${_err}"
    fi
    # URL and DEST travel as trailing arguments instead of being spliced into
    # the R source, so quotes or backslashes in them cannot break the code.
    _rc=0
    _err=$("$RSCRIPT" -e "a <- commandArgs(trailingOnly = TRUE); st <- tryCatch(utils::download.file(a[1], a[2], mode = 'wb', quiet = TRUE), error = function(e) { cat(conditionMessage(e)); 1L }); quit(save = 'no', status = if (identical(as.integer(st), 0L)) 0L else 1L)" "${_url}" "${_dest}" 2>&1) || _rc=$?
    if [ "$_rc" -ne 0 ]; then
      rm -f "${_dest}"
    fi
  fi

  if [ -s "${_dest}" ]; then
    return 0
  fi
  if [ -n "$_err" ]; then
    printf '%s\n' "    ${_err}"
  fi
  return 1
}

# Make sure every required vocabulary header is present, then normalise its
# line endings.
for f in $VOCAB_FILES; do
  vocab_path="${VOCAB_DIR}/${f}"

  if [ ! -f "$vocab_path" ] || [ ! -s "$vocab_path" ]; then
    # Missing or empty: fetch it from the release.
    echo "* Downloading ${f}..."
    download_one "${VOCAB_BASE_URL}/${f}" "$vocab_path"
    if [ -f "$vocab_path" ] && [ -s "$vocab_path" ]; then
      echo "* ${f}: downloaded successfully"
    else
      # Remember the failure and drop any empty leftover; the remaining files
      # are still attempted.
      DOWNLOAD_FAILED=1
      rm -f "$vocab_path"
      echo "* ${f}: download FAILED"
    fi
  else
    echo "* ${f}: already present, skipping download"
  fi

  # Nothing usable on disk, so there is nothing to normalise.
  if [ ! -f "$vocab_path" ] || [ ! -s "$vocab_path" ]; then
    continue
  fi

  # The released vocab files use CRLF line endings; remove the CRs so that
  # R CMD check does not report "CR or CRLF line endings" for the unpacked
  # sources. This runs for fresh downloads and for files that were already
  # there (e.g. left by an older install). The result replaces the original
  # only if tr succeeded.
  if tr -d '\r' < "$vocab_path" > "${vocab_path}.tmp" 2>/dev/null; then
    mv "${vocab_path}.tmp" "$vocab_path"
  else
    rm -f "${vocab_path}.tmp"
  fi
done

# Abort with manual-download instructions if any required file is missing.
if [ "$DOWNLOAD_FAILED" -eq 1 ]; then
  # One argument per output line; empty arguments produce the blank lines.
  printf '%s\n' \
    "" \
    "ERROR: Failed to download required vocabulary files." \
    "" \
    "These files are required for compilation:" \
    "  ${VOCAB_FILES}" \
    "" \
    "You can download them manually from:" \
    "  ${VOCAB_BASE_URL}" \
    "" \
    "Place the files into the ${VOCAB_DIR}/ directory of the package source," \
    "then run install.packages() again." \
    "" \
    "If you don't have curl or wget, install one of them:" \
    "  sudo apt install curl    # Debian/Ubuntu" \
    "  sudo yum install curl    # RHEL/CentOS" \
    "  brew install curl        # macOS"
  exit 1
fi

# ---- Resolve ggmlR installed paths ----

# Ask R for the 'lib' and 'include' directories of the installed ggmlR
# package. "$RSCRIPT" is quoted so an R_HOME containing whitespace is not
# split into several words. Diagnostics from Rscript are discarded; only the
# printed path matters.
GGMLR_LIB=$("$RSCRIPT" -e "cat(system.file('lib', package='ggmlR'))" 2>/dev/null)
GGMLR_INCLUDE=$("$RSCRIPT" -e "cat(system.file('include', package='ggmlR'))" 2>/dev/null)

# An empty result for either directory is treated as "ggmlR is not installed".
if [ -z "$GGMLR_LIB" ] || [ -z "$GGMLR_INCLUDE" ]; then
  echo "ERROR: ggmlR package not found. Install it first:"
  echo "  remotes::install_github(\"Zabis13/ggmlR\")"
  exit 1
fi

echo "* ggmlR lib: ${GGMLR_LIB}"
echo "* ggmlR include: ${GGMLR_INCLUDE}"

# ---- Resolve OpenMP flags from R's Makeconf ----
#
# CRAN R CMD check enforces strict pairing of SHLIB_OPENMP_*FLAGS macros.
# For mixed C/C++ packages this creates conflicts. Solution: resolve flags
# at configure time (same approach as RcppArmadillo and ggmlR).

MAKECONF="${R_HOME}/etc/Makeconf"
OPENMP_CFLAGS=""
OPENMP_CXXFLAGS=""
OPENMP_CPPFLAGS=""

# Read the SHLIB_OPENMP_* values from Makeconf: keep only lines that start
# with the macro name and strip everything up to and including "= ".
if [ -f "$MAKECONF" ]; then
  OPENMP_CFLAGS=$(sed -e '/^SHLIB_OPENMP_CFLAGS/!d' -e 's/[^=]*= *//' "$MAKECONF")
  OPENMP_CXXFLAGS=$(sed -e '/^SHLIB_OPENMP_CXXFLAGS/!d' -e 's/[^=]*= *//' "$MAKECONF")
fi

# OpenMP counts as available as soon as either compiler has a flag for it.
if [ -z "${OPENMP_CFLAGS}${OPENMP_CXXFLAGS}" ]; then
  echo "* OpenMP: not available"
else
  OPENMP_CPPFLAGS="-DGGML_USE_OPENMP"
  echo "* OpenMP: enabled (C: ${OPENMP_CFLAGS}, C++: ${OPENMP_CXXFLAGS})"
fi

# ---- Auto-detect Vulkan support ----
#
# Two conditions must BOTH be true to enable Vulkan:
#   1. ggmlR's libggml.a contains a *defined* ggml_backend_vk_get_device_count
#      symbol (nm type T/t). This is the authoritative check — it proves the
#      Vulkan backend was actually compiled into the static library.
#      On CRAN Fedora, ggmlR's shared lib (ggmlR.so) may report Vulkan as
#      available at runtime, but libggml.a can be built without Vulkan objects.
#      Using nm on the .a is the only reliable way to detect this mismatch.
#   2. libvulkan can be found for linking (pkg-config or known paths)

VULKAN_CPPFLAGS=""
VULKAN_LIBS=""

# Directories probed for libvulkan.so when pkg-config cannot locate Vulkan:
# the plain and lib64 system directories plus the x86_64 and aarch64
# multiarch directories.
VULKAN_LIB_DIRS="/usr/lib /usr/lib64 /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu"

# has_libvulkan DIR...
# Returns 0 if any DIR contains a regular file named libvulkan.so, else 1
# (also 1 when called without arguments).
has_libvulkan() {
  for _vk_dir in "$@"; do
    if [ -f "${_vk_dir}/libvulkan.so" ]; then
      return 0
    fi
  done
  return 1
}

# Step 1: check that libggml.a has Vulkan backend compiled in, i.e. nm lists
# ggml_backend_vk_get_device_count as a defined text symbol (type T or t).
VK_IN_LIBGGML="no"
if [ -f "${GGMLR_LIB}/libggml.a" ]; then
  if nm "${GGMLR_LIB}/libggml.a" 2>/dev/null | grep -q " [Tt] .*ggml_backend_vk_get_device_count"; then
    VK_IN_LIBGGML="yes"
  fi
fi

# Step 2: find libvulkan and enable. pkg-config is preferred because it also
# supplies include flags; otherwise fall back to the known directories.
if [ "$VK_IN_LIBGGML" = "yes" ]; then
  if pkg-config --exists vulkan 2>/dev/null; then
    VULKAN_CPPFLAGS="-DSD_USE_VULKAN $(pkg-config --cflags vulkan)"
    VULKAN_LIBS="$(pkg-config --libs vulkan)"
    echo "* Vulkan: enabled (symbol found in libggml.a, libvulkan via pkg-config)"
  # shellcheck disable=SC2086 -- the directory list is split on purpose
  elif has_libvulkan $VULKAN_LIB_DIRS; then
    VULKAN_CPPFLAGS="-DSD_USE_VULKAN"
    VULKAN_LIBS="-lvulkan"
    echo "* Vulkan: enabled (symbol found in libggml.a, system libvulkan)"
  else
    echo "* Vulkan: disabled (Vulkan backend in libggml.a but libvulkan not found for linking)"
  fi
else
  echo "* Vulkan: disabled (ggml_backend_vk_get_device_count not defined in libggml.a)"
fi

# ---- Detect ggml meta backend support in libggml.a ----
#
# The meta backend (ggml 0.11.0+, ggmlR >= 0.7.x) enables multi-GPU tensor
# split for a single model. sd2R uses it as an optional "second path"
# (sd_ctx(meta_backend = TRUE)). We must only compile the meta API calls when
# the symbol is actually defined in libggml.a — otherwise linking fails on
# older ggmlR. Detection mirrors the Vulkan symbol check above (nm on the .a).
#
# Minimum supported ggmlR version for the meta backend: 0.7.8.
# ggmlR has no compile-time version macro (only the runtime ggml_version()
# C function), so the symbol presence is the authoritative compile-time gate;
# the version above is documented here for reference and runtime checks.

# Stays empty unless libggml.a defines a symbol whose name contains
# ggml_backend_meta_device.
META_CPPFLAGS=""
_libggml_archive="${GGMLR_LIB}/libggml.a"
if [ ! -f "$_libggml_archive" ]; then
  echo "* Meta backend: disabled (libggml.a not found)"
elif nm --defined-only "$_libggml_archive" 2>/dev/null | grep -q "ggml_backend_meta_device"; then
  META_CPPFLAGS="-DSD2R_HAVE_META_BACKEND=1"
  echo "* Meta backend: enabled (ggml_backend_meta_device found in libggml.a)"
else
  echo "* Meta backend: disabled (ggml_backend_meta_device not defined in libggml.a; need ggmlR >= 0.7.8)"
fi

# ---- Generate src/Makevars from src/Makevars.in ----

# sed_escape VALUE
# Prints VALUE with every '\', '&' and '|' preceded by a backslash, so it can
# be used verbatim as the replacement part of an s|...|...| command
# ('&' and '\' are special in a replacement, '|' is the delimiter used here).
sed_escape() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# Substitute every @PLACEHOLDER@ of the template with the value resolved
# above. Success is reported only if sed and the output redirection both
# worked; otherwise the incomplete file is removed and configure stops, so
# the build never continues with a missing or truncated Makevars.
if sed -e "s|@GGMLR_LIB@|$(sed_escape "${GGMLR_LIB}")|g" \
    -e "s|@GGMLR_INCLUDE@|$(sed_escape "${GGMLR_INCLUDE}")|g" \
    -e "s|@VULKAN_CPPFLAGS@|$(sed_escape "${VULKAN_CPPFLAGS}")|g" \
    -e "s|@VULKAN_LIBS@|$(sed_escape "${VULKAN_LIBS}")|g" \
    -e "s|@META_CPPFLAGS@|$(sed_escape "${META_CPPFLAGS}")|g" \
    -e "s|@OPENMP_CPPFLAGS@|$(sed_escape "${OPENMP_CPPFLAGS}")|g" \
    -e "s|@OPENMP_CFLAGS@|$(sed_escape "${OPENMP_CFLAGS}")|g" \
    -e "s|@OPENMP_CXXFLAGS@|$(sed_escape "${OPENMP_CXXFLAGS}")|g" \
    src/Makevars.in > src/Makevars; then
  echo "* Wrote src/Makevars"
else
  rm -f src/Makevars
  echo "ERROR: could not generate src/Makevars from src/Makevars.in"
  exit 1
fi

# ---- Patch vendored sd/ sources for R compatibility ----
# C++ <cstdio> does '#undef printf', so -include r_ggml_compat.h macros
# don't work for .cpp/.hpp files.  Direct text replacement instead.

# Run the source patcher on src/sd, but only when the helper script exists
# and is executable; otherwise this step is skipped silently.
SD_PATCH_TOOL="tools/patch_sd_sources.sh"
if test -x "$SD_PATCH_TOOL"; then
  "./${SD_PATCH_TOOL}" src/sd
fi
