## ----include = FALSE----------------------------------------------------------
# Document-wide knitr chunk defaults: source and output collapsed into one
# block, output lines prefixed with "#>", messages and warnings hidden.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(rbiodatacr)
library(dplyr)
library(sf)
library(ggplot2)

## ----species-search-----------------------------------------------------------
# Call the package's species search with the jaguar's scientific name; the
# value is not assigned, so it is auto-printed.
# NOTE(review): the shape of the returned object is defined in rbiodatacr and
# is not visible here -- confirm before describing it in the text.
bdcr_species_search("Panthera onca")

## ----count-single-------------------------------------------------------------
# Ask for the record count of a single taxon; the value is auto-printed.
# NOTE(review): presumably this is the number of BIODATACR occurrence records
# for the name -- confirm against the rbiodatacr documentation.
bdcr_count("Panthera onca")

## ----count-batch--------------------------------------------------------------
# Scientific names to check in a single batch call.
species <- c(
  "Tapirus bairdii", "Panthera onca",
  "Ara ambiguus", "Bradypus variegatus"
)

# The result is stored (the occurrences-batch chunk filters it on
# `n_records`) and then auto-printed.
conteos <- species |> bdcr_count_batch()
conteos

## ----occurrences-single-------------------------------------------------------
# Download jaguar occurrences and show a column-wise overview of the result.
# NOTE(review): `rows = 100` presumably caps the number of records
# requested -- confirm against the rbiodatacr documentation.
df_jaguar <- bdcr_occurrences("Panthera onca", rows = 100)
df_jaguar |> glimpse()

## ----occurrences-batch--------------------------------------------------------
# Keep only the taxa that have at least 10 records in the batch counts.
spp_with_data <- conteos |>
  filter(n_records >= 10)

# Request occurrences for every remaining taxon in one call.
lista_occ <- bdcr_occurrences_batch(taxa = spp_with_data$taxon, rows = 100)

# Number of records per species
lista_occ |>
  purrr::map_int(nrow)

## ----quality-check------------------------------------------------------------
# Run the package's quality check on the jaguar records.
df_qc <- df_jaguar |> bdcr_quality_check()

# Tabulate the `quality_flag` values, most frequent first.
df_qc |> count(quality_flag, sort = TRUE)

## ----quality-filter-----------------------------------------------------------
# Keep only rows flagged "ok" that have both coordinates present.
df_clean <- df_qc |>
  filter(
    quality_flag == "ok",
    !is.na(decimalLatitude),
    !is.na(decimalLongitude)
  )

# How many records survive the filter
nrow(df_clean)

## ----map, fig.width = 7, fig.height = 6---------------------------------------
# Turn the cleaned records into sf point geometries, using the
# longitude/latitude columns and CRS EPSG:4326 (WGS 84).
df_sf <- df_clean |>
  st_as_sf(
    coords = c("decimalLongitude", "decimalLatitude"),
    crs = 4326
  )

# Costa Rica national boundary included in rbiodatacr
# Source: GADM (gadm.org), level 0 = country boundary
data(cr_outline)

# Boundary polygon as the base layer, occurrence points drawn on top.
ggplot() +
  geom_sf(data = cr_outline, color = "gray50", fill = "gray95") +
  geom_sf(data = df_sf, size = 2, alpha = 0.7, color = "#E63946") +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Panthera onca — BIODATACR occurrence records",
    subtitle = paste0(nrow(df_sf), " clean records"),
    caption = "Source: BIODATACR (biodiversidad.go.cr)"
  ) +
  theme_minimal()

## ----workflow-----------------------------------------------------------------
# 1. Check availability
species <- c(
  "Tapirus bairdii",
  "Panthera onca",
  "Ara ambiguus",
  "Bradypus variegatus"
)

conteos <- species |> bdcr_count_batch()

# 2. Download species with enough data (at least 10 records)
con_datos <- conteos |>
  filter(n_records >= 10)

lista_occ <- bdcr_occurrences_batch(taxa = con_datos$taxon, rows = 200)

# 3. Quality control, applied to each element of the downloaded list
lista_limpia <- lista_occ |>
  purrr::map(bdcr_quality_check)

# 4. Consolidate into one data frame (list element identifier stored in
#    `taxon`) and keep rows flagged "ok" with both coordinates present
df_final <- lista_limpia |>
  bind_rows(.id = "taxon") |>
  filter(
    quality_flag == "ok",
    !is.na(decimalLatitude),
    !is.na(decimalLongitude)
  )

# 5. Summary: clean records per taxon, largest first
df_final |>
  count(taxon, sort = TRUE, name = "clean_records")

