rbiodatacr is an R client for querying BIODATACR, the national
biodiversity information platform of Costa Rica managed by the Technical
Office of CONAGEBIO. The platform is built on the Atlas of Living Australia (ALA) API
infrastructure.
Before downloading occurrence records, use
bdcr_species_search() to verify that the species name is
recognized by BIODATACR and to retrieve its taxonomic identifier
(GUID).
bdcr_species_search("Panthera onca")
#> # A tibble: 2 × 7
#> name guid commonName scientificName rank taxonomicStatus nameComplete
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Panthera o… 5219… "" Panthera onca… spec… accepted Panthera on…
#> 2 Panthera o… 5219… "" Panthera onca… subs… accepted Panthera on…
The function may return more than one row when both the species and
subspecies are registered. The guid column contains the
unique identifier for each taxonomic concept — useful for precise
queries.
Use bdcr_count() to check how many occurrence records
are available before downloading.
For multiple species at once use bdcr_count_batch(),
which returns a tidy tibble with one row per species.
species <- c(
"Tapirus bairdii",
"Panthera onca",
"Ara ambiguus",
"Bradypus variegatus"
)
conteos <- bdcr_count_batch(species)
conteos
#> # A tibble: 4 × 2
#> taxon n_records
#> <chr> <int>
#> 1 Tapirus bairdii 1
#> 2 Panthera onca 313
#> 3 Ara ambiguus 1216
#> 4 Bradypus variegatus 4151
bdcr_occurrences() downloads records for a single
species and returns a tibble with 15 fields relevant for biodiversity
analysis.
df_jaguar <- bdcr_occurrences("Panthera onca", rows = 100)
glimpse(df_jaguar)
#> Rows: 100
#> Columns: 15
#> $ scientificName <chr> "Panthera onca subsp. centralis (Mearns, 1901)", "Pan…
#> $ vernacularName <chr> "Central American Jaguar", "Jaguar Panthera onca", "J…
#> $ decimalLatitude <dbl> 10.91970, 9.95000, 10.47563, 10.68948, 10.48542, 10.5…
#> $ decimalLongitude <dbl> -85.01460, -84.00000, -83.46852, -84.14154, -83.81592…
#> $ year <int> 1993, NA, 2021, 2013, 2013, NA, 2013, NA, 2022, 2013,…
#> $ month <chr> "06", NA, "12", "06", "04", NA, "10", NA, "05", "09",…
#> $ basisOfRecord <chr> "PreservedSpecimen", "PreservedSpecimen", "HumanObser…
#> $ dataResourceName <chr> "Modelado de la distribución geográfica de mamíferos …
#> $ country <chr> "Costa Rica", "Costa Rica", "Costa Rica", "Costa Rica…
#> $ family <chr> "Felidae", "Felidae", "Felidae", "Felidae", "Felidae"…
#> $ species <chr> "Panthera onca", "Panthera onca", "Panthera onca", "P…
#> $ collector <chr> "NO DISPONIBLE", "Ch. d'Eternod", "UACFel (SINAC-Pant…
#> $ license <chr> "other", "other", "other", "other", "other", "other",…
#> $ geospatialKosher <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
#> $ taxonomicKosher <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
For multiple species use bdcr_occurrences_batch(), which
returns a named list of tibbles — one per species.
spp_with_data <- filter(conteos, n_records >= 10)
lista_occ <- bdcr_occurrences_batch(
taxa = spp_with_data$taxon,
rows = 100
)
# Number of records per species
purrr::map_int(lista_occ, nrow)
#> Panthera onca Ara ambiguus Bradypus variegatus
#> 100 100 100
bdcr_quality_check() adds a quality_flag
column to the occurrences tibble. Possible flags are:
| Flag | Condition |
|---|---|
| "ok" | No issues detected |
| "no_coords" | Missing coordinates |
| "geospatial_issue" | geospatialKosher == FALSE |
| "taxonomic_issue" | taxonomicKosher == FALSE |
| "old_record" | Year before minimum threshold (default 1950) |
df_qc <- bdcr_quality_check(df_jaguar)
count(df_qc, quality_flag, sort = TRUE)
#> # A tibble: 1 × 2
#> quality_flag n
#> <chr> <int>
#> 1 ok 100
Keep only clean records:
df_clean <- filter(df_qc, quality_flag == "ok",
!is.na(decimalLatitude),
!is.na(decimalLongitude))
nrow(df_clean)
#> [1] 100
Convert the clean tibble to an sf object and plot the
records over Costa Rica.
# Convert to sf
df_sf <- st_as_sf(
df_clean,
coords = c("decimalLongitude", "decimalLatitude"),
crs = 4326
)
# Load Costa Rica national boundary included in rbiodatacr
# Source: GADM (gadm.org), level 0 = country boundary
data(cr_outline)
# Map
ggplot() +
geom_sf(data = cr_outline, fill = "gray95", color = "gray50") +
geom_sf(data = df_sf, color = "#E63946", size = 2, alpha = 0.7) +
labs(
title = "Panthera onca — BIODATACR occurrence records",
subtitle = paste0(nrow(df_sf), " clean records"),
caption = "Source: BIODATACR (biodiversidad.go.cr)",
x = "Longitude",
y = "Latitude"
) +
theme_minimal()

# 1. Check availability
species <- c("Tapirus bairdii", "Panthera onca",
"Ara ambiguus", "Bradypus variegatus")
conteos <- bdcr_count_batch(species)
# 2. Download species with enough data
con_datos <- filter(conteos, n_records >= 10)
lista_occ <- bdcr_occurrences_batch(
taxa = con_datos$taxon,
rows = 200
)
# 3. Quality control
lista_limpia <- purrr::map(lista_occ, bdcr_quality_check)
# 4. Consolidate and filter
df_final <- bind_rows(lista_limpia, .id = "taxon") |>
filter(quality_flag == "ok",
!is.na(decimalLatitude),
!is.na(decimalLongitude))
# 5. Summary
df_final |>
count(taxon, sort = TRUE) |>
rename(clean_records = n)
#> # A tibble: 3 × 2
#> taxon clean_records
#> <chr> <int>
#> 1 Panthera onca 200
#> 2 Ara ambiguus 199
#> 3 Bradypus variegatus 199