Skip to contents

Rebuild a get_range() model repeatedly from subsets of the occurrence data stored in a getRange object and evaluate each rebuild against held-out observations.

Usage

cv_range(
  range_object = NULL,
  cv = "random-cv",
  nfolds = 5,
  nblocks = 2,
  backpoints = 10000
)

Arguments

range_object

getRange object. Typically returned by get_range().

cv

Character. String specifying the cross-validation strategy: "random-cv" or "block-cv".

nfolds

Numeric. Number of folds.

nblocks

Numeric. Multiplier used when cv = "block-cv" to define the total number of spatial blocks as nfolds * nblocks.

backpoints

Numeric. Number of regularly spaced background points used as pseudo-absences. Default is 10000.

Value

A data frame with one row per fold plus a Mean row, and the columns TP, FA, TA, FP, Precision, Sensitivity, Specificity, and TSS.

Details

The function rebuilds the range map nfolds times. In each iteration, one fold is reserved for evaluation and the remaining folds are used for training.

Two strategies are available: random cross-validation and spatial block cross-validation. The latter reduces the influence of spatial autocorrelation by grouping nearby observations before splitting them across folds.

Because true absences are generally unavailable, the evaluation uses a regular grid of background points as pseudo-absences and reports precision, sensitivity, specificity, and TSS.

References

Roberts, D. R., Bahn, V., Ciuti, S., Boyce, M. S., Elith, J., Guillera‐ Arroita, G., ... & Dormann, C. F. (2017). Cross‐validation strategies for data with temporal, spatial, hierarchical, or phylogenetic structure. Ecography, 40(8), 913-929.

Chauvier, Y., Zimmermann, N. E., Poggiato, G., Bystrova, D., Brun, P., & Thuiller, W. (2021). Novel methods to correct for observer and sampling bias in presence‐only species distribution models. Global Ecology and Biogeography, 30(11), 2312-2325.

Examples

if (FALSE) { # \dontrun{
###########################################
### Example plot
###########################################

# Load available ecoregions
eco.terra <- read_ecoreg(
    ecoreg_name = "eco_terra",
    save_dir = NULL,
    format = "sf"
)

# First download the worldwide observations of Panthera tigris from GBIF
obs.pt <- get_gbif(sp_name = "Panthera tigris")

# Build a range map from occurrence points
range.tiger <- get_range(
    occ_coord = obs.pt,
    ecoreg = eco.terra,
    ecoreg_name = "ECO_NAME",
    format = "sf"
)

pt.test <- cv_range(
    range_object = range.tiger,
    cv = "block-cv",
    nfolds = 5,
    nblocks = 2
);pt.test


###########################################
### Package manuscript plot (Fig 2a-b)
###########################################

# Root and package
root_dir <- list.files(
    system.file(package = "gbif.range"),
    pattern = "extdata",
    full.names = TRUE
)
if (!dir.exists(file.path(root_dir, "fig_plots"))) {
    dir.create(file.path(root_dir, "fig_plots"))
}
if (!requireNamespace("colorspace", quietly = TRUE)) {
  install.packages("colorspace")
}

###########
##### Plant #########
###########

# Preliminary
spdf.world <- rnaturalearth::ne_countries(type = "countries",returnclass = "sv")
shp.lonlat <- terra::vect(
    paste0(
        system.file(package = "gbif.range"),
        "/extdata/shp_lonlat.shp"
    )
)
obs.arcto <- get_gbif("Arctostaphylos alpinus", geo = shp.lonlat, grain = 1)
rst <- terra::rast(
    paste0(
        system.file(package = "gbif.range"),
        "/extdata/rst.tif"
    )
)
my.eco <- make_ecoreg(rst, 200)
range.arcto <- get_range(
    occ_coord = obs.arcto,
    ecoreg = my.eco,
    ecoreg_name = "EcoRegion",
    res = 0.05,
    format = "SpatRaster"
)
ext.temp <- terra::ext(range.arcto$rangeOutput)
ext.temp <- c(ext.temp[1]-0.6, ext.temp[2]+0.05,
                ext.temp[3]-0.05, ext.temp[4]+0.05)

# Create pseudo-absences
    # First remove observations considered outliers in get_range()
xy.df <- range.arcto$init.args$occ_coord
r.ext <- terra::ext(range.arcto$rangeOutput)
Xrm.cond <- xy.df$decimalLongitude >= r.ext[1] &
                    xy.df$decimalLongitude <= r.ext[2]
Yrm.cond <-  xy.df$decimalLatitude >= r.ext[3] &
                    xy.df$decimalLatitude <= r.ext[4]
xy.df <- xy.df[Xrm.cond & Yrm.cond,]

    # Samples n regular background points over the original range extent
x.interv <- (r.ext[2] - r.ext[1]) / (sqrt(1e4)-1)
y.interv <- (r.ext[4] - r.ext[3]) / (sqrt(1e4)-1)
lx <- seq(r.ext[1], r.ext[2], x.interv)
ly <- seq(r.ext[3], r.ext[4], y.interv)
bp.xy <- expand.grid(decimalLongitude = lx, decimalLatitude = ly)

    # Combine observations with background
obs.xy <- xy.df[, c("decimalLongitude","decimalLatitude")]
all.xy <- rbind(obs.xy, bp.xy)
all.xy$Pres <- 0
all.xy[1:nrow(obs.xy), "Pres"] <- 1

    # Run block-cv
xy.pres <- all.xy$Pres
cv.strat <- make_blocks(
    nfolds = 5,
    df = all.xy[, c("decimalLongitude","decimalLatitude")],
    nblocks = 5*2,
    pres = xy.pres
)
all.xy$bcv <- cv.strat
all.xy[all.xy$bcv%in%1, "col"] <- "#e41a1c"
all.xy[all.xy$bcv%in%2, "col"] <- "#377eb8"
all.xy[all.xy$bcv%in%3, "col"] <- "#4daf4a"
all.xy[all.xy$bcv%in%4, "col"] <- "#984ea3"
all.xy[all.xy$bcv%in%5, "col"] <- "#ff7f00"
all.xy[all.xy$Pres%in%1, "col"] <- colorspace::darken(
    all.xy[all.xy$Pres%in%1, "col"],
    amount = 0.5
)

# Plot
    # Evaluate
ar.test <- cv_range(
    range_object = range.arcto,
    cv = "block-cv",
    nfolds = 5,
    nblocks = 2
)

    # First extract the world at the arcto extent and divide
    # presences/absences/outliers
world.local <- terra::crop(spdf.world, ext.temp)
world.local.ar <- terra::aggregate(world.local)
pres <- all.xy[all.xy$Pres%in%1, ]
abs <- all.xy[all.xy$Pres%in%0, ]
pres_coords <- as.data.frame(pres[, c("decimalLongitude", "decimalLatitude")])
id.in <- terra::extract(range.arcto$rangeOutput, pres_coords)
pres <- pres[!is.na(id.in[,2]), ]

    # Continue plotting
png(
    paste0(
        root_dir,
        "/fig_plots/fig2_arcto_cv.png"
    ),
    width = 100,
    height = 70,
    unit = "cm",
    res = 100,
    pointsize = 110
)
par(
    mfrow = c(1,1),
    mar = c(5,5,5,20),
    lwd = 1,
    cex = 0.5
)
terra::plot(world.local.ar, col = "#bcd1bc", axes = FALSE, lwd = 2)
graphics::points(abs, col = paste0(abs$col, "50"), pch = 16, cex = 0.7)
terra::plot(
    terra::as.polygons(range.arcto$rangeOutput),
    border = "black",
    lwd = 6,
    col = "#63636370",
    add = TRUE
)
graphics::points(pres, col = paste0(pres$col,"90"), pch = 16, cex = 1.5)
text(
    6.6,43.2,
    paste("Mean TSS =", round(tail(ar.test[,"TSS"],1),2)),
    cex = 1.5,
    font = 2
)
text(
    7.2,42.8,
    paste("Mean Precision =", round(tail(ar.test[,"Precision"],1),2)),
    cex = 1.5,
    font = 2
)
dev.off()

###########
##### Tiger #########
###########


# Convert the range polygon to raster
tiger_rast <- terra::rasterize(
    range.tiger$rangeOutput,
    terra::rast(
        terra::ext(range.tiger$rangeOutput),
        resolution = 0.1
    )
)

# Preliminary
ext.temp <- terra::ext(tiger_rast)
ext.temp <- c(ext.temp[1]-0.2, ext.temp[2]+0.2,
                ext.temp[3]-0.2, ext.temp[4]+0.2)

# Create pseudo-absences
    # First remove observations considered outliers in get_range()
xy.df <- range.tiger$init.args$occ_coord
r.ext <- terra::ext(tiger_rast)
Xrm.cond <- xy.df$decimalLongitude >= r.ext[1] &
                xy.df$decimalLongitude <= r.ext[2]
Yrm.cond <-  xy.df$decimalLatitude >= r.ext[3] &
                xy.df$decimalLatitude <= r.ext[4]
xy.df <- xy.df[Xrm.cond & Yrm.cond,]

    # Samples n regular background points over the original range extent
x.interv <- (r.ext[2] - r.ext[1]) / (sqrt(1e4) - 1)
y.interv <- (r.ext[4] - r.ext[3]) / (sqrt(1e4) - 1)
lx <- seq(r.ext[1], r.ext[2], x.interv)
ly <- seq(r.ext[3], r.ext[4], y.interv)
bp.xy <- expand.grid(decimalLongitude = lx, decimalLatitude = ly)

    # Combine observations with background
obs.xy <- xy.df[,c("decimalLongitude", "decimalLatitude")]
all.xy <- rbind(obs.xy, bp.xy)
all.xy$Pres <- 0
all.xy[1:nrow(obs.xy), "Pres"] <- 1

    # Run block-cv
xy.pres <- all.xy$Pres
cv.strat <- make_blocks(
    nfolds = 5,
    df = all.xy[, c("decimalLongitude","decimalLatitude")],
    nblocks = 5*2,
    pres = xy.pres
)
all.xy$bcv <- cv.strat
all.xy[all.xy$bcv%in%1,"col"] <- "#e41a1c"
all.xy[all.xy$bcv%in%2,"col"] <- "#377eb8"
all.xy[all.xy$bcv%in%3,"col"] <- "#4daf4a"
all.xy[all.xy$bcv%in%4,"col"] <- "#984ea3"
all.xy[all.xy$bcv%in%5,"col"] <- "#ff7f00"
all.xy[all.xy$Pres%in%1,"col"] <- colorspace::darken(
    all.xy[all.xy$Pres%in%1, "col"],
    amount = 0.5
)

# Plot
    # First extract the world at the tiger extent and divide
    # presences/absences/outliers
world.local <- terra::crop(spdf.world, ext.temp)
world.local.ti <- terra::aggregate(world.local)
pres <- all.xy[all.xy$Pres%in%1, ]
abs <- all.xy[all.xy$Pres%in%0, ]
pres_coords <- as.data.frame(pres[, c("decimalLongitude", "decimalLatitude")])
id.in <- terra::extract(tiger_rast, pres_coords)
pres <- pres[!is.na(id.in[,2]),]

    # Continue plotting
png(
    paste0(
        root_dir,
        "/fig_plots/fig2_tiger_cv.png"
    ),
    width = 100,
    height = 70,
    unit = "cm",
    res = 100,
    pointsize = 110
)
par(mfrow = c(1,1), mar = c(5,5,5,20), lwd = 1, cex = 0.5)
terra::plot(world.local.ti, col = "#bcd1bc", axes = FALSE, lwd = 2)
graphics::points(abs, col = paste0(abs$col,"50"), pch = 16, cex = 0.6)
terra::plot(
    terra::as.polygons(tiger_rast),
    border = "black",
    lwd = 6,
    col = "#63636370",
    add = TRUE
)
graphics::points(pres, col = paste0(pres$col,"80"), pch = 16, cex = 1.6)
text(86,49,
    paste("Mean TSS =", round(tail(pt.test[,"TSS"],1),2)),
    cex = 1.5,
    font = 2
)
text(90.4,45.4,
    paste("Mean Precision =", round(tail(pt.test[,"Precision"],1),2)),
    cex = 1.5, font = 2
)
dev.off()

} # }