library(glmmTMB) # for fitting GLMMs
library(cowplot) # for plotting multiple panels together via plot_grid function
library(ggeffects) # for extracting the marginal effects of each covariate in the model
library(MuMIn) # for AIC
library(performance) # for R2
library(terra) #raster
library(tidyverse)
library(raster)
library(sf)
library(data.table)
library(mgcv)
# Ensure the dataset has the proper format with predictors and the response variable (RealDets)
<- readRDS('data/datextract.RDS')
datextract <- readRDS('data/alldat.RDS')
alldat # As before, assemble our dataset remove NAs
<- cbind(datextract, alldat) %>% drop_na() %>% mutate(RealDets = as.factor(RealDets), Transmitter = as.factor(Transmitter)) datgam
RSF with GAMs
RSFs with GAMs
This vignette uses GAMs to calculate Resource Selection Functions. This section uses the same setup as the first tab, so we will just start with brining in that data
Run GAM model
We will just use all of the data to run the GAM model in this example. GAMS do well with spatial and temporal autocorrelation!
# Fit a GAM model with spatial and temporal autocorrelation (x, y, and Date). Stay tuned for
<- gam(RealDets ~ s(hw2020, k = 4) + s(tt2020, k = 4) + s(cov2020, k = 4) + s(sdcov2020, k = 4) + s(num2020, k = 4) +
gam_model s(Transmitter, bs = "re") + # ID
s(x, y, bs = "gp"), # Spatial component
family = binomial, data = datgam, method = "REML")
Summarizing the model
# Summarize the model
summary(gam_model)
##
## Family: binomial
## Link function: logit
##
## Formula:
## RealDets ~ s(hw2020, k = 4) + s(tt2020, k = 4) + s(cov2020, k = 4) +
## s(Transmitter, bs = "re") + s(x, y, bs = "gp")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.4897 0.5552 -6.286 3.26e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(hw2020) 2.939 2.997 42.41 < 2e-16 ***
## s(tt2020) 2.753 2.951 10.52 0.0226 *
## s(cov2020) 2.981 2.999 510.66 < 2e-16 ***
## s(Transmitter) 7.498 10.000 28.60 5.01e-05 ***
## s(x,y) 31.585 31.934 811.59 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.636 Deviance explained = 57.7%
## -REML = 1343.1 Scale est. = 1 n = 3948
# Visualize smooth effects for each covariate
plot(gam_model, pages = 1, all.terms = TRUE)
# Predict probabilities for the full dataset and evaluate accuracy. Let's skip the test vs. training - it's similar to GLMMs.
$predicted_probs_gam <- predict(gam_model, newdata = datgam, type = "response")
datgam$predicted_class_gam <- ifelse(datgam$predicted_probs_gam > 0.5, 1, 0) datgam
Model accuracy
Let’s look at the model accuracy. You will see it is a lot lower than the RF model but better than the GLMM
# Confusion matrix and accuracy for the GAM
<- table(datgam$RealDets, datgam$predicted_class_gam)
conf_matrix_gam
conf_matrix_gam##
## 0 1
## 0 1779 247
## 1 244 1678
<- sum(diag(conf_matrix_gam)) / sum(conf_matrix_gam)
accuracy_gam
accuracy_gam## [1] 0.8756332
Model Interpretation
Let’s map the predicted surface! Everything else is very similar to RF, GLMM
# Predict probabilities on spatial grid using GAMs
<- rast('data/cov2020.tif') #percent SAV cover
cov_2020 <- rast('data/sdcov2020.tif') #standard deviation of cover
sdcov_2020 <- rast('data/num2020.tif') #number of SAV species
numsp_2020 <- rast('data/hw2020.tif') #Halodule wrightii cover
hw_2020 <- rast('data/tt2020.tif')
tt_2020
<- st_read('data/trainr2021_mask.shp')
extent ## Reading layer `trainr2021_mask' from data source
## `C:\Users\jonro\OneDrive\Desktop\RSF_OTN_Workshop\RSF_OTN_Workshop\data\trainr2021_mask.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 518189.1 ymin: 2776305 xmax: 521241.5 ymax: 2780157
## Projected CRS: NAD83 / UTM zone 17N
<- terra::crop(cov_2020, extent)
cov2020 <- terra::crop(sdcov_2020, extent)
sdcov2020 <- terra::crop(numsp_2020, extent)
num2020 <- terra::crop(hw_2020, extent)
hw2020 <- terra::crop(tt_2020, extent)
tt2020
<- c(cov2020, sdcov2020, num2020, hw2020, tt2020)
rastdat <- terra::project(rastdat, 'epsg:2958')
rastdat # Convert rasters into a data frame and predict onto spatial data
<- raster::as.data.frame(rastdat, xy=T) %>%
newdata_gam mutate(Transmitter = "place-holder")# Use rastdat from earlier
$predicted_probs_gam <- predict(gam_model, newdata = newdata_gam, type = "response")
newdata_gam## Warning in predict.gam(gam_model, newdata = newdata_gam, type = "response"):
## factor levels place-holder not in original fit
# Create a raster from predicted probabilities
<- rast(ext(rastdat), resolution = res(rastdat), crs = crs(rastdat))
pred_raster_gam <- newdata_gam$predicted_probs_gam
pred_raster_gam[]
# Plot the spatial predictions
plot(pred_raster_gam, main = "GAM Predicted Probabilities of Presence")
# Convert rasters to data frames
<- as.data.frame(pred_raster_gam, xy = TRUE)
df_gam colnames(df_gam)[3] <- "GAM_Prob"
# Create the GAM plot
<- ggplot(df_gam, aes(x = x, y = y, fill = GAM_Prob)) +
(gam_plot geom_tile() +
scale_fill_viridis_c(limits = c(0, 1), name = "Probability") +
coord_equal() +
labs(title = "GAM Predicted Probabilities of Presence") +
theme_minimal())
Congrats, you have used GAMs and now have gone through the entire workshop!