Match Bathymetry and Raster Grids
Read in Data
Bathymetry
# Original multibeam bathymetry surface (2 m grid), read from disk
bathy_og <- raster("Data/raw/Multibeam/ElbowDec2015Cube_2m.tiff")
class : RasterLayer
dimensions : 13948, 4908, 68456784 (nrow, ncol, ncell)
resolution : 2, 2 (x, y)
extent : 184571, 194387, 3064261, 3092157 (xmin, xmax, ymin, ymax)
crs : +proj=utm +zone=17 +datum=WGS84 +units=m +no_defs
source : C:/Users/socce/Documents/Grad_School/Research/R_Projects/Elbow_Paper/Data/raw/Multibeam/ElbowDec2015Cube_2m.tiff
names : ElbowDec2015Cube_2m
values : -77.40717, -44.16052 (min, max)
Backscatter
# Original multibeam backscatter mosaic (1 m grid), read from disk
backscatter_og <- raster("Data/raw/Multibeam/EL1_1mTimeSeriesBS_TrimmedtoMosaic_AVG800.tiff")
class : RasterLayer
dimensions : 26788, 8382, 224537016 (nrow, ncol, ncell)
resolution : 1, 1 (x, y)
extent : 185107.5, 193489.5, 3064871, 3091659 (xmin, xmax, ymin, ymax)
crs : +proj=utm +zone=17 +datum=WGS84 +units=m +no_defs
source : C:/Users/socce/Documents/Grad_School/Research/R_Projects/Elbow_Paper/Data/raw/Multibeam/EL1_1mTimeSeriesBS_TrimmedtoMosaic_AVG800.tiff
names : EL1_1mTimeSeriesBS_TrimmedtoMosaic_AVG800
values : -13.84911, -5.748498 (min, max)
# Study-area polygon; used further down to mask the raster surfaces and to
# clip the ground-truth points
EL_shp <- readOGR(dsn = "Data/derived/shapefiles/EL_Shp.shp")
[1] "SpatialPolygonsDataFrame"
attr(,"package")
[1] "sp"
[1] "Extent: 777033 783941 3065379 3090118 (xmin, xmax, ymin, ymax)"
[1] "crs: +proj=utm +zone=16 +datum=WGS84 +units=m +no_defs"
Resample to matching 10 m grid
# 3x3 window of ones shared by both hole-filling passes
fill_window <- matrix(data = 1, nrow = 3, ncol = 3)
target_crs <- make_crs("UTM 16N")

# Bathymetry: reproject/resample onto a 10 m grid, then fill holes.
# NAonly = TRUE means only NA cells are replaced by the 3x3 mean.
bathy <- projectRaster(from = bathy_og, res = 10, crs = target_crs)
bathy <- focal(
  bathy,
  w = fill_window, fun = mean, na.rm = TRUE, NAonly = TRUE
)
names(bathy) <- "bathy"

# Backscatter: reproject/resample onto the bathymetry grid so that the two
# surfaces line up cell for cell, then fill holes the same way.
backscatter <- projectRaster(from = backscatter_og, to = bathy)
backscatter <- focal(
  backscatter,
  w = fill_window, fun = mean, na.rm = TRUE, NAonly = TRUE
)
names(backscatter) <- "backscatter"
Trim data to mask
# Keep only the cells that fall inside the study-area polygon
bathy <- mask(x = bathy, mask = EL_shp)
backscatter <- mask(x = backscatter, mask = EL_shp)
Finalized Surfaces
Figure S1.2

Calculate Derivative Features
Determine scales of analysis
The Fibonacci sequence is a good way to systematically cover multiple scales (Wilson, 2007).
# Odd window widths (2 * f + 1) built from the Fibonacci terms after the
# first two entries returned by fib_seq(10)
fib_terms <- fib_seq(10)[-c(1, 2)]
my_window_sizes <- 1 + (2 * fib_terms)
rm(fib_terms)
print(my_window_sizes)
[1] 3 5 7 11 17 27 43 69
We will evaluate predictors at 8 different window sizes from 3x3 to 69x69
Bathymetric Derivative Features
Calculate bathymetric derivatives at various scales of analysis. This includes the slope, aspect (split into Northness and Eastness), Topographic Position Index (Whether an area is a local high or low), mean bathymetry, and standard deviation of bathymetry (a measure of rugosity).
# Multi-scale bathymetric derivatives.
#
# For every window size in `my_window_sizes` this computes the focal mean of
# slope, eastness and northness, the TPI, and the focal mean and standard
# deviation of bathymetry. Layers are named "<metric>_<n>x<n>". The
# native-resolution slope/eastness/northness layers are kept as the first
# layer of their respective stacks.
#
# Results are collected in preallocated lists and combined after the loop, so
# the code no longer depends on the first window size being exactly 3.

stopifnot(length(my_window_sizes) > 0)

# Builds the "<prefix>_<n>x<n>" layer name used throughout
win_name <- function(prefix, size) {
  paste0(prefix, "_", as.character(size), "x", as.character(size))
}

# Native-resolution terrain attributes
slope_native <- terrain(bathy, opt = "slope", unit = "degrees")
names(slope_native) <- "slope_native"
aspect <- terrain(bathy, opt = c("aspect"), unit = "radians")
# Aspect is circular, so it is split into its sine and cosine components
eastness_native <- sin(aspect)
names(eastness_native) <- "eastness_native"
northness_native <- cos(aspect)
names(northness_native) <- "northness_native"

n_win <- length(my_window_sizes)
slope_ms <- vector("list", n_win)
eastness_ms <- vector("list", n_win)
northness_ms <- vector("list", n_win)
tpi_ms <- vector("list", n_win)
bathy_mean_ms <- vector("list", n_win)
bathy_sd_ms <- vector("list", n_win)

for (k in seq_len(n_win)) {
  i <- my_window_sizes[k]
  print(i) # progress indicator
  w <- matrix(data = 1, nrow = i, ncol = i)

  curr_slope <- focal(x = slope_native, w = w, fun = mean, na.rm = TRUE, pad = TRUE)
  names(curr_slope) <- win_name("slope", i)
  slope_ms[[k]] <- curr_slope

  curr_eastness <- focal(x = eastness_native, w = w, fun = mean, na.rm = TRUE, pad = TRUE)
  names(curr_eastness) <- win_name("eastness", i)
  eastness_ms[[k]] <- curr_eastness

  curr_northness <- focal(x = northness_native, w = w, fun = mean, na.rm = TRUE, pad = TRUE)
  names(curr_northness) <- win_name("northness", i)
  northness_ms[[k]] <- curr_northness

  curr_tpi <- TPI(bathy, d = c(i, i), na.rm = TRUE, pad = TRUE)
  names(curr_tpi) <- win_name("tpi", i)
  tpi_ms[[k]] <- curr_tpi

  curr_BathyMean <- focal(x = bathy, w = w, fun = mean, na.rm = TRUE, pad = TRUE)
  names(curr_BathyMean) <- win_name("BathyMean", i)
  bathy_mean_ms[[k]] <- curr_BathyMean

  curr_BathySD <- focal(x = bathy, w = w, fun = sd, na.rm = TRUE, pad = TRUE)
  names(curr_BathySD) <- win_name("BathySD", i)
  bathy_sd_ms[[k]] <- curr_BathySD
}

# Fold each list with addLayer, in window order; slope/eastness/northness
# start from their native-resolution layer.
slope <- Reduce(addLayer, slope_ms, slope_native)
eastness <- Reduce(addLayer, eastness_ms, eastness_native)
northness <- Reduce(addLayer, northness_ms, northness_native)
tpi <- Reduce(addLayer, tpi_ms)
BathyMean <- Reduce(addLayer, bathy_mean_ms)
BathySD <- Reduce(addLayer, bathy_sd_ms)
rm(slope_ms, eastness_ms, northness_ms, tpi_ms, bathy_mean_ms, bathy_sd_ms)

bathy_deriv <- stack(BathyMean, BathySD, tpi, slope, eastness, northness)
# Trim back down to the study area: pad = TRUE with na.rm = TRUE lets the
# focal statistics spill past the masked edge
bathy_deriv <- mask(x = bathy_deriv, EL_shp)
Backscatter Derivative Features
Calculate backscatter derivatives at various scales of analysis. This includes the mean and standard deviation of backscatter as well as texture metrics derived from a Gray Level Co-occurrence Matrix (GLCM).
The most common texture metrics from remote sensing were derived using the formulas from (Hall-Beyer 2017). Rotationally Invariant/Directionally Isotropic Haralick Texture Metrics were derived by calculating a symmetrical GLCM with 32 gray levels. The texture metrics can be broken down into three groups: The contrast group (contrast, dissimilarity, and homogeneity), the orderliness group (ASM and Entropy), and the descriptive statistics group (mean, variance, and correlation; Hall-Beyer, 2017). At smaller window sizes, the correlation texture can sometimes be undefined (0/0) leading to holes in the resulting raster surfaces, so correlation at 3x3, 5x5, and 7x7 were removed from the analysis to prevent this.
# Multi-scale backscatter derivatives.
#
# For every window size in `my_window_sizes` this computes the focal mean and
# standard deviation of backscatter plus eight GLCM texture metrics from the
# 32-level quantized backscatter. Every layer name gets an "_<n>x<n>" suffix.
#
# Per-window stacks are collected in a preallocated list and combined after
# the loop, so the code no longer depends on the first window size being 3.

backscatter_quant <- quantize_raster(backscatter, n_levels = 32, method = "equal prob")

stopifnot(length(my_window_sizes) > 0)

# The four pixel shifts passed to glcm_textures for every window
glcm_shifts <- list(c(1, 0), c(1, 1), c(0, 1), c(-1, 1))
glcm_metrics <- c(
  "glcm_contrast", "glcm_dissimilarity", "glcm_homogeneity", "glcm_ASM",
  "glcm_entropy", "glcm_mean", "glcm_variance", "glcm_correlation"
)

bs_by_window <- vector("list", length(my_window_sizes))
for (k in seq_along(my_window_sizes)) {
  i <- my_window_sizes[k]
  print(i) # progress indicator
  w <- matrix(data = 1, nrow = i, ncol = i)

  curr_bs_mean <- focal(backscatter, w, fun = mean, na.rm = TRUE, pad = TRUE)
  names(curr_bs_mean) <- "BackscatterMean"
  curr_bs_sd <- focal(backscatter, w, fun = sd, na.rm = TRUE, pad = TRUE)
  names(curr_bs_sd) <- "BackscatterSD"

  curr_BSderiv <- stack(
    curr_bs_mean,
    curr_bs_sd,
    # quantization = "none" because the input was already quantized above
    glcm_textures(
      backscatter_quant,
      w = c(i, i), n_levels = 32, shift = glcm_shifts,
      metrics = glcm_metrics, quantization = "none",
      na_opt = "center", pad = TRUE
    )
  )
  names(curr_BSderiv) <- paste0(names(curr_BSderiv), "_", as.character(i), "x", as.character(i))
  bs_by_window[[k]] <- curr_BSderiv
}
rm(curr_BSderiv)

# Combine the per-window stacks pairwise, in window order
backscatter_deriv <- Reduce(stack, bs_by_window)
rm(bs_by_window)

backscatter_deriv <- mask(backscatter_deriv, EL_shp)

# Remove small-window correlation layers: they contain NAs that create holes
# due to a zero in the denominator
corr_to_drop <- c("glcm_correlation_3x3", "glcm_correlation_5x5", "glcm_correlation_7x7")
backscatter_deriv <- dropLayer(
  backscatter_deriv,
  which(names(backscatter_deriv) %in% corr_to_drop)
)
Georeference Ground-Truth Data
Transects
Solid lines represent transects for training data, and dashed line represents the transect where observations were withheld in order to test the accuracy of substrate predictions
Figure S1.3

Read in Video Habitat Observations
Read in Timestamps for Camera Images
# Per-frame camera timestamps (joined to the habitat calls by Transect and
# Frame_num further down)
cam <- read_csv(file = "Data/derived/CBASS/camera.csv")
Read in Position and Ancillary Data Streams
# Ship navigation (Hypack) and cable payout records
Hypack <- read_hypack(list.files("Data/raw/Hypack/", pattern = "\\.RAW$", full.names = TRUE))
cable_out <- read_tsv("Data/raw/payout/payout_Feb&Oct2016.tsv", col_types = list(.default = col_double(), timestamp = col_datetime()))
Lat_Long <- get_HypackPos(Hypack)
names(Lat_Long)[2:3] <- c("ShipLon", "ShipLat")
Ship_Speed <- get_HypackSpeed(Hypack)

# Build the 1 Hz table for one transect.
#   sensor_dir: folder name under Data/raw/CBASS/sensors/ (e.g. "T1_D4")
#   Lat_Long, Ship_Speed, cable_out: passed straight through to make_1hz()
# Returns whatever make_1hz() returns for that transect.
read_transect_1hz <- function(sensor_dir, Lat_Long, Ship_Speed, cable_out) {
  sensor_path <- function(file_name) {
    paste0("Data/raw/CBASS/sensors/", sensor_dir, "/", file_name)
  }
  make_1hz(
    alt_path = sensor_path("altimeter_readings.tsv"),
    compass_path = sensor_path("compass_readings.tsv"),
    ctd_path = sensor_path("ctd_redo_sal.tsv"),
    Lat_Long = Lat_Long, Ship_Speed = Ship_Speed, cable_out = cable_out
  )
}

T1_oneHz <- read_transect_1hz("T1_D4", Lat_Long, Ship_Speed, cable_out)
T3_oneHz <- read_transect_1hz("T3_D1", Lat_Long, Ship_Speed, cable_out)
T5_oneHz <- read_transect_1hz("T5_D14", Lat_Long, Ship_Speed, cable_out)
T6_oneHz <- read_transect_1hz("T6_D13", Lat_Long, Ship_Speed, cable_out)

# Stack the transects into one table, tagging each row with its transect
one_hz <- bind_rows(
  mutate(T1_oneHz, Transect = "T1"),
  mutate(T3_oneHz, Transect = "T3"),
  mutate(T5_oneHz, Transect = "T5"),
  mutate(T6_oneHz, Transect = "T6")
)
one_hz
Estimate C-BASS Position
CBASS position is estimated with the Pythagorean theorem from the cable out (the hypotenuse) and CBASS depth (the height of the triangle). The cable out is zeroed when the CBASS hits the water, so to get the true cable out from the trawl block, and the true height of this triangle, the height of the trawl block above the water (~6.1 m) is added to the recorded cable out and CBASS depth. The layback behind the block is then calculated using the Pythagorean theorem. The y offset (fore/aft distance; ~24.4 m) between the trawl block and the GPS antenna is then added to this layback to get the layback of the CBASS system behind the logged GPS ship position. CBASS is assumed to follow the ship track but with a time delay. The layback distance is converted to a time delay by dividing the layback distance by the average ship speed over the past minute. To get the CBASS position for a given time, the ship position from “x” number of seconds ago (the time delay) is grabbed from the table and assigned as the current CBASS position. For more information see the documentation and source code for mytools::calc_layback (arguments used GPS_Source=“Northstar”, zeroed=“water”, cat_fact=1).
# Ship track keyed by time: the ship position at time t becomes the C-BASS
# position for whichever row has TimeToMatch == t (within the same transect)
ship_track <- one_hz %>%
  select(TimeToMatch = timestamp, Transect, CBASSLon = ShipLon, CBASSLat = ShipLat)

# Layback distance -> time delay (rounded to whole seconds) -> the earlier
# timestamp at which the ship was where the C-BASS is now
one_hz <- one_hz %>%
  mutate(
    Layback_m = calc_layback(payout = payout, depth = depth, GPS_Source = "Northstar", zeroed = "water", cat_fact = 1),
    Layback_sec = round(Layback_m / Ship_Speed_mps_1minAvg),
    TimeToMatch = timestamp - dseconds(Layback_sec)
  )

# Look up the delayed ship position; TimeToMatch is only a join key
one_hz <- one_hz %>%
  left_join(ship_track, by = c("TimeToMatch", "Transect")) %>%
  select(-TimeToMatch)
rm(ship_track)

one_hz
write_csv(one_hz, "Data/derived/CBASS/onehz.csv")
Georeference Video Habitat Observations
Calculate Frame number. Videos are 1 minute long at 12 frames per second.
# Frame index = whole videos elapsed (12 frames/s * 60 s each) plus the
# frames elapsed within the current video
hab <- mutate(hab, Frame_num = (Vid * 12 * 60) + Sec * 12)
Get timestamp for each frame to nearest second
# Attach the camera timestamp of each frame, add the microsecond component,
# and round to the nearest whole second; u_second is dropped afterwards
hab <- hab %>%
  left_join(
    select(cam, timestamp, u_second, Frame_num, Transect),
    by = c("Transect", "Frame_num")
  ) %>%
  mutate(
    timestamp = round_date(timestamp + dmicroseconds(u_second), unit = "second")
  ) %>%
  select(-u_second)
Link to position by timestamp
# Attach the estimated C-BASS position to each habitat observation by
# matching on timestamp within transect
hab <- hab %>%
  left_join(
    select(one_hz, timestamp, Transect, CBASSLat, CBASSLon),
    by = c("timestamp", "Transect")
  )
hab
# Destination passed positionally (as for onehz.csv): the `path` argument of
# write_csv() is deprecated in readr >= 1.4.0 in favour of `file`
write_csv(hab, "Data/derived/CBASS/hab.csv")
Prepare training and validation set
Only include habitats that were the same as their previous and subsequent observations to remove transitional/mixed areas to create set used for training and validation.
# Apply const_hab() to Substrate within each transect, then drop the grouping
TV_set <- hab %>%
  group_by(Transect) %>%
  const_hab("Substrate") %>%
  ungroup()
print(unique(TV_set$Substrate))
[1] "Sand" "Rock" "No_Vis"
TV_set <- TV_set %>%
  # Remove bad visibility
  filter(Substrate != "No_Vis") %>%
  mutate(
    Substrate = factor(Substrate, levels = c("Rock", "Sand")),
    # Reserve T1 for validation of results; other transects are training data
    Set = ifelse(test = Transect == "T1", yes = "Validation", no = "Training")
  )
Create Spatial Object
# Remove observations with no positioning data
TV_set <- TV_set %>% filter(!is.na(CBASSLat))

# Build WGS84 points (x = longitude, y = latitude) carrying every remaining
# column as attributes, then project into UTM 16N
tv_coords <- cbind(TV_set$CBASSLon, TV_set$CBASSLat)
tv_attrs <- as.data.frame(select(TV_set, -c(CBASSLon, CBASSLat)))
tv_wgs84 <- SpatialPointsDataFrame(
  coords = tv_coords,
  data = tv_attrs,
  proj4string = make_crs("WGS84")
)
TV_set <- spTransform(tv_wgs84, CRSobj = make_crs("UTM 16N"))
rm(tv_coords, tv_attrs, tv_wgs84)

# Remove points beyond the bounds of the study area
TV_set <- TV_set[EL_shp, ]
The ground-truth dataset consisted of 3680 observations, where each observation was the substrate classification determined from the video at 15 second intervals. This consisted of 473 observations of rock, 3195 observations of sand, and 12 observations where substrate was not discernible. After censoring substrate determinations that differed from their previous and subsequent observations, those where substrate was not visible, and observations beyond the bounds of the multibeam survey or did not have associated positioning data, there were 238 observations of rock and 2533 observations of sand. These data were then split into training and validation sets. The training data set consisted of 210 observations of rock and 1947 observations of sand. The validation transect consisted of 28 observations of rock and 586 observations of sand.
Figure S1.4

# No `by` is given, so dplyr joins on every column name the two tables share
# and reports the chosen keys in a message.
# NOTE(review): presumably timestamp, Transect, CBASSLat and CBASSLon at this
# point - confirm against the join message.
hab_onehz_joined <- left_join(hab, one_hz)
Transects total to 109 km in length
Overlaying vertical relief on substrate
Because higher relief features are relatively rare, and are fairly small relative to the error associated with positioning of the CBASS, we did not directly try to predict this in the supervised and unsupervised models. However, we can measure vertical relief as a difference between the minimum and maximum values. Total depth was calculated from the CBASS sensors as CBASS depth + CBASS altitude (where altitude measurements have been adjusted for the pitch of the system). Vertical relief for each 15 second bin was calculated as the maximum change in depth over that 15 second observation window. A box plot of the assigned visual relief class, and the measured vertical relief from the sensors is shown below. In blue, orange, and red are the relief cutoffs for low, moderate, and high relief respectively as proposed by Smith et al (2011) in their reef fish survey of the Florida Keys. This appears to match up fairly well with our observations, so we will use these cutoffs: low relief <= 1m ; 1m < moderate relief <= 2m; high relief > 2m.
Figure S1.13
# Total depth = C-BASS depth + altitude, with altitude scaled by cos(pitch)
one_hz <- one_hz %>%
  mutate(Total_Depth = depth + altitude * cos(radians(pitch)))
one_hz <- one_hz %>% mutate(vertical_relief_15s = NA_real_)

# Rolling range of Total_Depth over a centred 15-row window (7 rows either
# side). Rows whose window is incomplete, spans more than one transect, or
# holds no usable depth keep NA.
half_win <- 7L
n_obs <- nrow(one_hz)
# Build the centre indices explicitly: 8:(n_obs - 7) would count backwards
# (and produce invalid subscripts) when the table has fewer than 15 rows.
centre_rows <- if (n_obs >= 2L * half_win + 1L) {
  (half_win + 1L):(n_obs - half_win)
} else {
  integer(0)
}

for (i in centre_rows) {
  curr_idx <- (i - half_win):(i + half_win)
  if (length(unique(one_hz$Transect[curr_idx])) != 1) {
    next # Prevent calculating across different transects
  }
  dvalues <- one_hz$Total_Depth[curr_idx]
  dvalues <- dvalues[!is.na(dvalues)]
  if (length(dvalues) == 0) {
    next # All depths missing: leave NA without min()/max() warnings
  }
  vrelief <- max(dvalues) - min(dvalues)
  if (is.infinite(vrelief)) {
    vrelief <- NA_real_
  }
  one_hz$vertical_relief_15s[i] <- vrelief
}
# Pair each visually assigned relief class with the sensor-derived vertical
# relief at the same timestamp; drop observations classed "None" or "No_Vis"
hab_relief <- hab %>%
  left_join(one_hz, by = c("timestamp", "Transect")) %>%
  select(timestamp, Transect, Relief, vertical_relief_15s) %>%
  filter(Relief != "None", Relief != "No_Vis") %>%
  mutate(
    Relief = factor(Relief, levels = c("Low_Relief", "Moderate_Relief", "High_Relief"))
  )

# Full-width translucent band between two y values
relief_band <- function(lower, upper, colour) {
  annotate(
    "rect",
    xmin = -Inf, xmax = Inf, ymin = lower, ymax = upper,
    fill = colour, alpha = 0.2
  )
}

# Bands are added before the box plot so that they are drawn underneath it
visual_relief_plot <- ggplot(
  data = hab_relief,
  mapping = aes(x = Relief, y = vertical_relief_15s)
) +
  relief_band(0, 1, "dodgerblue") +
  relief_band(1, 2, "orange") +
  relief_band(2, Inf, "red") +
  geom_boxplot() +
  scale_y_continuous(breaks = seq(0, 8, by = 1), expand = c(0, 0)) +
  ylab("Vertical Relief (m) over 15 s") +
  xlab("Relief Class")
visual_relief_plot

These thresholds are then used to delineate higher relief areas directly from the bathymetry by calculating the difference between a central pixel and the minimum depth of surrounding pixels using a sliding 3x3 window.
Figure S1.14
# 3x3 neighbourhood with the centre cell excluded
w <- matrix(data = 1, nrow = 3, ncol = 3)
w[2, 2] <- NA

# Minimum bathymetry value among the surrounding cells
min_bathy <- focal(x = bathy, w = w, fun = min, na.rm = TRUE)
min_bathy[is.infinite(min_bathy)] <- NA # Replace Inf with NA

# Vertical relief: centre cell minus the neighbourhood minimum
relief <- bathy - min_bathy

# Reclassification rules, one row per class: (from, to] -> class id
#   (-Inf, 1] -> 1 ; (1, 2] -> 2 ; (2, Inf] -> 3
relief_qrules <- rbind(
  c(-Inf, 1, 1),
  c(1, 2, 2),
  c(2, Inf, 3)
)
relief_classified <- raster::reclassify(
  relief,
  rcl = relief_qrules, include.lowest = FALSE, right = TRUE
)
relief_classified <- as.factor(relief_classified)
levels(relief_classified)[[1]]$Relief_Class <- c("Low", "Mod", "High")

# Continuous relief map
Relief_Map_Plot <- tm_shape(relief, raster.downsample = FALSE) +
  tm_raster(palette = pal1, midpoint = NA, style = "cont", title = "Relief (m)") +
  tm_graticules(lines = FALSE)

# Classified relief map
relief_classified_stars <- st_as_stars(relief_classified, ignore_file = TRUE)
Relief_Classified_Plot <- tm_shape(relief_classified_stars, raster.downsample = FALSE) +
  tm_raster(palette = c("gray", "yellow", "red"), title = "Relief Class") +
  tm_graticules(lines = FALSE)
rm(relief_classified_stars)

tmap_arrange(Relief_Map_Plot, Relief_Classified_Plot)

Figure S1.15
# Combine substrate and relief class into one two-digit code:
# tens digit = substrate code from sup_hab, ones digit = relief class (1-3).
# NOTE(review): sup_hab is defined outside this excerpt; the collapse below
# implies it codes rock as 1 and sand as 2 - confirm.
sub_relief <- (sup_hab * 10) + relief_classified

# Collapse sand to one category (21) regardless of relief class
sub_relief[sub_relief == 22 | sub_relief == 23] <- 21

sub_relief <- as.factor(sub_relief)
names(sub_relief) <- "Habitat"
levels(sub_relief)[[1]]$habitat <- c("LR Rock", "MR Rock", "HR Rock", "Sand")

sub_relief_stars <- droplevels(st_as_stars(sub_relief, ignore_file = TRUE), exclude = NA)
sub_relief_plot <- tm_shape(sub_relief_stars, raster.downsample = FALSE) +
  tm_raster(palette = c("dodgerblue", "orange", "red", "#FFEBBE")) +
  tm_graticules(lines = FALSE)
rm(sub_relief_stars)
sub_relief_plot

