GeoMLA

Machine Learning algorithms for spatial and spatiotemporal data

166
75
R

----setup, include=FALSE------------------------------------------------

knitr::opts_chunk$set(echo = TRUE)

---- eval=FALSE, echo=TRUE----------------------------------------------

devtools::install_github(“imbs-hl/ranger”)

---- echo=TRUE----------------------------------------------------------

library(GSIF)
library(rgdal)
library(raster)
library(geoR)
library(ranger)

---- warning=FALSE------------------------------------------------------

library(gstat)
library(intamap)
library(plyr)
library(plotKML)
library(scales)
library(RCurl)
library(parallel)
library(lattice)
library(gridExtra)

------------------------------------------------------------------------

source(‘./RF_vs_kriging/R/RFsp_functions.R’)

----meuse---------------------------------------------------------------

demo(meuse, echo=FALSE)

----bufferdist----------------------------------------------------------

grid.dist0 <- GSIF::buffer.dist(meuse[“zinc”], meuse.grid[1], as.factor(1:nrow(meuse)))

------------------------------------------------------------------------

dn0 <- paste(names(grid.dist0), collapse=“+”)
fm0 <- as.formula(paste("zinc ~ ", dn0))
fm0

------------------------------------------------------------------------

ov.zinc <- over(meuse[“zinc”], grid.dist0)
rm.zinc <- cbind(meuse@data[“zinc”], ov.zinc)

------------------------------------------------------------------------

m.zinc <- ranger(fm0, rm.zinc, quantreg=TRUE, num.trees=150, seed=1)
m.zinc

------------------------------------------------------------------------

zinc.rfd <- predict(m.zinc, grid.dist0@data, type=“quantiles”, quantiles=quantiles)$predictions
str(zinc.rfd)

------------------------------------------------------------------------

meuse.grid$zinc_rfd = zinc.rfd[,2]
meuse.grid$zinc_rfd_range = (zinc.rfd[,3]-zinc.rfd[,1])/2

------------------------------------------------------------------------

zinc.geo <- as.geodata(meuse[“zinc”])
ini.v <- c(var(log1p(zinc.geo$data)),500)
zinc.vgm <- likfit(zinc.geo, lambda=0, ini=ini.v, cov.model=“exponential”)
zinc.vgm

------------------------------------------------------------------------

locs = meuse.grid@coords
zinc.ok <- krige.conv(zinc.geo, locations=locs, krige=krige.control(obj.model=zinc.vgm))
meuse.grid$zinc_ok = zinc.ok$predict
meuse.grid$zinc_ok_range = sqrt(zinc.ok$krige.var)

------------------------------------------------------------------------

meuse.grid$SW_occurrence = readGDAL(“./RF_vs_kriging/data/meuse/Meuse_GlobalSurfaceWater_occurrence.tif”)$band1[[email protected]]
meuse.grid$AHN = readGDAL(“./RF_vs_kriging/data/meuse/ahn.asc”)$band1[[email protected]]

------------------------------------------------------------------------

grids.spc = GSIF::spc(meuse.grid, as.formula(“~ SW_occurrence + AHN + ffreq + dist”))

------------------------------------------------------------------------

fm1 <- as.formula(paste("zinc ~ ", dn0, " + ", paste(names(grids.spc@predicted), collapse = “+”)))
fm1
ov.zinc1 <- over(meuse[“zinc”], grids.spc@predicted)
rm.zinc1 <- do.call(cbind, list(meuse@data[“zinc”], ov.zinc, ov.zinc1))

------------------------------------------------------------------------

m1.zinc <- ranger(fm1, rm.zinc1, importance=“impurity”, quantreg=TRUE, num.trees=150, seed=1)
m1.zinc

----rf-variableImportance, echo=FALSE-----------------------------------

xl <- as.list(ranger::importance(m1.zinc))
par(mfrow=c(1,1),oma=c(0.7,2,0,1), mar=c(4,3.5,1,0))
plot(vv <- t(data.frame(xl[order(unlist(xl), decreasing=TRUE)[10:1]])), 1:10, type = “n”, ylab = “”, yaxt = “n”, xlab = “Variable Importance (Node Impurity)”)
abline(h = 1:10, lty = “dotted”, col = “grey60”)
points(vv, 1:10)
axis(2, 1:10, labels = dimnames(vv)[[1]], las = 2)

------------------------------------------------------------------------

zinc.geo$covariate = ov.zinc1
sic.t = ~ PC1 + PC2 + PC3 + PC4 + PC5
zinc1.vgm <- likfit(zinc.geo, trend = sic.t, lambda=0, ini=ini.v, cov.model=“exponential”)
zinc1.vgm

------------------------------------------------------------------------

KC = krige.control(trend.d = sic.t, trend.l = ~ grids.spc@predicted$PC1 + grids.spc@predicted$PC2 + grids.spc@predicted$PC3 + grids.spc@predicted$PC4 + grids.spc@predicted$PC5, obj.model = zinc1.vgm)
zinc.uk <- krige.conv(zinc.geo, locations=locs, krige=KC)
meuse.grid$zinc_UK = zinc.uk$predict

------------------------------------------------------------------------

meuse@data = cbind(meuse@data, data.frame(model.matrix(~soil-1, meuse@data)))
summary(as.factor(meuse$soil1))

------------------------------------------------------------------------

fm.s1 = as.formula(paste(“soil1 ~ “, paste(names(grid.dist0), collapse=”+”), " + SW_occurrence + dist"))
rm.s1 <- do.call(cbind, list(meuse@data[“soil1”], over(meuse[“soil1”], meuse.grid), over(meuse[“soil1”], grid.dist0)))
m1.s1 <- ranger(fm.s1, rm.s1, mtry=22, num.trees=150, seed=1, quantreg=TRUE)
m1.s1

------------------------------------------------------------------------

fm.s1c <- as.formula(paste(“soil1c ~ “, paste(names(grid.dist0), collapse=”+”), " + SW_occurrence + dist"))
rm.s1$soil1c = as.factor(rm.s1$soil1)
m2.s1 <- ranger(fm.s1c, rm.s1, mtry=22, num.trees=150, seed=1, probability=TRUE, keep.inbag=TRUE)
m2.s1

------------------------------------------------------------------------

pred.regr <- predict(m1.s1, cbind(meuse.grid@data, grid.dist0@data), type=“response”)
pred.clas <- predict(m2.s1, cbind(meuse.grid@data, grid.dist0@data), type=“se”)

------------------------------------------------------------------------

fm.s = as.formula(paste(“soil ~ “, paste(names(grid.dist0), collapse=”+”), " + SW_occurrence + dist"))
fm.s

------------------------------------------------------------------------

rm.s <- do.call(cbind, list(meuse@data[“soil”], over(meuse[“soil”], meuse.grid), over(meuse[“soil”], grid.dist0)))
m.s <- ranger(fm.s, rm.s, mtry=22, num.trees=150, seed=1, probability=TRUE, keep.inbag=TRUE)
m.s

------------------------------------------------------------------------

m.s0 <- ranger(fm.s, rm.s, mtry=22, num.trees=150, seed=1)
m.s0

------------------------------------------------------------------------

pred.soil_rfc = predict(m.s, cbind(meuse.grid@data, grid.dist0@data), type=“se”)
pred.grids = meuse.grid[“soil”]
pred.grids@data = do.call(cbind, list(pred.grids@data, data.frame(pred.soil_rfc$predictions), data.frame(pred.soil_rfc$se)))
names(pred.grids) = c(“soil”, paste0(“pred_soil”, 1:3), paste0(“se_soil”, 1:3))
str(pred.grids@data)

---- comment=FALSE, warning=FALSE---------------------------------------

library(intamap)
library(gstat)
data(sic2004)
coordinates(sic.val) <- ~x+y
sic.val$value <- sic.val$joker
coordinates(sic.test) <- ~x+y
pred.sic2004 <- interpolate(sic.val, sic.test, maximumTime = 90)

------------------------------------------------------------------------

sd(sic.test$joker-pred.sic2004$predictions$mean)

------------------------------------------------------------------------

bbox=sic.val@bbox
bbox[,“min”]=bbox[,“min”]-4000
bbox[,“max”]=bbox[,“max”]+4000
de2km = plotKML::vect2rast(sic.val, cell.size=2000, bbox=bbox)
de2km$mask = 1
de2km = as(de2km[“mask”], “SpatialPixelsDataFrame”)
de.dist0 <- GSIF::buffer.dist(sic.val[“joker”], de2km, as.factor(1:nrow(sic.val@data)))

------------------------------------------------------------------------

ov.de = over(sic.val[“joker”], de.dist0)
de.dn0 <- paste(names(de.dist0), collapse=“+”)
de.fm1 <- as.formula(paste("joker ~ ", de.dn0))
de.rm = do.call(cbind, list(sic.val@data[“joker”], ov.de))
m1.gamma <- ranger(de.fm1, de.rm[complete.cases(de.rm),], mtry=1)
m1.gamma

------------------------------------------------------------------------

de2km$gamma_rfd1 = predict(m1.gamma, de.dist0@data)$predictions
ov.test <- over(sic.test, de2km[“gamma_rfd1”])
sd(sic.test$joker-ov.test$gamma_rfd1, na.rm=TRUE)

----rf-SIC2004joker, echo=FALSE, fig.width=6, fig.cap=“RFsp predicted gamma radiometrics with two extreme values.”----

par(oma=c(0,0,0,1), mar=c(0,0,4,3))
plot(raster(de2km[“gamma_rfd1”]), col=rev(bpy.colors()))
points(sic.val, pch=“+”)

------------------------------------------------------------------------

carson <- read.csv(file=“./RF_vs_kriging/data/NRCS/carson_CLYPPT.csv”)
str(carson)

------------------------------------------------------------------------

carson$DEPTH.f = ifelse(is.na(carson$DEPTH), 20, carson$DEPTH)
carson1km <- readRDS(“./RF_vs_kriging/data/NRCS/carson_covs1km.rds”)
coordinates(carson) <- ~X+Y
proj4string(carson) = carson1km@proj4string
rm.carson <- cbind(as.data.frame(carson), over(carson[“CLYPPT”], carson1km))
fm.clay <- as.formula(paste("CLYPPT ~ DEPTH.f + ", paste(names(carson1km), collapse = “+”)))
fm.clay

------------------------------------------------------------------------

rm.carson <- rm.carson[complete.cases(rm.carson[,all.vars(fm.clay)]),]
rm.carson.s <- rm.carson[sample.int(size=2000, nrow(rm.carson)),]

------------------------------------------------------------------------

m.clay <- ranger(fm.clay, rm.carson.s, num.trees=150, mtry=25, case.weights=1/(rm.carson.s$CLYPPT.sd^2), quantreg = TRUE)
m.clay

------------------------------------------------------------------------

geochem = readRDS(“./RF_vs_kriging/data/geochem/geochem.rds”)
usa5km = readRDS(“./RF_vs_kriging/data/geochem/usa5km.rds”)

---- warning=FALSE------------------------------------------------------

str(usa5km@data)

negative values are in fact detection limits:

for(i in c(“PB_ICP40”,“CU_ICP40”,“K_ICP40”,“MG_ICP40”)) { geochem[,i] = ifelse(geochem[,i] < 0, abs(geochem[,i])/2, geochem[,i]) }
coordinates(geochem) = ~coords.x1 + coords.x2
proj4string(geochem) = “+proj=longlat +ellps=clrk66 +towgs84=-9.0,151.0,185.0,0.0,0.0,0.0,0.0 +no_defs”
geochem$TYPEDESC = as.factor(paste(geochem$TYPEDESC))
summary(geochem$TYPEDESC)
geochem = spTransform(geochem, CRS(proj4string(usa5km)))
usa5km.spc = spc(usa5km, ~geomap+globedem+dTRI+nlights03+dairp+sdroads)
ov.geochem = over(x=geochem, y=usa5km.spc@predicted)

------------------------------------------------------------------------

t.vars = c(“PB_ICP40”,“CU_ICP40”,“K_ICP40”,“MG_ICP40”)

------------------------------------------------------------------------

df.lst = lapply(t.vars, function(i){cbind(geochem@data[,c(i,“TYPEDESC”)], ov.geochem)})
names(df.lst) = t.vars
for(i in t.vars){colnames(df.lst[[i]])[1] = “Y”}
for(i in t.vars){df.lst[[i]]$TYPE = i}

------------------------------------------------------------------------

rm.geochem = do.call(rbind, df.lst)
type.mat = data.frame(model.matrix(~TYPE-1, rm.geochem))
typed.mat = data.frame(model.matrix(~TYPEDESC-1, rm.geochem))

------------------------------------------------------------------------

rm.geochem.e = do.call(cbind, list(rm.geochem[,c(“Y”,paste0(“PC”,1:21))], type.mat, typed.mat))

------------------------------------------------------------------------

fm.g = as.formula(paste0("Y ~ ", paste0(names(rm.geochem.e)[-1], collapse = “+”)))
fm.g
m1.geochem <- ranger::ranger(fm.g, rm.geochem.e[complete.cases(rm.geochem.e),], importance = “impurity”, seed = 1)
m1.geochem

------------------------------------------------------------------------

co_prec = readRDS(“./RF_vs_kriging/data/st_prec/boulder_prcp.rds”)
str(co_prec)

------------------------------------------------------------------------

co_prec$cdate = floor(unclass(as.POSIXct(as.POSIXct(paste(co_prec$DATE), format=“%Y-%m-%d”)))/86400)
co_prec$doy = as.integer(strftime(as.POSIXct(paste(co_prec$DATE), format=“%Y-%m-%d”), format = “%j”))

------------------------------------------------------------------------

co_locs.sp = co_prec[!duplicated(co_prec$STATION),c(“STATION”,“LATITUDE”,“LONGITUDE”)]
coordinates(co_locs.sp) = ~ LONGITUDE + LATITUDE
proj4string(co_locs.sp) = CRS(“+proj=longlat +datum=WGS84”)
co_grids = readRDS(“./RF_vs_kriging/data/st_prec/boulder_grids.rds”)
co_grids = as(co_grids, “SpatialPixelsDataFrame”)
co_locs.sp = spTransform(co_locs.sp, co_grids@proj4string)
sel.co <- over(co_locs.sp, co_grids[1])
co_locs.sp <- co_locs.sp[!is.na(sel.co$elev_1km),]

------------------------------------------------------------------------

grid.distP <- GSIF::buffer.dist(co_locs.sp[“STATION”], co_grids[1], as.factor(1:nrow(co_locs.sp)))
dnP <- paste(names(grid.distP), collapse=“+”)

------------------------------------------------------------------------

fmP <- as.formula(paste(“PRCP ~ cdate + doy + elev_1km + PRISM_prec +”, dnP))
fmP

------------------------------------------------------------------------

ov.prec <- do.call(cbind, list(co_locs.sp@data, over(co_locs.sp, grid.distP), over(co_locs.sp, co_grids[c(“elev_1km”,“PRISM_prec”)])))
rm.prec <- plyr::join(co_prec, ov.prec)
rm.prec <- rm.prec[complete.cases(rm.prec[,c(“PRCP”,“elev_1km”,“cdate”)]),]