This page provides R code that can be used to generate CSV data files in a format appropriate for AADIS. The data sets referenced in the code below are found in the data folder on the workshop USB. Copy these files to your working folder (assumed to be C:\TEMP). ZIP files for the R packages referenced in the code are in the packages folder on the workshop USB. See the software page for notes on package installation.
Load libraries and set your working directory:
library(geoR); library(sp); library(rgdal); library(maptools); library(spatstat); library(raster); library(spatial)
setwd("C:\\TEMP")
Read in the weather station location details:
ws.df <- read.csv("weather_station.csv", sep = ",")
names(ws.df)
Read in the weather station data:
wsd.df <- read.csv("weather_station_data.csv", sep = ",")
names(wsd.df)
Read in the centroids of the AADIS grid:
pred.df <- read.csv("weather_grid_data.csv", sep = ",")
pred.df <- pred.df[,1:4]
head(pred.df)
Attach the longitude and latitude coordinates for each weather station to the weather data frame:
wsd.df$lon <- ws.df$longitude[match(wsd.df$site, ws.df$id)]
wsd.df$lat <- ws.df$latitude[match(wsd.df$site, ws.df$id)]
data.frame(names(wsd.df))
Read in a boundary map of the study area:
bnd.shp <- readShapePoly("USDA_AADIS_adm0-LL.shp")
proj4string(bnd.shp) <- CRS("+init=epsg:4326")
Load libraries and set your working directory:
library(readxl); library(lubridate); library(dplyr)
setwd("C:\\TEMP")
Read in US weather station location details:
ws <- data.frame(read_excel("Weather Station final list info.xlsx", sheet = 1))
ws <- ws[,c(1,2,6,7,8)]
head(ws)
Make a list of weather station data spreadsheets to be processed:
ss <- c("Pull04edit.xlsx", "Pull05edit.xlsx", "Pull06edit.xlsx", "Pull07edit.xlsx", "Pull08edit.xlsx", "Pull09edit.xlsx","Pull10edit.xlsx", "Pull11edit.xlsx", "Pull12edit.xlsx", "Pull13edit.xlsx", "Pull14edit.xlsx","Pull15edit.xlsx", "Pull16edit.xlsx", "Pull17edit.xlsx","Pull18edit.xlsx")
Create a data frame to gather results:
rval <- data.frame(mnth = -99, yr = -99, station = "N", mtemp = -99, lon = -99, lat = -99)
Take each spreadsheet in turn and calculate average temperature for each month:
for(i in 1:length(ss)){
tdat <- data.frame(read_excel(ss[i], sheet = 1,
col_types = c("text","date","numeric","numeric","numeric","numeric","numeric","numeric","numeric")))
tdat$ob <- as.Date(tdat$ob, format = "%Y-%m-%d")
tdat$yr <- as.numeric(year(tdat$ob))
# Summarise the temperature data by month:
tdat <- tdat %>%
mutate(mtemp = (tempmin + tempmax) / 2, mnth = month(ob)) %>%
group_by(yr, mnth)
tdat <- data.frame(summarise(tdat, station = first(station), mtemp = mean(mtemp, na.rm = TRUE)))
# Look up the lon and lat for this weather station:
tdat$lon <- ws$Longitude[match(tdat$station, ws$Station)]
tdat$lat <- ws$Latitude[match(tdat$station, ws$Station)]
rval <- rbind(rval, tdat)
}
Write the monthly average temperatures out to a CSV file:
rval <- rval[-1,]
head(rval)
rval <- rval[,c(3,5,6,2,1,4)]
write.table(rval, "USDA_AADIS_monthly_avg_temperature_data.csv", row.names = FALSE, col.names = TRUE, sep = ",")
This step will give you an idea if you have a sufficient number to allow smoothing to be appropriate:
wdat <- read.table("USDA_AADIS_monthly_avg_temperature_data.csv", header = TRUE, sep = ",")
table(wdat$yr, wdat$mnth)
table(wdat$station)
Plot the location of the weather stations with valid data:
bnd.shp <- readShapePoly("USDA_AADIS_adm0-LL.shp")
proj4string(bnd.shp) <- CRS("+init=epsg:4326")
tmp <- wdat %>%
group_by(station)
tmp <- data.frame(summarise(tmp, lon = min(lon), lat = min(lat)))
windows(); plot(bnd.shp)
points(x = tmp$lon, y = tmp$lat, pch = 16)
Read in the weather station data file processed above. We re-project the data into UTM which will make selection of a bandwidth for kernel smoothing more intuitive:
EPSG <- make_EPSG()
EPSG[grep("16N", EPSG$note), 1:2]
bnd.utm <- spTransform(bnd.shp, CRS("+init=epsg:32616"))
bnd.w <- as(as(bnd.utm, "SpatialPolygons"), "owin")
windows(); plot(bnd.w, axes = TRUE)
Select complete cases:
id <- complete.cases(wdat)
wdat <- wdat[id,]
Make a SpatialPointsDataFrame:
coords <- SpatialPoints(wdat[,2:3])
wdat.spdf <- SpatialPointsDataFrame(coords, data = wdat)
proj4string(wdat.spdf) <- CRS("+init=epsg:4326")
wdat.spdf <- spTransform(wdat.spdf, CRS("+init=epsg:32616"))
Set the year and month combinations to be processed and create a data frame to collect the results:
combo <- expand.grid(yr = 2011:2017, mnth = 1:12)
combo <- combo[order(combo$yr, combo$mnth),]
rval.df <- data.frame(id = pred.df$grid_ID, lat = pred.df$centre_lat, lon = pred.df$centre_long)
Loop through each row of data frame combo:
for(i in 1:nrow(combo)){
id <- wdat.spdf$yr == combo$yr[i] & wdat.spdf$mnth == combo$mnth[i]
twdat.spdf <- wdat.spdf[id,]
# Create a spatstat ppp object:
twdat.ppp <- ppp(x = coordinates(twdat.spdf)[,1], y = coordinates(twdat.spdf)[,2], marks = twdat.spdf$mtemp, window = bnd.w)
# windows(); plot(twdat.ppp)
# Inverse distance weight estimates:
twdat.idw <- idw(X = twdat.ppp, power = 2, at = "pixels", se = TRUE)
# windows(); plot(twdat.idw$estimate)
# Kernel smoothing using an 18 km bandwidth:
twdat.ksm <- Smooth(X = twdat.ppp, weights = twdat.ppp$marks, sigma = 18000,
at = "pixels", kernel = "gaussian", dimyx = c(200,200))
# windows(); plot(twdat.ksm)
# Coerce wsd.ksm from a spatstat im object to a raster object:
wsd.r <- raster(twdat.ksm)
proj4string(wsd.r) <- CRS("+init=epsg:32616")
# windows(); plot(wsd.r)
# Re-project wsd.r to WGS84:
wsd.r <- projectRaster(wsd.r, crs = CRS("+init=epsg:4326"))
# windows(); plot(wsd.r)
# Coerce wsd.r into AADIS format:
nx <- length(pred.df$grid_ID[pred.df$centre_lat == min(pred.df$centre_lat)])
ny <- length(pred.df$grid_ID[pred.df$centre_lon == min(pred.df$centre_lon)])
# Set the minimum and maximum longitude, recognising that the points used by AADIS are the centroids of each grid cell:
tclen.x <- max(pred.df$centre_lon) - min(pred.df$centre_lon)
tclen.y <- max(pred.df$centre_lat) - min(pred.df$centre_lat)
# Dimensions of each cell in the horizontal (x) and vertical (y) direction:
len.x <- tclen.x / (nx - 1)
len.y <- tclen.y / (ny - 1)
# Set the x and y limits:
xmn <- min(pred.df$centre_long) - (0.5 * len.x)
xmx <- max(pred.df$centre_long) + (0.5 * len.x)
ymn <- min(pred.df$centre_lat) - (0.5 * len.y)
ymx <- max(pred.df$centre_lat) + (0.5 * len.y)
# Generate an empty raster using the extent and number of row and column values calculated above:
aadis.r <- raster(nrows = ny, ncols = nx, xmn = xmn, xmx = xmx, ymn = ymn, ymx = ymx)
# Re-sample wsd.r to match aadis.r:
rwsd.r <- resample(x = wsd.r, y = aadis.r, method = "ngb")
# windows(); plot(rwsd.r)
# Append rwsd.r as a column to rval.df and give it an appropriate name:
rwsd.df <- as.data.frame(rwsd.r, xy = TRUE)
rwsd.df$layer[is.na(rwsd.df$layer)] <- -99
rval.df <- cbind(rval.df, rwsd.df[,3])
names(rval.df)[ncol(rval.df)] <- paste("v", combo$yr[i], "-", combo$mnth[i], sep = "")
head(rval.df)
}
Write the AADIS-formatted weather data out to a CSV file:
write.table(rval.df, "USDA_AADIS_weather_grid_data.csv", row.names = FALSE, col.names = TRUE, sep = ",")
Here we convert a digital elevation raster map directly into AADIS grid format:
mydat.r <- raster("USDA_DEM-ll.tif", sep = "")
windows(); plot(mydat.r)
Coerce mydat.r into AADIS format. What is the range of the AADIS grid in the east-west direction?
range(pred.df$centre_long)
How many points in each direction?
nx <- length(pred.df$grid_ID[pred.df$centre_lat == min(pred.df$centre_lat)])
ny <- length(pred.df$grid_ID[pred.df$centre_lon == min(pred.df$centre_lon)])
nx; ny
Answer: 475 points in the east-west direction; 200 points north-south. Set the minimum and maximum longitude, recognising that points are the centroids of each grid cell. Below "t" stands for total and "c" stands for centroid and "len" stands for length:
tclen.x <- max(pred.df$centre_lon) - min(pred.df$centre_lon)
tclen.y <- max(pred.df$centre_lat) - min(pred.df$centre_lat)
Length of each cell in the horizontal (x) and vertical (y) direction:
clen.x <- tclen.x / (nx - 1)
clen.y <- tclen.y / (ny - 1)
Set the x and y limits:
xmn <- min(pred.df$centre_long) - (0.5 * clen.x)
xmx <- max(pred.df$centre_long) + (0.5 * clen.x)
ymn <- min(pred.df$centre_lat) - (0.5 * clen.y)
ymx <- max(pred.df$centre_lat) + (0.5 * clen.y)
Generate an empty raster using these values:
aadis.r <- raster(nrows = ny, ncols = nx, xmn = xmn, xmx = xmx, ymn = ymn, ymx = ymx)
Re-sample mydat.r to match aadis.r:
rmydat.r <- resample(x = mydat.r, y = aadis.r, method = "ngb")
windows(); plot(rmydat.r)
Write rmydat.r out as a three column data frame (lon, lat, value):
rmydat.df <- as.data.frame(rmydat.r, xy = TRUE)
names(rmydat.df)[3] <- "val"
rmydat.df$val[is.na(rmydat.df$val)] <- -99
write.table(rmydat.df, "USDA_AADIS_DEM.csv", row.names = FALSE, col.names = TRUE, sep = ",")
NOT TESTED. Here we read into R a vegetation raster map where the number assigned to each raster cell is a code representing the predominant vegetation type. We then re-sample the vegetation raster map to conform to the AADIS grid format where the numeric value for each grid cell equals the proportion of a given vegetation type in that cell.
mydat.r <- raster("USDA_vegetation-ll.tif", sep = "")
windows(); plot(mydat.r)
Raster cellS with the value of 557 are of interest. Here we reclassify raster cells with a value of 557 as one and zero otherwise. Values > 0 and ≤ 556 become 0, values > 556 and ≤ 557 become 1 and values > 557 and ≤ 600 become 0:
m <- c(0, 556, 0, 556, 557, 1, 557, 600, 0)
rclmat <- matrix(m, ncol = 3, byrow = TRUE)
rmydat.r <- reclassify(x = mydat.r, rcl = rclmat)
windows(); plot(rmydat.r)
Coerce rmydat.r into AADIS format. What is the range of the AADIS grid in the east-west direction?
range(pred.df$centre_long)
How many points in each direction?
nx <- length(pred.df$grid_ID[pred.df$centre_lat == min(pred.df$centre_lat)])
ny <- length(pred.df$grid_ID[pred.df$centre_lon == min(pred.df$centre_lon)])
nx; ny
Answer: 475 points in the east-west direction; 200 points north-south. Set the minimum and maximum longitude, recognising that points are the centroids of each grid cell. Below "t" stands for total and "c" stands for centroid and "len" stands for length:
tclen.x <- max(pred.df$centre_lon) - min(pred.df$centre_lon)
tclen.y <- max(pred.df$centre_lat) - min(pred.df$centre_lat)
Length of each cell in the horizontal (x) and vertical (y) direction:
clen.x <- tclen.x / (nx - 1)
clen.y <- tclen.y / (ny - 1)
Set the x and y limits:
xmn <- min(pred.df$centre_long) - (0.5 * clen.x)
xmx <- max(pred.df$centre_long) + (0.5 * clen.x)
ymn <- min(pred.df$centre_lat) - (0.5 * clen.y)
ymx <- max(pred.df$centre_lat) + (0.5 * clen.y)
Generate an empty raster using these values:
aadis.r <- raster(nrows = ny, ncols = nx, xmn = xmn, xmx = xmx, ymn = ymn, ymx = ymx)
Calculate the ratio of rmydat.res to aadis.res to the nearest whole number for aggregation:
fact <- round(dim(rmydat.r)[1:2] / dim(aadis.r)[1:2], digits = 0)
fact
Variable fact equals 1 so there is no need (in this example) to aggregate the raster. Use this code if aggregation is not required:
armydat.r <- rmydat.r
rarmydat.r <- resample(x = armydat.r, y = aadis.r)
Use this code if aggregation is required (i.e. fact is greater than one). Here the returned value for each raster cell is the proportion with a value of one:
# armydat.r <- aggregate(x = rmydat.r, fact = fact, fun = function(x, na.rm = TRUE) {mean(x == 1, na.rm = na.rm)})
# rarmydat.r <- resample(x = armydat.rl, y = aadis.r)
Plot to check:
windows(); plot(rarmydat.r)
Write rarmydat.r out as a three column data frame (lon, lat, value):
rarmydat.df <- as.data.frame(rarmydat.r, xy = TRUE)
names(rarmydat.df)[3] <- "val"
rarmydat.df$val[is.na(rarmydat.df$val)] <- -99
write.table(rarmydat.df, "USDA_AADIS_vegetation.csv", row.names = FALSE, col.names = TRUE, sep = ",")