## ----setup, echo=TRUE, include=FALSE------------------------------------------
library(knitr)
library(rasterVis)


## ----nlcd---------------------------------------------------------------------
library(raster)
nlcd <- brick('data/rs/nlcd-L1.tif')
names(nlcd) <- c("nlcd2001", "nlcd2011")

# The class names and colors for plotting
nlcdclass <- c("Water", "Developed", "Barren", "Forest", "Shrubland",
               "Herbaceous", "Planted/Cultivated", "Wetlands")
classdf <- data.frame(classvalue1 = c(1, 2, 3, 4, 5, 7, 8, 9),
                      classnames1 = nlcdclass)

# Hex codes of colors
classcolor <- c("#5475A8", "#B50000", "#D2CDC0", "#38814E",
                "#AF963C", "#D1D182", "#FBF65D", "#C8E6F8")

# Now we ratify (RAT = "Raster Attribute Table") nlcd2011, i.e. define the
# RasterLayer as a categorical variable. This is helpful for plotting.
nlcd2011 <- nlcd[[2]]
nlcd2011 <- ratify(nlcd2011)
rat <- levels(nlcd2011)[[1]]
rat$landcover <- nlcdclass
levels(nlcd2011) <- rat


## ----training sites-----------------------------------------------------------
# Generate the training site locations
# Set the random number generator to reproduce the results
set.seed(99)

# Stratified random sampling: 200 samples per land cover class
samp2011 <- sampleStratified(nlcd2011, size = 200, na.rm = TRUE, sp = TRUE)
samp2011

# Number of samples in each class
table(samp2011$nlcd2011)


## ----plots, fig.height=8, fig.width=8-----------------------------------------
library(rasterVis)
plt <- levelplot(nlcd2011, col.regions = classcolor,
                 main = 'Distribution of Training Sites')
print(plt + layer(sp.points(samp2011, pch = 3, cex = 0.5, col = 1)))


## ----landsat5-----------------------------------------------------------------
landsat5 <- stack('data/rs/centralvalley-2011LT5.tif')
names(landsat5) <- c('blue', 'green', 'red', 'NIR', 'SWIR1', 'SWIR2')


## ----extractvalues-------------------------------------------------------------
# Extract the layer values at the training site locations
sampvals <- extract(landsat5, samp2011, df = TRUE)
# sampvals no longer has the spatial information. To keep the spatial
# information, use the `sp=TRUE` argument of the `extract` function.
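# Optional: a minimal sketch of that alternative (the object name
# `sampvals_sp` is only for illustration, not part of the workflow below).
# With `sp=TRUE`, `extract()` returns the sample points as a
# SpatialPointsDataFrame with the band values appended, so the geometry
# is retained alongside the extracted values.
sampvals_sp <- extract(landsat5, samp2011, sp = TRUE)
head(sampvals_sp@data)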
# drop the ID column
sampvals <- sampvals[, -1]
# combine the class information with the extracted values
sampdata <- data.frame(classvalue = samp2011@data$nlcd2011, sampvals)


## ----cartplot, fig.height=6, fig.width=6--------------------------------------
library(rpart)
# Train the model
cart <- rpart(as.factor(classvalue)~., data = sampdata,
              method = 'class', minsplit = 5)
# print(cart)

# Plot the trained classification tree
plot(cart, uniform = TRUE, main = "Classification Tree")
text(cart, cex = 0.8)


## ----prediction----------------------------------------------------------------
# Predict the (subset) Landsat data with the model; prediction for the
# entire area takes longer
pr2011 <- predict(landsat5, cart, type = 'class')
pr2011


## ----dectree, fig.width=8, fig.height=8----------------------------------------
pr2011 <- ratify(pr2011)
rat <- levels(pr2011)[[1]]
rat$legend <- classdf$classnames1
levels(pr2011) <- rat

levelplot(pr2011, maxpixels = 1e6,
          col.regions = classcolor,
          scales = list(draw = FALSE),
          main = "Decision Tree classification of Landsat 5")


## -----------------------------------------------------------------------------
library(dismo)
set.seed(99)
# 5-fold partitioning of the samples, stratified by class
j <- kfold(sampdata, k = 5, by = sampdata$classvalue)
table(j)


## -----------------------------------------------------------------------------
x <- list()
for (k in 1:5) {
  train <- sampdata[j != k, ]
  test <- sampdata[j == k, ]
  cart <- rpart(as.factor(classvalue)~., data = train,
                method = 'class', minsplit = 5)
  pclass <- predict(cart, test, type = 'class')
  # store the reference and the prediction (as.integer(pclass) gives the
  # factor level index, which follows the same class order as classdf)
  x[[k]] <- cbind(test$classvalue, as.integer(pclass))
}


## -----------------------------------------------------------------------------
y <- do.call(rbind, x)
y <- data.frame(y)
colnames(y) <- c('observed', 'predicted')

# confusion matrix: observed (reference) classes in rows, predicted in columns
conmat <- table(y)
# change the names of the classes
colnames(conmat) <- classdf$classnames1
rownames(conmat) <- classdf$classnames1
conmat


## ----Accuracy Statistics--------------------------------------------------------
# number of cases
n <- sum(conmat)
n

# number of correctly classified cases per class
diag <- diag(conmat)

# Overall Accuracy
OA <- sum(diag) / n
OA


## ----kappa----------------------------------------------------------------------
# observed (true) cases per class
rowsums <- apply(conmat, 1, sum)
p <- rowsums / n

# predicted cases per class
colsums <- apply(conmat, 2, sum)
q <- colsums / n

expAccuracy <- sum(p * q)
kappa <- (OA - expAccuracy) / (1 - expAccuracy)
kappa


## ----User/Producer accuracy-----------------------------------------------------
# Producer accuracy: correctly classified / observed (reference) cases per class
PA <- diag / rowsums
# User accuracy: correctly classified / predicted cases per class
UA <- diag / colsums
outAcc <- data.frame(producerAccuracy = PA, userAccuracy = UA)
outAcc
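# Optional: a minimal sketch that wraps the accuracy statistics computed
# above into a reusable function. It assumes a square confusion matrix with
# observed (reference) classes as rows and predicted classes as columns;
# the name `accuracyStats` is only for illustration.
accuracyStats <- function(cm) {
  n <- sum(cm)
  d <- diag(cm)
  OA <- sum(d) / n
  p <- rowSums(cm) / n   # observed (reference) class proportions
  q <- colSums(cm) / n   # predicted class proportions
  expAccuracy <- sum(p * q)
  kappa <- (OA - expAccuracy) / (1 - expAccuracy)
  list(overallAccuracy = OA,
       kappa = kappa,
       producerAccuracy = d / rowSums(cm),
       userAccuracy = d / colSums(cm))
}
# should reproduce OA, kappa, and outAcc from the chunks above
accuracyStats(conmat)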