## ----setup, echo=TRUE, include=FALSE------------------------------------------
library(knitr)
library(rasterVis)


## ----nlcd---------------------------------------------------------------------
library(raster)
nlcd <- brick('data/rs/nlcd-L1.tif')
names(nlcd) <- c("nlcd2001", "nlcd2011")

# The class names and colors for plotting
nlcdclass <- c("Water", "Developed", "Barren", "Forest", "Shrubland",
               "Herbaceous", "Planted/Cultivated", "Wetlands")
classdf <- data.frame(classvalue1 = c(1, 2, 3, 4, 5, 7, 8, 9),
                      classnames1 = nlcdclass)

# Hex codes of colors
classcolor <- c("#5475A8", "#B50000", "#D2CDC0", "#38814E",
                "#AF963C", "#D1D182", "#FBF65D", "#C8E6F8")

# Now we ratify (RAT = "Raster Attribute Table") nlcd2011, i.e. define the
# RasterLayer as a categorical variable. This is helpful for plotting.
nlcd2011 <- nlcd[[2]]
nlcd2011 <- ratify(nlcd2011)
rat <- levels(nlcd2011)[[1]]
rat$landcover <- nlcdclass
levels(nlcd2011) <- rat


## ----training sites-----------------------------------------------------------
# Generate the training site locations
# Set the random number generator to reproduce the results
set.seed(99)

# Stratified random sampling: 200 samples per land cover class
samp2011 <- sampleStratified(nlcd2011, size = 200, na.rm = TRUE, sp = TRUE)
samp2011

# Number of samples in each class
table(samp2011$nlcd2011)


## ----plots, fig.height=8, fig.width=8-----------------------------------------
library(rasterVis)
plt <- levelplot(nlcd2011, col.regions = classcolor,
                 main = 'Distribution of Training Sites')
print(plt + layer(sp.points(samp2011, pch = 3, cex = 0.5, col = 1)))


## ----landsat5-----------------------------------------------------------------
landsat5 <- stack('data/rs/centralvalley-2011LT5.tif')
names(landsat5) <- c('blue', 'green', 'red', 'NIR', 'SWIR1', 'SWIR2')


## ----extractvalues-------------------------------------------------------------
# Extract the layer values at the training site locations
sampvals <- extract(landsat5, samp2011, df = TRUE)
# sampvals no longer has the spatial information. To keep the spatial
# information, use the `sp=TRUE` argument of the `extract` function.
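# Optional: a minimal sketch of that alternative (the object name
# `sampvals_sp` is only for illustration, not part of the workflow below).
# With `sp=TRUE`, `extract()` returns the sample points as a
# SpatialPointsDataFrame with the band values appended, so the geometry
# is retained alongside the extracted values.
sampvals_sp <- extract(landsat5, samp2011, sp = TRUE)
head(sampvals_sp@data)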
# drop the ID column
sampvals <- sampvals[, -1]
# combine the class information with the extracted values
sampdata <- data.frame(classvalue = samp2011@data$nlcd2011, sampvals)


## ----cartplot, fig.height=6, fig.width=6--------------------------------------
library(rpart)
# Train the model
cart <- rpart(as.factor(classvalue)~., data = sampdata,
              method = 'class', minsplit = 5)
# print(cart)

# Plot the trained classification tree
plot(cart, uniform = TRUE, main = "Classification Tree")
text(cart, cex = 0.8)


## ----prediction----------------------------------------------------------------
# Predict the (subset) Landsat data with the model; prediction for the
# entire area takes longer
pr2011 <- predict(landsat5, cart, type = 'class')
pr2011


## ----dectree, fig.width=8, fig.height=8----------------------------------------
pr2011 <- ratify(pr2011)
rat <- levels(pr2011)[[1]]
rat$legend <- classdf$classnames1
levels(pr2011) <- rat

levelplot(pr2011, maxpixels = 1e6,
          col.regions = classcolor,
          scales = list(draw = FALSE),
          main = "Decision Tree classification of Landsat 5")


## -----------------------------------------------------------------------------
library(dismo)
set.seed(99)
# 5-fold partitioning of the samples, stratified by class
j <- kfold(sampdata, k = 5, by = sampdata$classvalue)
table(j)


## -----------------------------------------------------------------------------
x <- list()
for (k in 1:5) {
  train <- sampdata[j != k, ]
  test <- sampdata[j == k, ]
  cart <- rpart(as.factor(classvalue)~., data = train,
                method = 'class', minsplit = 5)
  pclass <- predict(cart, test, type = 'class')
  # store the reference and the prediction (as.integer(pclass) gives the
  # factor level index, which follows the same class order as classdf)
  x[[k]] <- cbind(test$classvalue, as.integer(pclass))
}


## -----------------------------------------------------------------------------
y <- do.call(rbind, x)
y <- data.frame(y)
colnames(y) <- c('observed', 'predicted')

# confusion matrix: observed (reference) classes in rows, predicted in columns
conmat <- table(y)
# change the names of the classes
colnames(conmat) <- classdf$classnames1
rownames(conmat) <- classdf$classnames1
conmat


## ----Accuracy Statistics--------------------------------------------------------
# number of cases
n <- sum(conmat)
n

# number of correctly classified cases per class
diag <- diag(conmat)

# Overall Accuracy
OA <- sum(diag) / n
OA


## ----kappa----------------------------------------------------------------------
# observed (true) cases per class
rowsums <- apply(conmat, 1, sum)
p <- rowsums / n

# predicted cases per class
colsums <- apply(conmat, 2, sum)
q <- colsums / n

expAccuracy <- sum(p * q)
kappa <- (OA - expAccuracy) / (1 - expAccuracy)
kappa


## ----User/Producer accuracy-----------------------------------------------------
# Producer accuracy: correctly classified / observed (reference) cases per class
PA <- diag / rowsums
# User accuracy: correctly classified / predicted cases per class
UA <- diag / colsums
outAcc <- data.frame(producerAccuracy = PA, userAccuracy = UA)
outAcc
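# Optional: a minimal sketch that wraps the accuracy statistics computed
# above into a reusable function. It assumes a square confusion matrix with
# observed (reference) classes as rows and predicted classes as columns;
# the name `accuracyStats` is only for illustration.
accuracyStats <- function(cm) {
  n <- sum(cm)
  d <- diag(cm)
  OA <- sum(d) / n
  p <- rowSums(cm) / n   # observed (reference) class proportions
  q <- colSums(cm) / n   # predicted class proportions
  expAccuracy <- sum(p * q)
  kappa <- (OA - expAccuracy) / (1 - expAccuracy)
  list(overallAccuracy = OA,
       kappa = kappa,
       producerAccuracy = d / rowSums(cm),
       userAccuracy = d / colSums(cm))
}
# should reproduce OA, kappa, and outAcc from the chunks above
accuracyStats(conmat)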