## ----landsat------------------------------------------------------------------
library(terra)

# We read the 6 bands from the Landsat image we previously used
raslist <- paste0('data/rs/LC08_044034_20170614_B', 2:7, ".tif")
landsat <- rast(raslist)
names(landsat) <- c('blue', 'green', 'red', 'NIR', 'SWIR1', 'SWIR2')


## ----samples, fig.height=4.23-------------------------------------------------
# load polygons with land use land cover information
samp <- readRDS("data/rs/lcsamples.rds")

# check the distribution of the polygons
plot(samp)
text(samp, samp$class)


## ----sample-points, fig.height=4.23-------------------------------------------
set.seed(1)
# generate point samples from the polygons 
ptsamp <- spatSample(samp, 200, method="random")
plot(ptsamp, "class")


## ----nlcd, fig.height=4.23----------------------------------------------------
nlcd <- rast('data/rs/nlcd-L1.tif')
names(nlcd) <- c("nlcd2001", "nlcd2011")
nlcd2011 <- nlcd[[2]]

# assign class names as categories (levels)
nlcdclass <- c("Water", "Developed", "Barren", "Forest", "Shrubland", "Herbaceous", "Cultivated", "Wetlands")
classdf <- data.frame(value = c(1,2,3,4,5,7,8,9), names = nlcdclass) 
levels(nlcd2011) <- classdf

# colors (as hexidecimal codes)
classcolor <- c("#5475A8", "#B50000", "#D2CDC0", "#38814E", "#AF963C", "#D1D182", "#FBF65D", "#C8E6F8") 

# plot the locations on top of the original nlcd raster
plot(nlcd2011, col=classcolor)
ptlonlat <- project(ptsamp, crs(nlcd2011))
points(ptlonlat)


## -----------------------------------------------------------------------------
# Sampling
samp2011 <- spatSample(nlcd2011, size = 200, method="regular")

# Number of samples in each class
table(samp2011[,1])



## ----extractvalues------------------------------------------------------------
# extract the reflectance values for the locations 
df <- extract(landsat, ptsamp, ID=FALSE)

# Quick check for the extracted values
head(df)

# combine lulc class information with extracted values
sampdata <- data.frame(class = ptsamp$class, df)


## ----cart-train---------------------------------------------------------------
library(rpart) 

# Train the model
cartmodel <- rpart(as.factor(class)~., data = sampdata, method = 'class', minsplit = 5)



## ----cart-plot, fig.height=6, fig.width=6-------------------------------------
# print trained model
print(cartmodel)

# Plot the trained classification tree
plot(cartmodel, uniform=TRUE, main="Classification Tree")
text(cartmodel, cex = 1)


## ----prediction, fig.height=5.3, fig.width=9.5--------------------------------
classified <- predict(landsat, cartmodel, na.rm = TRUE)
classified
plot(classified)


## ----combine-results1---------------------------------------------------------
lulc <- which.max(classified)
lulc


## ----combine-results2, fig.width=9, fig.height=7------------------------------
cls <- c("built","cropland","fallow","open","water")
df <- data.frame(id = 1:5, class=cls)
levels(lulc) <- df
lulc

mycolor <- c("darkred", "yellow", "burlywood", "cyan", "blue")
plot(lulc, col=mycolor)


## ----kfold-setup--------------------------------------------------------------
set.seed(99)

# number of folds
k <- 5
j <- sample(rep(1:k, each = round(nrow(sampdata))/k))
table(j)


## ----k-fold-------------------------------------------------------------------

x <- list()

for (k in 1:5) {
	train <- sampdata[j!= k, ]
	test <- sampdata[j == k, ]
	cart <- rpart(as.factor(class)~., data=train, method = 'class', 
	              minsplit = 5)
	pclass <- predict(cart, test, na.rm = TRUE)
	# assign class to maximum probablity
	pc <- apply(pclass, 1, which.max)
	# use labels instead of numbers 
	pc <- colnames(pclass)[pc]
	# create a data.frame using the reference and prediction
	x[[k]] <- cbind(test$class, pc)
}


## ----confusion-matrix---------------------------------------------------------
y <- do.call(rbind, x)
y <- data.frame(y)
colnames(y) <- c('observed', 'predicted')

# confusion matrix
conmat <- table(y)
print(conmat)


## ----overallaccuracy----------------------------------------------------------
# number of total cases/samples
n <- sum(conmat) 
n

# number of correctly classified cases per class
diag <- diag(conmat) 

# Overall Accuracy
OA <- sum(diag) / n
OA


## ----kappa--------------------------------------------------------------------
# observed (true) cases per class
rowsums <- apply(conmat, 1, sum) 
p <- rowsums / n 

# predicted cases per class
colsums <- apply(conmat, 2, sum) 
q <- colsums / n 

expAccuracy <- sum(p*q)
kappa <- (OA - expAccuracy) / (1 - expAccuracy)
kappa


## ----User/Producer accuracy---------------------------------------------------
# Producer accuracy
PA <- diag / colsums

# User accuracy
UA <- diag / rowsums

outAcc <- data.frame(producerAccuracy = PA, userAccuracy = UA)
outAcc