7. Spatial autocorrelation¶

7.1 Introduction¶

This handout accompanies Chapter 7 in O’Sullivan and Unwin (2010).

Get the data¶

You can download the data here: auctb, or with the script below.

dir.create('data', showWarnings = FALSE)
filename <- 'auctb.rds'
localfile <- file.path('data', filename)
if (!file.exists(localfile)) {
}


7.2 Area of a polygon¶

Create a polygon like in Figure 7.2 (page 192).

pol <- matrix(c(1.7, 2.6, 5.6, 8.1, 7.2, 3.3, 1.7, 4.9, 7, 7.6, 6.1, 2.7, 2.7, 4.9), ncol=2)
library(raster)
sppol <- spPolygons(pol)


For illustration purposes, we create the “negative area” polygon as well

negpol <- rbind(pol[c(1,6:4),], cbind(pol[4,1], 0), cbind(pol[1,1], 0))
spneg <- spPolygons(negpol)


Now plot

cols <- c('light gray', 'light blue')
plot(sppol, xlim=c(1,9), ylim=c(1,10), col=cols[1], axes=FALSE, xlab='', ylab='',
lwd=2, yaxs="i", xaxs="i")
segments(pol[,1], pol[,2], pol[,1], 0)
text(pol, LETTERS[1:6], pos=3, col='red', font=4)
arrows(1, 1, 9, 1, 0.1, xpd=T)
arrows(1, 1, 1, 9, 0.1, xpd=T)
text(1, 9.5, 'y axis', xpd=T)
text(10, 1, 'x axis', xpd=T)
legend(6, 9.5, c('"positive" area', '"negative" area'), fill=cols, bty = "n")


Compute area

p <- rbind(pol, pol[1,])
x <- p[-1,1] - p[-nrow(p),1]
y <- (p[-1,2] + p[-nrow(p),2]) / 2
sum(x * y)
## [1] 23.81


Or simply use an existing function

# make sure that the coordinates are interpreted as planar (not longitude/latitude)
crs(sppol) <- '+proj=utm +zone=1'
area(sppol)
## [1] 23.81


7.3 Contact numbers¶

“Contact numbers” for the lower 48 states. Get the polygons:

library(raster)
## Error in getData("GADM", country = "USA", level = 1): unused arguments (country = "USA", level = 1)
usa <- usa[! usa$NAME_1 %in% c('Alaska', 'Hawaii'), ] ## Error in eval(expr, envir, enclos): object 'usa' not found  To find adjacent polygons, we can use the spdep package. library(spdep)  We use poly2nb to create a neighbors-list. And from that a neighbors matrix. # patience, this takes a while: wus <- poly2nb(usa, row.names=usa$OBJECTID, queen=FALSE)
wus
wmus <- nb2mat(wus, style='B', zero.policy = TRUE)
## Error in nb2mat(wus, style = "B", zero.policy = TRUE): object 'wus' not found
dim(wmus)


Compute the number of neighbors for each state.

i <- rowSums(wmus)
round(100 * table(i) / length(i), 1)
## i
##   2
## 100


Apparantly, I am using a different data set than OSU (compare the above with table 7.1). By changing the level argument to 2 in the getData function you can run the same for counties. Which county has 13 neighbors?

7.4 Spatial structure¶

library(raster)
## Warning in readRDS("data/auctb.rds"): invalid or incomplete compressed data


I did not have the tuberculosis data so I guestimated them from figure 7.7. Compare:

par(mai=c(0,0,0,0))
classes <- seq(0,450,50)
cuts <- cut(pols$TB, classes) ## Error in cut(pols$TB, classes): object 'pols' not found
n <- length(classes)
cols <- rev(gray(0:n / n))
plot(pols, col=cols[as.integer(cuts)])
legend('bottomleft', levels(cuts), fill=cols)


Create a Rooks’ case neighborhood object.

wr <- poly2nb(pols, row.names=pols$Id, queen=FALSE) ## Error in extends(class(pl), "SpatialPolygons"): object 'pols' not found class(wr) ## Error in eval(expr, envir, enclos): object 'wr' not found summary(wr) ## Error in summary(wr): object 'wr' not found  Plot the links between the polygons. par(mai=c(0,0,0,0)) plot(pols, col='gray', border='blue') ## Error in plot(pols, col = "gray", border = "blue"): object 'pols' not found xy <- coordinates(pols) ## Error in coordinates(pols): object 'pols' not found plot(wr, xy, col='red', lwd=2, add=TRUE) ## Error in plot(wr, xy, col = "red", lwd = 2, add = TRUE): object 'wr' not found  We can create a matrix from the links list. wm <- nb2mat(wr, style='B') ## Error in nb2mat(wr, style = "B"): object 'wr' not found dim(wm) ## Error in eval(expr, envir, enclos): object 'wm' not found  And inspect the content or wr and wm wr[1:6] ## Error in eval(expr, envir, enclos): object 'wr' not found wm[1:6,1:11] ## Error in eval(expr, envir, enclos): object 'wm' not found  Question 1:Explain the meaning of the first lines returned by wr[1:6]) Now let’s recreate Figure 7.6 (page 202). We already have the first one (Rook’s case adjacency, plotted above). Queen’s case adjacency: wq <- poly2nb(pols, row.names=pols$Id, queen=TRUE)


Distance based:

wd1 <- dnearneigh(xy, 0, 1000)
wd25 <- dnearneigh(xy, 0, 2500)


Nearest neighbors:

k3 <- knn2nb(knearneigh(xy, k=3, RANN=FALSE))
## Error in knearneigh(xy, k = 3, RANN = FALSE): object 'xy' not found
k6 <- knn2nb(knearneigh(xy, k=6, RANN=FALSE))
## Error in knearneigh(xy, k = 6, RANN = FALSE): object 'xy' not found


Delauny:

library(deldir)
d <- deldir(xy[,1], xy[,2], suppressMsge=TRUE)


Lag-two Rook:

wr2 <- wr
for (i in 1:length(wr)) {
lag1 <- wr[[i]]
lag2 <- wr[lag1]
lag2 <- sort(unique(unlist(lag2)))
lag2 <- lag2[!(lag2 %in% c(wr[[i]], i))]
wr2[[i]] <- lag2
}


And now we plot them all using the plotit function.

  plotit <- function(nb, lab='') {
plot(pols, col='gray', border='white')
text(2659066, 6482808, paste0('(', lab, ')'), cex=1.25)
}

par(mfrow=c(4, 2), mai=c(0,0,0,0))
plotit(wr, 'i')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(wq, 'ii')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(wd1, 'iii')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(wd25, 'iv')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(k3, 'v')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(k6, 'vi')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plot(pols, col='gray', border='white')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
## Error in plot(d, wlines = "triang", add = TRUE, pch = 20): object 'd' not found
text(2659066, 6482808, '(vii)', cex=1.25)
## Error in text.default(2659066, 6482808, "(vii)", cex = 1.25): plot.new has not been called yet
plotit(wr2, 'viii')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found


7.5 Moran’s I¶

Below I compute Moran’s index according to formula 7.7 on page 205 of OSU.

$I = \frac{n}{\sum_{i=1}^n (y_i - \bar{y})^2} \frac{\sum_{i=1}^n \sum_{j=1}^n w_{ij}(y_i - \bar{y})(y_j - \bar{y})}{\sum_{i=1}^n \sum_{j=1}^n w_{ij}}$

The number of observations

n <- length(pols)


Values ‘y’ and ‘ybar’ (the mean of y).

y <- pols$TB ## Error in eval(expr, envir, enclos): object 'pols' not found ybar <- mean(y)  Now we need $(y_i - \bar{y})(y_j - \bar{y})$ That is, (yi-ybar)(yj-ybar) for all pairs. I show two methods to compute that. Method 1: dy <- y - ybar g <- expand.grid(dy, dy) yiyj <- g[,1] * g[,2]  Method 2: yi <- rep(dy, each=n) yj <- rep(dy) yiyj <- yi * yj  Make a matrix of the multiplied pairs pm <- matrix(yiyj, ncol=n) round(pm[1:6, 1:9]) ## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] ## [1,] 1 1 2 1 1 -1 -2 -2 -1 ## [2,] 2 2 5 4 4 -2 -5 -3 -3 ## [3,] 1 1 4 3 -1 -1 -4 -2 -2 ## [4,] -1 -2 -2 -1 1 1 2 1 1 ## [5,] -2 -5 -5 -4 2 2 6 3 1 ## [6,] -1 -3 -3 -2 1 3 3 2 0  And multiply this matrix with the weights to set to zero the value for the pairs that are not adjacent. pmw <- pm * wm ## Error in eval(expr, envir, enclos): object 'wm' not found wm[1:6, 1:9] ## Error in eval(expr, envir, enclos): object 'wm' not found round(pmw[1:6, 1:9]) ## Error in eval(expr, envir, enclos): object 'pmw' not found  We sum the values, to get this bit of Moran’s I: $\sum_{i=1}^n \sum_{j=1}^n w_{ij}(y_i - \bar{y})(y_j - \bar{y})$ spmw <- sum(pmw) ## Error in eval(expr, envir, enclos): object 'pmw' not found spmw ## Error in eval(expr, envir, enclos): object 'spmw' not found  The next step is to divide this value by the sum of weights. That is easy. smw <- sum(wm) ## Error in eval(expr, envir, enclos): object 'wm' not found sw <- spmw / smw ## Error in eval(expr, envir, enclos): object 'spmw' not found  And compute the inverse variance of y vr <- n / sum(dy^2)  The final step to compute Moran’s I MI <- vr * sw ## Error in eval(expr, envir, enclos): object 'sw' not found MI ## Error in eval(expr, envir, enclos): object 'MI' not found  This is how you can (theoretically) estimate the expected value of Moran’s I. That is, the value you would get in the absence of spatial autocorrelation. Note that it is not zero for small values of n. EI <- -1/(n-1) EI ## [1] -0.1111111  After doing this ‘by hand’, now let’s use the spdep package to compute Moran’s I and do a significance test. To do this we need to create a listw type spatial weights object. To get the same value as above we use style='B' to use binary (TRUE/FALSE) distance weights. ww <- nb2listw(wr, style='B') ## Error in nb2listw(wr, style = "B"): object 'wr' not found ww ## Error in eval(expr, envir, enclos): object 'ww' not found  On to the moran function. Have a look at ?moran. The function is defined as moran(y, ww, n, Szero(ww)). Note the odd arguments n and S0. I think they are odd, because ww has that information. Anyway, we supply them and it works. There probably are cases where it makes sense to use other values. moran(pols$TB, ww, n=length(ww$neighbours), S0=Szero(ww)) ## Error in moran(pols$TB, ww, n = length(ww$neighbours), S0 = Szero(ww)): object 'ww' not found  Note that the global sum ow weights Szero(ww) ## Error in unlist(listw$weights): object 'ww' not found


Should be the same as

sum(pmw != 0)


Now we can test for significance. First analytically, using linear regression based logic and assumptions.

moran.test(pols$TB, ww, randomisation=FALSE) ## Error in moran.test(pols$TB, ww, randomisation = FALSE): object 'ww' not found


And now using Monte Carlo simulation — which is the preferred method. In fact, the only good method to use.

moran.mc(pols$TB, ww, nsim=99) ## Error in moran.mc(pols$TB, ww, nsim = 99): object 'ww' not found


Question 2: How do you interpret these results (the significance tests)?

Question 3: What would a good value be for nsim?

To make a Moran scatter plot we first get the neighbouring values for each value.

n <- length(pols)
ms <- cbind(id=rep(1:n, each=n), y=rep(y, each=n), value=as.vector(wm * y))


Remove the zeros

ms <- ms[ms[,3] > 0, ]


And compute the average neighbour value

ams <- aggregate(ms[,2:3], list(ms[,1]), FUN=mean)
## Error in aggregate(ms[, 2:3], list(ms[, 1]), FUN = mean): object 'ms' not found
ams <- ams[,-1]
colnames(ams) <- c('y', 'spatially lagged y')
## Error in colnames(ams) <- c("y", "spatially lagged y"): object 'ams' not found


Finally, the plot.

plot(ams)
reg <- lm(ams[,2] ~ ams[,1])
abline(reg, lwd=2)
abline(h=mean(ams[,2]), lt=2)
abline(v=ybar, lt=2)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet


Note that the slope of the regression line:

coefficients(reg)[2]


is almost the same as Moran’s I.

Here is a more direct approach to accomplish the same thing (but hopefully the above makes it clearer how this is actually computed). Note the row standardisation of the weights matrix:

rwm <- mat2listw(wm, style='W')
# Checking if rows add up to 1
mat <- listw2mat(rwm)
apply(mat, 1, sum)[1:15]


Now we can plot

moran.plot(y, rwm)

gearyC <- ((n-1)/sum(( "----")\^2)) * sum(wm * (" --- ")\^2) / (2 * sum(wm))