7. Spatial autocorrelation

7.1 Introduction

This handout accompanies Chapter 7 in O’Sullivan and Unwin (2010).

Get the data

You can download the data here: auctb, or with the script below.

dir.create('data', showWarnings = FALSE)
filename <- 'auctb.rds'
localfile <- file.path('data', filename)
if (!file.exists(localfile)) {
    download.file(paste0('https://biogeo.ucdavis.edu/data/rspatial/', filename), dest=localfile)
}

7.2 Area of a polygon

Create a polygon like in Figure 7.2 (page 192).

pol <- matrix(c(1.7, 2.6, 5.6, 8.1, 7.2, 3.3, 1.7, 4.9, 7, 7.6, 6.1, 2.7, 2.7, 4.9), ncol=2)
library(raster)
sppol <- spPolygons(pol)

For illustration purposes, we create the “negative area” polygon as well

negpol <- rbind(pol[c(1,6:4),], cbind(pol[4,1], 0), cbind(pol[1,1], 0))
spneg <- spPolygons(negpol)

Now plot

cols <- c('light gray', 'light blue')
plot(sppol, xlim=c(1,9), ylim=c(1,10), col=cols[1], axes=FALSE, xlab='', ylab='',
      lwd=2, yaxs="i", xaxs="i")
plot(spneg, col=cols[2], add=T)
plot(spneg, add=T, density=8, angle=-45, lwd=1)
segments(pol[,1], pol[,2], pol[,1], 0)
text(pol, LETTERS[1:6], pos=3, col='red', font=4)
arrows(1, 1, 9, 1, 0.1, xpd=T)
arrows(1, 1, 1, 9, 0.1, xpd=T)
text(1, 9.5, 'y axis', xpd=T)
text(10, 1, 'x axis', xpd=T)
legend(6, 9.5, c('"positive" area', '"negative" area'), fill=cols, bty = "n")

image0

Compute area

p <- rbind(pol, pol[1,])
x <- p[-1,1] - p[-nrow(p),1]
y <- (p[-1,2] + p[-nrow(p),2]) / 2
sum(x * y)
## [1] 23.81

Or simply use an existing function

# make sure that the coordinates are interpreted as planar (not longitude/latitude)
crs(sppol) <- '+proj=utm +zone=1'
area(sppol)
## [1] 23.81

7.3 Contact numbers

“Contact numbers” for the lower 48 states. Get the polygons:

library(raster)
usa <- getData('GADM', country='USA', level=1)
## Error in getData("GADM", country = "USA", level = 1): unused arguments (country = "USA", level = 1)
usa <- usa[! usa$NAME_1 %in% c('Alaska', 'Hawaii'), ]
## Error in eval(expr, envir, enclos): object 'usa' not found

To find adjacent polygons, we can use the spdep package.

library(spdep)

We use poly2nb to create a neighbors-list. And from that a neighbors matrix.

# patience, this takes a while:
wus <- poly2nb(usa, row.names=usa$OBJECTID, queen=FALSE)
## Error in extends(class(pl), "SpatialPolygons"): object 'usa' not found
wus
## Error in eval(expr, envir, enclos): object 'wus' not found
wmus <- nb2mat(wus, style='B', zero.policy = TRUE)
## Error in nb2mat(wus, style = "B", zero.policy = TRUE): object 'wus' not found
dim(wmus)
## Error in eval(expr, envir, enclos): object 'wmus' not found

Compute the number of neighbors for each state.

i <- rowSums(wmus)
## Error in rowSums(wmus): object 'wmus' not found
round(100 * table(i) / length(i), 1)
## i
##   2
## 100

Apparantly, I am using a different data set than OSU (compare the above with table 7.1). By changing the level argument to 2 in the getData function you can run the same for counties. Which county has 13 neighbors?

7.4 Spatial structure

Read the Auckland data (download here).

library(raster)
pols <- readRDS('data/auctb.rds')
## Warning in readRDS("data/auctb.rds"): invalid or incomplete compressed data
## Error in readRDS("data/auctb.rds"): error reading from connection

I did not have the tuberculosis data so I guestimated them from figure 7.7. Compare:

par(mai=c(0,0,0,0))
classes <- seq(0,450,50)
cuts <- cut(pols$TB, classes)
## Error in cut(pols$TB, classes): object 'pols' not found
n <- length(classes)
cols <- rev(gray(0:n / n))
plot(pols, col=cols[as.integer(cuts)])
## Error in plot(pols, col = cols[as.integer(cuts)]): object 'pols' not found
legend('bottomleft', levels(cuts), fill=cols)
## Error in levels(cuts): object 'cuts' not found

Create a Rooks’ case neighborhood object.

wr <- poly2nb(pols, row.names=pols$Id, queen=FALSE)
## Error in extends(class(pl), "SpatialPolygons"): object 'pols' not found
class(wr)
## Error in eval(expr, envir, enclos): object 'wr' not found
summary(wr)
## Error in summary(wr): object 'wr' not found

Plot the links between the polygons.

par(mai=c(0,0,0,0))
plot(pols, col='gray', border='blue')
## Error in plot(pols, col = "gray", border = "blue"): object 'pols' not found
xy <- coordinates(pols)
## Error in coordinates(pols): object 'pols' not found
plot(wr, xy, col='red', lwd=2, add=TRUE)
## Error in plot(wr, xy, col = "red", lwd = 2, add = TRUE): object 'wr' not found

We can create a matrix from the links list.

wm <- nb2mat(wr, style='B')
## Error in nb2mat(wr, style = "B"): object 'wr' not found
dim(wm)
## Error in eval(expr, envir, enclos): object 'wm' not found

And inspect the content or wr and wm

wr[1:6]
## Error in eval(expr, envir, enclos): object 'wr' not found
wm[1:6,1:11]
## Error in eval(expr, envir, enclos): object 'wm' not found

Question 1:Explain the meaning of the first lines returned by wr[1:6])

Now let’s recreate Figure 7.6 (page 202).

We already have the first one (Rook’s case adjacency, plotted above). Queen’s case adjacency:

wq <- poly2nb(pols, row.names=pols$Id, queen=TRUE)
## Error in extends(class(pl), "SpatialPolygons"): object 'pols' not found

Distance based:

wd1 <- dnearneigh(xy, 0, 1000)
## Error in dnearneigh(xy, 0, 1000): object 'xy' not found
wd25 <- dnearneigh(xy, 0, 2500)
## Error in dnearneigh(xy, 0, 2500): object 'xy' not found

Nearest neighbors:

k3 <- knn2nb(knearneigh(xy, k=3, RANN=FALSE))
## Error in knearneigh(xy, k = 3, RANN = FALSE): object 'xy' not found
k6 <- knn2nb(knearneigh(xy, k=6, RANN=FALSE))
## Error in knearneigh(xy, k = 6, RANN = FALSE): object 'xy' not found

Delauny:

library(deldir)
d <- deldir(xy[,1], xy[,2], suppressMsge=TRUE)
## Error in is.data.frame(x): object 'xy' not found

Lag-two Rook:

wr2 <- wr
## Error in eval(expr, envir, enclos): object 'wr' not found
for (i in 1:length(wr)) {
    lag1 <- wr[[i]]
    lag2 <- wr[lag1]
    lag2 <- sort(unique(unlist(lag2)))
    lag2 <- lag2[!(lag2 %in% c(wr[[i]], i))]
    wr2[[i]] <- lag2
}
## Error in eval(expr, envir, enclos): object 'wr' not found

And now we plot them all using the plotit function.

  plotit <- function(nb, lab='') {
  plot(pols, col='gray', border='white')
  plot(nb, xy, add=TRUE, pch=20)
  text(2659066, 6482808, paste0('(', lab, ')'), cex=1.25)
}

par(mfrow=c(4, 2), mai=c(0,0,0,0))
plotit(wr, 'i')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(wq, 'ii')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(wd1, 'iii')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(wd25, 'iv')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(k3, 'v')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plotit(k6, 'vi')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plot(pols, col='gray', border='white')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found
plot(d, wlines='triang', add=TRUE, pch=20)
## Error in plot(d, wlines = "triang", add = TRUE, pch = 20): object 'd' not found
text(2659066, 6482808, '(vii)', cex=1.25)
## Error in text.default(2659066, 6482808, "(vii)", cex = 1.25): plot.new has not been called yet
plotit(wr2, 'viii')
## Error in plot(pols, col = "gray", border = "white"): object 'pols' not found

7.5 Moran’s I

Below I compute Moran’s index according to formula 7.7 on page 205 of OSU.

\[I = \frac{n}{\sum_{i=1}^n (y_i - \bar{y})^2} \frac{\sum_{i=1}^n \sum_{j=1}^n w_{ij}(y_i - \bar{y})(y_j - \bar{y})}{\sum_{i=1}^n \sum_{j=1}^n w_{ij}}\]

The number of observations

n <- length(pols)
## Error in eval(expr, envir, enclos): object 'pols' not found

Values ‘y’ and ‘ybar’ (the mean of y).

y <- pols$TB
## Error in eval(expr, envir, enclos): object 'pols' not found
ybar <- mean(y)

Now we need

\[(y_i - \bar{y})(y_j - \bar{y})\]

That is, (yi-ybar)(yj-ybar) for all pairs. I show two methods to compute that.

Method 1:

dy <- y - ybar
g <- expand.grid(dy, dy)
yiyj <- g[,1] * g[,2]

Method 2:

yi <- rep(dy, each=n)
yj <- rep(dy)
yiyj <- yi * yj

Make a matrix of the multiplied pairs

pm <- matrix(yiyj, ncol=n)
round(pm[1:6, 1:9])
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,]    1    1    2    1    1   -1   -2   -2   -1
## [2,]    2    2    5    4    4   -2   -5   -3   -3
## [3,]    1    1    4    3   -1   -1   -4   -2   -2
## [4,]   -1   -2   -2   -1    1    1    2    1    1
## [5,]   -2   -5   -5   -4    2    2    6    3    1
## [6,]   -1   -3   -3   -2    1    3    3    2    0

And multiply this matrix with the weights to set to zero the value for the pairs that are not adjacent.

pmw <- pm * wm
## Error in eval(expr, envir, enclos): object 'wm' not found
wm[1:6, 1:9]
## Error in eval(expr, envir, enclos): object 'wm' not found
round(pmw[1:6, 1:9])
## Error in eval(expr, envir, enclos): object 'pmw' not found

We sum the values, to get this bit of Moran’s I:

\[\sum_{i=1}^n \sum_{j=1}^n w_{ij}(y_i - \bar{y})(y_j - \bar{y})\]
spmw <- sum(pmw)
## Error in eval(expr, envir, enclos): object 'pmw' not found
spmw
## Error in eval(expr, envir, enclos): object 'spmw' not found

The next step is to divide this value by the sum of weights. That is easy.

smw <- sum(wm)
## Error in eval(expr, envir, enclos): object 'wm' not found
sw  <- spmw / smw
## Error in eval(expr, envir, enclos): object 'spmw' not found

And compute the inverse variance of y

vr <- n / sum(dy^2)

The final step to compute Moran’s I

MI <- vr * sw
## Error in eval(expr, envir, enclos): object 'sw' not found
MI
## Error in eval(expr, envir, enclos): object 'MI' not found

This is how you can (theoretically) estimate the expected value of Moran’s I. That is, the value you would get in the absence of spatial autocorrelation. Note that it is not zero for small values of n.

EI <- -1/(n-1)
EI
## [1] -0.1111111

After doing this ‘by hand’, now let’s use the spdep package to compute Moran’s I and do a significance test. To do this we need to create a listw type spatial weights object. To get the same value as above we use style='B' to use binary (TRUE/FALSE) distance weights.

ww <- nb2listw(wr, style='B')
## Error in nb2listw(wr, style = "B"): object 'wr' not found
ww
## Error in eval(expr, envir, enclos): object 'ww' not found

On to the moran function. Have a look at ?moran. The function is defined as moran(y, ww, n, Szero(ww)). Note the odd arguments n and S0. I think they are odd, because ww has that information. Anyway, we supply them and it works. There probably are cases where it makes sense to use other values.

moran(pols$TB, ww, n=length(ww$neighbours), S0=Szero(ww))
## Error in moran(pols$TB, ww, n = length(ww$neighbours), S0 = Szero(ww)): object 'ww' not found

Note that the global sum ow weights

Szero(ww)
## Error in unlist(listw$weights): object 'ww' not found

Should be the same as

sum(pmw != 0)
## Error in eval(expr, envir, enclos): object 'pmw' not found

Now we can test for significance. First analytically, using linear regression based logic and assumptions.

moran.test(pols$TB, ww, randomisation=FALSE)
## Error in moran.test(pols$TB, ww, randomisation = FALSE): object 'ww' not found

And now using Monte Carlo simulation — which is the preferred method. In fact, the only good method to use.

moran.mc(pols$TB, ww, nsim=99)
## Error in moran.mc(pols$TB, ww, nsim = 99): object 'ww' not found

Question 2: How do you interpret these results (the significance tests)?

Question 3: What would a good value be for ``nsim``?

To make a Moran scatter plot we first get the neighbouring values for each value.

n <- length(pols)
## Error in eval(expr, envir, enclos): object 'pols' not found
ms <- cbind(id=rep(1:n, each=n), y=rep(y, each=n), value=as.vector(wm * y))
## Error in as.vector(wm * y): object 'wm' not found

Remove the zeros

ms <- ms[ms[,3] > 0, ]
## Error in eval(expr, envir, enclos): object 'ms' not found

And compute the average neighbour value

ams <- aggregate(ms[,2:3], list(ms[,1]), FUN=mean)
## Error in aggregate(ms[, 2:3], list(ms[, 1]), FUN = mean): object 'ms' not found
ams <- ams[,-1]
## Error in eval(expr, envir, enclos): object 'ams' not found
colnames(ams) <- c('y', 'spatially lagged y')
## Error in colnames(ams) <- c("y", "spatially lagged y"): object 'ams' not found
head(ams)
## Error in head(ams): object 'ams' not found

Finally, the plot.

plot(ams)
## Error in plot(ams): object 'ams' not found
reg <- lm(ams[,2] ~ ams[,1])
## Error in eval(predvars, data, env): object 'ams' not found
abline(reg, lwd=2)
## Error in abline(reg, lwd = 2): object 'reg' not found
abline(h=mean(ams[,2]), lt=2)
## Error in mean(ams[, 2]): object 'ams' not found
abline(v=ybar, lt=2)
## Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet

Note that the slope of the regression line:

coefficients(reg)[2]
## Error in coefficients(reg): object 'reg' not found

is almost the same as Moran’s I.

Here is a more direct approach to accomplish the same thing (but hopefully the above makes it clearer how this is actually computed). Note the row standardisation of the weights matrix:

rwm <- mat2listw(wm, style='W')
## Error in mat2listw(wm, style = "W"): object 'wm' not found
# Checking if rows add up to 1
mat <- listw2mat(rwm)
## Error in listw2mat(rwm): object 'rwm' not found
apply(mat, 1, sum)[1:15]
## Error in apply(mat, 1, sum): object 'mat' not found

Now we can plot

moran.plot(y, rwm)
## Error in moran.plot(y, rwm): object 'rwm' not found

Question 4: Show how to use the ‘geary’ function to compute Geary’s C

Question 5: Write your own Monte Carlo simulation test to compute p-values for Moran’sI, replicating the results we obtained with the function from spdep. Show a figure similar to Figure 7.9 in OSU.

Question 6: Write your own Geary C function, by completing the function below

gearyC <- ((n-1)/sum(( "----")\^2)) * sum(wm * (" --- ")\^2) / (2 * sum(wm))