Vector data manipulation

This chapter illustrates some ways in which we can manipulate vector data. We start with an example SpatVector that we read from a shapefile.

library(terra)
f <- system.file("ex/lux.shp", package="terra")
p <- vect(f)
p
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 12, 5  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA
##  type        : <num>    <chr> <num>    <chr> <num>
##  values      :     1 Diekirch     1 Clervaux   312
##                    1 Diekirch     2 Diekirch   218
##                    1 Diekirch     3  Redange   259

We can plot these data in many ways. For example:

plot(p, "NAME_2")

image0

Basics

Geometry and attributes

To extract the attributes (data.frame) from a SpatVector, use:

d <- as.data.frame(p)
head(d)
##   ID_1       NAME_1 ID_2     NAME_2 AREA
## 1    1     Diekirch    1   Clervaux  312
## 2    1     Diekirch    2   Diekirch  218
## 3    1     Diekirch    3    Redange  259
## 4    1     Diekirch    4    Vianden   76
## 5    1     Diekirch    5      Wiltz  263
## 6    2 Grevenmacher    6 Echternach  188

You can also extract the geometry as a a matrix (this is rarely needed).

g <- geom(p)
head(g)
##      geom part        x        y hole
## [1,]    1    1 6.026519 50.17767    0
## [2,]    1    1 6.031361 50.16563    0
## [3,]    1    1 6.035646 50.16410    0
## [4,]    1    1 6.042747 50.16157    0
## [5,]    1    1 6.043894 50.16116    0
## [6,]    1    1 6.048243 50.16008    0

Or as “well-known-text”

g <- geom(p, wkt=TRUE)
substr(g, 1, 50)
##  [1] "POLYGON ((6.026519 50.17767, 6.031361 50.165627, 6"
##  [2] "POLYGON ((6.178368 49.876823, 6.185479 49.870525, "
##  [3] "POLYGON ((5.881378 49.870148, 5.881672 49.868866, "
##  [4] "POLYGON ((6.131309 49.972565, 6.134291 49.972382, "
##  [5] "POLYGON ((5.977929 50.026016, 5.982312 50.022949, "
##  [6] "POLYGON ((6.385532 49.837029, 6.3886 49.833683, 6."
##  [7] "POLYGON ((6.316665 49.623375, 6.31835 49.623157, 6"
##  [8] "POLYGON ((6.425158 49.731644, 6.42657 49.73082, 6."
##  [9] "POLYGON ((5.998312 49.699924, 5.998632 49.698559, "
## [10] "POLYGON ((6.039474 49.448261, 6.036906 49.448696, "
## [11] "POLYGON ((6.155963 49.685047, 6.159284 49.685036, "
## [12] "POLYGON ((6.067982 49.828465, 6.071922 49.825478, "

Variables

You can extract a variable as you would do with a data.frame.

p$NAME_2
##  [1] "Clervaux"         "Diekirch"         "Redange"          "Vianden"
##  [5] "Wiltz"            "Echternach"       "Remich"           "Grevenmacher"
##  [9] "Capellen"         "Esch-sur-Alzette" "Luxembourg"       "Mersch"

To sub-set a SpatVector to one or more variables you can use the notation below. Note how this is different from the above example. Above a vector of values is returned. With the approach below you get a new SpatVector with only one variable.

p[, "NAME_2"]
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 12, 1  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :   NAME_2
##  type        :    <chr>
##  values      : Clervaux
##                Diekirch
##                 Redange

You can add a new variable to a SpatVector just as if it were a data.frame.

set.seed(0)
p$lets <- sample(letters, nrow(p))
p
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 12, 6  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA  lets
##  type        : <num>    <chr> <num>    <chr> <num> <chr>
##  values      :     1 Diekirch     1 Clervaux   312     n
##                    1 Diekirch     2 Diekirch   218     y
##                    1 Diekirch     3  Redange   259     d

Note that to get the number of geometries of SpatVector p, you can use nrow(p), or size(p). You can also do perim(p) to get the “length” of the spatial objects (zero for points, the length of the lines, or the perimeter of the polygons).

perim(p)
##  [1] 117100.12  93477.28  84502.45  44919.14  85032.61  74708.05  57991.42
##  [8]  81203.93  74443.82  95564.74  80618.76  70810.67

Assigning a new value to an existing variable.

p$lets <- sample(LETTERS, nrow(p))
head(p)
##   ID_1       NAME_1 ID_2     NAME_2 AREA lets
## 1    1     Diekirch    1   Clervaux  312    J
## 2    1     Diekirch    2   Diekirch  218    V
## 3    1     Diekirch    3    Redange  259    N
## 4    1     Diekirch    4    Vianden   76    Z
## 5    1     Diekirch    5      Wiltz  263    G
## 6    2 Grevenmacher    6 Echternach  188    I

To get rid of a variable, set it to NULL.

p$lets <- NULL

Merge

You can assign an attributes table (data.frame) to a SpatVector with values<-. To add attributes to a SpatVector that already has attributes use merge (or cbind if you know the order of the records is the same).

dfr <- data.frame(District=p$NAME_1, Canton=p$NAME_2, Value=round(runif(length(p), 100, 1000)))
dfr <- dfr[order(dfr$Canton), ]
pm <- merge(p, dfr, by.x=c('NAME_1', 'NAME_2'), by.y=c('District', 'Canton'))
pm
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 50, 6  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :   NAME_1   NAME_2  ID_1  ID_2  AREA Value
##  type        :    <chr>    <chr> <num> <num> <num> <num>
##  values      : Diekirch Diekirch     1     1   312   406
##                Diekirch Diekirch     1     1   312   534
##                Diekirch Diekirch     1     1   312   640
head(pm)
##     NAME_1   NAME_2 ID_1 ID_2 AREA Value
## 1 Diekirch Diekirch    1    1  312   406
## 2 Diekirch Diekirch    1    1  312   534
## 3 Diekirch Diekirch    1    1  312   640
## 4 Diekirch Diekirch    1    1  312   544
## 5 Diekirch Diekirch    1    1  312   268
## 6 Diekirch Diekirch    1    2  218   406

Note the new variable Value added to pm

Records

Selecting rows (records).

i <- which(p$NAME_1 == 'Grevenmacher')
g <- p[i,]
g
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 3, 5  (geometries, attributes)
##  extent      : 6.169137, 6.528252, 49.46498, 49.85403  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :  ID_1       NAME_1  ID_2       NAME_2  AREA
##  type        : <num>        <chr> <num>        <chr> <num>
##  values      :     2 Grevenmacher     6 Grevenmacher   188
##                    2 Grevenmacher     7 Grevenmacher   129
##                    2 Grevenmacher    12 Grevenmacher   210

It is also possible to interactively select and query records by clicking on a plotted dataset. That is difficult to show here. See ?select for interactively selecting geometries and ?click to identify attributes by clicking on a plot (map).

Append and aggregate

Append

More example data. Object z consists of four polygons; z2 is one of these four polygons.

z <- rast(p)
dim(z) <- c(2,2)
values(z) <- 1:4
names(z) <- 'Zone'
# coerce SpatRaster to SpatVector polygons
z <- as.polygons(z)
z
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 4, 1  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :  Zone
##  type        : <num>
##  values      :     1
##                    2
##                    3
z2 <- z[2,]
plot(p)
plot(z, add=TRUE, border='blue', lwd=5)
plot(z2, add=TRUE, border='red', lwd=2, col='red')

image1

To append SpatVector objects of the same (vector) type you can use c

b <- rbind(p, z)
# with older versions
# b <- c(p, z)
head(b)
##   ID_1       NAME_1 ID_2       NAME_2 AREA Zone
## 1    1     Diekirch    1     Diekirch  312   NA
## 2    1     Diekirch    2     Diekirch  218   NA
## 3    1     Diekirch    3     Diekirch  259   NA
## 4    1     Diekirch    4     Diekirch   76   NA
## 5    1     Diekirch    5     Diekirch  263   NA
## 6    2 Grevenmacher    6 Grevenmacher  188   NA
tail(b)
##    ID_1     NAME_1 ID_2     NAME_2 AREA Zone
## 11    3 Luxembourg   10 Luxembourg  237   NA
## 12    3 Luxembourg   11 Luxembourg  233   NA
## 13  NaN       <NA>  NaN       <NA>  NaN    1
## 14  NaN       <NA>  NaN       <NA>  NaN    2
## 15  NaN       <NA>  NaN       <NA>  NaN    3
## 16  NaN       <NA>  NaN       <NA>  NaN    4

Note how rbind (c for older versions of terra) allows you to append SpatVect objects with different attribute names, unlike the standard rbind for data.frames.

Aggregate

It is common to aggregate (“dissolve”) polygons that have the same value for an attribute of interest. In this case, if we do not care about the second level subdivisions of Luxembourg, we could aggregate by the first level subdivisions.

pa <- aggregate(p, by='NAME_1')
za <- aggregate(z)
plot(za, col='light gray', border='light gray', lwd=5)
plot(pa, add=TRUE, col=rainbow(3), lwd=3, border='white')

image2

It is also possible to aggregate polygons without dissolving the borders.

zag <- aggregate(z, dissolve=FALSE)
zag
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 1, 0  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
plot(zag, col="light gray")

image3

This is a structure that is similar to what you may get for an archipelago: multiple polygons represented as one entity (one row). Use disaggregate to split these up into their parts.

zd <- disaggregate(zag)
zd
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 4, 0  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs

Overlay

There are many different ways to “overlay” vector data. Here are some examples:

Erase

Erase a part of a SpatVector

e <- erase(p, z2)
plot(e)

image4

Intersect

Intersect SpatVectors

i <- intersect(p, z2)
plot(i)

image5

You can also intersect or crop with a SpatExtent (rectangle). The difference between intersect and crop is that with crop the geometry of the second argument is not added to the output.

e <- ext(6, 6.4, 49.7, 50)
pe <- crop(p, e)
plot(p)
plot(e, add=TRUE, lwd=3, col="red")
plot(pe, col='light blue', add=TRUE)
plot(e, add=TRUE, lwd=3, border="blue")

image6

Union

Get the union of two SpatVectors.

u <- union(p, z)
u
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 28, 7  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA  Zone aggregate_by_variable
##  type        : <num>    <chr> <num>    <chr> <num> <num>                 <num>
##  values      :     1 Diekirch     1 Diekirch   312     1                     1
##                    1 Diekirch     1 Diekirch   312     2                     1
##                    1 Diekirch     2 Diekirch   218     1                     1

Note that there are many more polygons now. One for each unique combination of polygons (and attributes in this case).

set.seed(5)
plot(u, col=sample(rainbow(length(u))))

image7

Cover

cover is a combination of intersect and union. intersect returns new (intersected) geometries with the attributes of both input datasets. union appends the geometries and attributes of the input. cover returns the intersection and appends the other geometries and attributes of both datasets.

cov <- cover(p, z[c(1,4),])
cov
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 11, 7  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA  Zone aggregate_by_variable
##  type        : <num>    <chr> <num>    <chr> <num> <num>                 <num>
##  values      :     1 Diekirch     1 Diekirch   312    NA                   NaN
##                    1 Diekirch     2 Diekirch   218    NA                   NaN
##                    1 Diekirch     3 Diekirch   259    NA                   NaN
plot(cov)

image8

Difference

The symmetrical difference of two SpatVectors

dif <- symdif(z,p)
plot(dif, col=rainbow(length(dif)))

image9

dif
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 4, 2  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : +proj=longlat +datum=WGS84 +no_defs
##  names       :  Zone aggregate_by_variable
##  type        : <num>                 <num>
##  values      :     1                     1
##                    2                     1
##                    3                     1

Spatial queries

We can query polygons with points (“point-in-polygon query”).

pts <- matrix(c(6, 6.1, 5.9, 5.7, 6.4, 50, 49.9, 49.8, 49.7, 49.5), ncol=2)
spts <- vect(pts, proj4string=crs(p))
plot(z, col='light blue', lwd=2)
points(spts, col='light gray', pch=20, cex=6)
text(spts, 1:nrow(pts), col='red', font=2, cex=1.5)
lines(p, col='blue', lwd=2)

image10

extract is used for queries between SpatVector and SpatRaster objects, and also for queries between SpatVectors.

extract(spts, p)
##      id.x id.y
## [1,]    1   NA
extract(spts, z)
##      id.x id.y
## [1,]    1   NA