Vector data manipulation

This chapter illustrates some ways in which we can manipulate vector data. We start with an example SpatVector that we read from a shapefile.

library(terra)
## terra 1.8.9
f <- system.file("ex/lux.shp", package="terra")
p <- vect(f)
p
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 12, 6  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  source      : lux.shp
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA       POP
##  type        : <num>    <chr> <num>    <chr> <num>     <num>
##  values      :     1 Diekirch     1 Clervaux   312 1.808e+04
##                    1 Diekirch     2 Diekirch   218 3.254e+04
##                    1 Diekirch     3  Redange   259 1.866e+04

We can plot these data in many ways. For example:

plot(p, "NAME_2")

image1

Basics

Geometry and attributes

To extract the attributes (data.frame) from a SpatVector, use:

d <- as.data.frame(p)
head(d)
##   ID_1       NAME_1 ID_2     NAME_2 AREA   POP
## 1    1     Diekirch    1   Clervaux  312 18081
## 2    1     Diekirch    2   Diekirch  218 32543
## 3    1     Diekirch    3    Redange  259 18664
## 4    1     Diekirch    4    Vianden   76  5163
## 5    1     Diekirch    5      Wiltz  263 16735
## 6    2 Grevenmacher    6 Echternach  188 18899

You can also extract the geometry as a a matrix (this is rarely needed).

g <- geom(p)
head(g)
##      geom part        x        y hole
## [1,]    1    1 6.026519 50.17767    0
## [2,]    1    1 6.031361 50.16563    0
## [3,]    1    1 6.035646 50.16410    0
## [4,]    1    1 6.042747 50.16157    0
## [5,]    1    1 6.043894 50.16116    0
## [6,]    1    1 6.048243 50.16008    0

Or as “well-known-text”

g <- geom(p, wkt=TRUE)
substr(g, 1, 50)
##  [1] "POLYGON ((6.026519 50.17767, 6.031361 50.165627, 6"
##  [2] "POLYGON ((6.178368 49.876823, 6.185479 49.870525, "
##  [3] "POLYGON ((5.881378 49.870148, 5.881672 49.868866, "
##  [4] "POLYGON ((6.131309 49.972565, 6.134291 49.972382, "
##  [5] "POLYGON ((5.977929 50.026016, 5.982312 50.022949, "
##  [6] "POLYGON ((6.385532 49.837029, 6.3886 49.833683, 6."
##  [7] "POLYGON ((6.316665 49.623375, 6.31835 49.623157, 6"
##  [8] "POLYGON ((6.425158 49.731644, 6.42657 49.73082, 6."
##  [9] "POLYGON ((5.998312 49.699924, 5.998632 49.698559, "
## [10] "POLYGON ((6.039474 49.448261, 6.036906 49.448696, "
## [11] "POLYGON ((6.155963 49.685047, 6.159284 49.685036, "
## [12] "POLYGON ((6.067982 49.828465, 6.071922 49.825478, "

Variables

You can extract a variable as you would do with a data.frame.

p$NAME_2
##  [1] "Clervaux"         "Diekirch"         "Redange"          "Vianden"
##  [5] "Wiltz"            "Echternach"       "Remich"           "Grevenmacher"
##  [9] "Capellen"         "Esch-sur-Alzette" "Luxembourg"       "Mersch"

To sub-set a SpatVector to one or more variables you can use the notation below. Note how this is different from the above example. Above a vector of values is returned. With the approach below you get a new SpatVector with only one variable.

p[, "NAME_2"]
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 12, 1  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  source      : lux.shp
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :   NAME_2
##  type        :    <chr>
##  values      : Clervaux
##                Diekirch
##                 Redange

You can add a new variable to a SpatVector just as if it were a data.frame.

set.seed(0)
p$lets <- sample(letters, nrow(p))
p
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 12, 7  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  source      : lux.shp
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA       POP  lets
##  type        : <num>    <chr> <num>    <chr> <num>     <num> <chr>
##  values      :     1 Diekirch     1 Clervaux   312 1.808e+04     n
##                    1 Diekirch     2 Diekirch   218 3.254e+04     y
##                    1 Diekirch     3  Redange   259 1.866e+04     d

Note that to get the number of geometries of SpatVector p, you can use nrow(p), or size(p). You can also do perim(p) to get the “length” of the spatial objects (zero for points, the length of the lines, or the perimeter of the polygons).

perim(p)
##  [1] 117100.12  93477.28  84502.45  44919.14  85032.61  74708.05  57991.42
##  [8]  81203.93  74443.82  95564.74  80618.76  70810.67

Assigning a new value to an existing variable.

p$lets <- sample(LETTERS, nrow(p))
head(p)
##   ID_1       NAME_1 ID_2     NAME_2 AREA   POP lets
## 1    1     Diekirch    1   Clervaux  312 18081    J
## 2    1     Diekirch    2   Diekirch  218 32543    V
## 3    1     Diekirch    3    Redange  259 18664    N
## 4    1     Diekirch    4    Vianden   76  5163    Z
## 5    1     Diekirch    5      Wiltz  263 16735    G
## 6    2 Grevenmacher    6 Echternach  188 18899    I

To get rid of a variable, set it to NULL.

p$lets <- NULL

Merge

You can assign an attributes table (data.frame) to a SpatVector with values<-. To add attributes to a SpatVector that already has attributes use merge (or cbind if you know the order of the records is the same).

dfr <- data.frame(District=p$NAME_1, Canton=p$NAME_2, Value=round(runif(length(p), 100, 1000)))
dfr <- dfr[order(dfr$Canton), ]
pm <- merge(p, dfr, by.x=c('NAME_1', 'NAME_2'), by.y=c('District', 'Canton'))
pm
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 12, 7  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :   NAME_1   NAME_2  ID_1  ID_2  AREA       POP Value
##  type        :    <chr>    <chr> <num> <num> <num>     <num> <num>
##  values      : Diekirch Clervaux     1     1   312 1.808e+04   406
##                Diekirch Diekirch     1     2   218 3.254e+04   534
##                Diekirch  Redange     1     3   259 1.866e+04   640
head(pm)
##         NAME_1     NAME_2 ID_1 ID_2 AREA   POP Value
## 1     Diekirch   Clervaux    1    1  312 18081   406
## 2     Diekirch   Diekirch    1    2  218 32543   534
## 3     Diekirch    Redange    1    3  259 18664   640
## 4     Diekirch    Vianden    1    4   76  5163   544
## 5     Diekirch      Wiltz    1    5  263 16735   268
## 6 Grevenmacher Echternach    2    6  188 18899   845

Note the new variable Value added to pm

Records

Selecting rows (records).

i <- which(p$NAME_1 == 'Grevenmacher')
g <- p[i,]
g
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 3, 6  (geometries, attributes)
##  extent      : 6.169137, 6.528252, 49.46498, 49.85403  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :  ID_1       NAME_1  ID_2       NAME_2  AREA       POP
##  type        : <num>        <chr> <num>        <chr> <num>     <num>
##  values      :     2 Grevenmacher     6   Echternach   188  1.89e+04
##                    2 Grevenmacher     7       Remich   129 2.237e+04
##                    2 Grevenmacher    12 Grevenmacher   210 2.983e+04

It is also possible to interactively select and query records by clicking on a plotted dataset. That is difficult to show here. See ?sel for interactively selecting geometries and ?click to identify attributes by clicking on a plot (map).

Append and aggregate

Append

More example data. Object z consists of four polygons; z2 is one of these four polygons.

z <- rast(p)
dim(z) <- c(2,2)
values(z) <- 1:4
names(z) <- 'Zone'
# coerce SpatRaster to SpatVector polygons
z <- as.polygons(z)
z
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 4, 1  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :  Zone
##  type        : <int>
##  values      :     1
##                    2
##                    3
z2 <- z[2,]
plot(p)
plot(z, add=TRUE, border='blue', lwd=5)
plot(z2, add=TRUE, border='red', lwd=2, col='red')

image2

To append SpatVector objects of the same (vector) type you can use rbind

b <- rbind(p, z)
head(b)
##   ID_1       NAME_1 ID_2     NAME_2 AREA   POP Zone
## 1    1     Diekirch    1   Clervaux  312 18081   NA
## 2    1     Diekirch    2   Diekirch  218 32543   NA
## 3    1     Diekirch    3    Redange  259 18664   NA
## 4    1     Diekirch    4    Vianden   76  5163   NA
## 5    1     Diekirch    5      Wiltz  263 16735   NA
## 6    2 Grevenmacher    6 Echternach  188 18899   NA
tail(b)
##    ID_1     NAME_1 ID_2     NAME_2 AREA    POP Zone
## 11    3 Luxembourg   10 Luxembourg  237 182607   NA
## 12    3 Luxembourg   11     Mersch  233  32112   NA
## 13  NaN       <NA>  NaN       <NA>  NaN    NaN    1
## 14  NaN       <NA>  NaN       <NA>  NaN    NaN    2
## 15  NaN       <NA>  NaN       <NA>  NaN    NaN    3
## 16  NaN       <NA>  NaN       <NA>  NaN    NaN    4

Note how rbind allows you to append SpatVector objects with different attribute names, unlike the standard rbind for data.frames.

Aggregate

It is common to aggregate (“dissolve”) polygons that have the same value for an attribute of interest. In this case, if we do not care about the second level subdivisions of Luxembourg, we could aggregate by the first level subdivisions.

pa <- aggregate(p, by='NAME_1')
za <- aggregate(z)
plot(za, col='light gray', border='light gray', lwd=5)
plot(pa, add=TRUE, col=rainbow(3), lwd=3, border='white')

image3

It is also possible to aggregate polygons without dissolving the borders.

zag <- aggregate(z, dissolve=FALSE)
zag
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 1, 0  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
plot(zag, col="light gray")

image4

This is a structure that is similar to what you may get for an archipelago: multiple polygons represented as one entity (one row). Use disagg to split these up into their parts.

zd <- disagg(zag)
zd
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 4, 0  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)

Overlay

There are many different ways to “overlay” vector data. Here are some examples:

Erase

Erase a part of a SpatVector

e <- erase(p, z2)
plot(e)

image5

Intersect

Intersect SpatVectors

i <- intersect(p, z2)
plot(i)

image6

You can also intersect or crop with a SpatExtent (rectangle). The difference between intersect and crop is that with crop the geometry of the second argument is not added to the output.

e <- ext(6, 6.4, 49.7, 50)
pe <- crop(p, e)
plot(p)
plot(e, add=TRUE, lwd=3, col="red")
plot(pe, col='light blue', add=TRUE)
plot(e, add=TRUE, lwd=3, border="blue")

image7

Union

Get the union of two SpatVectors.

u <- union(p, z)
u
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 28, 7  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :  Zone  ID_1 NAME_1  ID_2 NAME_2  AREA   POP
##  type        : <int> <num>  <chr> <num>  <chr> <num> <num>
##  values      :     1   NaN     NA   NaN     NA   NaN   NaN
##                    2   NaN     NA   NaN     NA   NaN   NaN
##                    3   NaN     NA   NaN     NA   NaN   NaN

Note that there are many more polygons now. One for each unique combination of polygons (and attributes in this case).

set.seed(5)
plot(u, col=sample(rainbow(length(u))))

image8

Cover

cover is a combination of intersect and union. intersect returns new (intersected) geometries with the attributes of both input datasets. union appends the geometries and attributes of the input. cover returns the intersection and appends the other geometries and attributes of both datasets.

cov <- cover(p, z[c(1,4),])
cov
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 11, 7  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA       POP  Zone
##  type        : <num>    <chr> <num>    <chr> <num>     <num> <int>
##  values      :     1 Diekirch     1 Clervaux   312 1.808e+04    NA
##                    1 Diekirch     2 Diekirch   218 3.254e+04    NA
##                    1 Diekirch     3  Redange   259 1.866e+04    NA
plot(cov)

image9

Difference

The symmetrical difference of two SpatVectors

dif <- symdif(z,p)
plot(dif, col=rainbow(length(dif)))

image10

dif
##  class       : SpatVector
##  geometry    : polygons
##  dimensions  : 4, 1  (geometries, attributes)
##  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
##  coord. ref. : lon/lat WGS 84 (EPSG:4326)
##  names       :  Zone
##  type        : <int>
##  values      :     1
##                    2
##                    3

Spatial queries

We can query polygons with points (“point-in-polygon query”).

pts <- matrix(c(6, 6.1, 5.9, 5.7, 6.4, 50, 49.9, 49.8, 49.7, 49.5), ncol=2)
spts <- vect(pts, crs=crs(p))
plot(z, col='light blue', lwd=2)
points(spts, col='light gray', pch=20, cex=6)
text(spts, 1:nrow(pts), col='red', font=2, cex=1.5)
lines(p, col='blue', lwd=2)

image11

extract is used for queries between SpatVector and SpatRaster objects, and also for queries between SpatVectors.

extract(spts, p)
##       id.y id.x
##  [1,]    1  NaN
##  [2,]    2  NaN
##  [3,]    3  NaN
##  [4,]    4  NaN
##  [5,]    5  NaN
##  [6,]    6  NaN
##  [7,]    7  NaN
##  [8,]    8  NaN
##  [9,]    9  NaN
## [10,]   10  NaN
## [11,]   11  NaN
## [12,]   12  NaN
extract(spts, z)
##      id.y id.x
## [1,]    1  NaN
## [2,]    2  NaN
## [3,]    3  NaN
## [4,]    4  NaN