examples.Rmd
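The hierarchy of sf objects goes sfg → sfc → sf: an sfg is a single geometry, an sfc is a collection of sfg geometries, and an sf is an sfc with data attached.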
The `sfg_` group of functions assumes the input is a single geometry.
## two values make an XY point
sfg_point(1:2)
# POINT (1 2)
## a third value adds the Z dimension
sfg_point(1:3)
# POINT Z (1 2 3)
## a fourth value adds the M dimension
sfg_point(1:4)
# POINT ZM (1 2 3 4)
## a plain vector is still a single coordinate when used as a linestring
sfg_linestring(1:4)
# LINESTRING ZM (1 2 3 4)
## with a data.frame, each row is one coordinate
df <- data.frame(
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
sfg_linestring(df)
# LINESTRING (1 1, 1 4, 4 4, 4 1, 1 1)
sfg_polygon(df)
# POLYGON ((1 1, 1 4, 4 4, 4 1, 1 1))
The `sfc_` group of functions lets you specify an ‘id’ value to identify individual geometries (except for `sfc_point()`, where every row is an individual geometry).
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
## no columns are named, so 'id' is read as a coordinate too (hence XYZ)
sfc_point(df)
# Geometry set for 5 features
# geometry type: POINT
# dimension: XYZ
# bbox: xmin: 1 ymin: 1 xmax: 3 ymax: 4
# z_range: zmin: 1 zmax: 4
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# POINT Z (1 1 1)
# POINT Z (1 1 4)
# POINT Z (2 4 4)
# POINT Z (2 4 1)
# POINT Z (3 1 1)
## rows sharing an 'id' are grouped into one MULTIPOINT
sfc_multipoint(df, multipoint_id = "id")
# Geometry set for 3 features
# geometry type: MULTIPOINT
# dimension: XY
# bbox: xmin: 1 ymin: 1 xmax: 4 ymax: 4
# z_range: zmin: NA zmax: NA
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# MULTIPOINT (1 1, 1 4)
# MULTIPOINT (4 4, 4 1)
# MULTIPOINT (1 1)
## rows sharing an 'id' are grouped into one LINESTRING
sfc_linestring(df, linestring_id = "id")
# Geometry set for 3 features
# geometry type: LINESTRING
# dimension: XY
# bbox: xmin: 1 ymin: 1 xmax: 4 ymax: 4
# z_range: zmin: NA zmax: NA
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# LINESTRING (1 1, 1 4)
# LINESTRING (4 4, 4 1)
# LINESTRING (1 1)
## leaving the 'id' field blank
sfc_polygon(df)
# Geometry set for 1 feature
# geometry type: POLYGON
# dimension: XYZ
# bbox: xmin: 1 ymin: 1 xmax: 3 ymax: 4
# z_range: zmin: 1 zmax: 4
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# POLYGON Z ((1 1 1, 1 1 4, 2 4 4, 2 4 1, 3 1 1, ...
The `sf_` functions also let you specify an ‘id’ value, but in this case the id is kept as a column on the resulting object.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
## no columns are named, so 'id' is read as a coordinate too (hence XYZ)
sf_point(df)
# Simple feature collection with 5 features and 0 fields
# geometry type: POINT
# dimension: XYZ
# bbox: xmin: 1 ymin: 1 xmax: 3 ymax: 4
# z_range: zmin: 1 zmax: 4
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# geometry
# 1 POINT Z (1 1 1)
# 2 POINT Z (1 1 4)
# 3 POINT Z (2 4 4)
# 4 POINT Z (2 4 1)
# 5 POINT Z (3 1 1)
## the grouping 'id' appears as a field on the result
sf_multipoint(df, multipoint_id = "id")
# Simple feature collection with 3 features and 1 field
# geometry type: MULTIPOINT
# dimension: XY
# bbox: xmin: 1 ymin: 1 xmax: 4 ymax: 4
# z_range: zmin: NA zmax: NA
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# id geometry
# 1 1 MULTIPOINT (1 1, 1 4)
# 2 2 MULTIPOINT (4 4, 4 1)
# 3 3 MULTIPOINT (1 1)
sf_linestring(df, linestring_id = "id")
# Simple feature collection with 3 features and 1 field
# geometry type: LINESTRING
# dimension: XY
# bbox: xmin: 1 ymin: 1 xmax: 4 ymax: 4
# z_range: zmin: NA zmax: NA
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# id geometry
# 1 1 LINESTRING (1 1, 1 4)
# 2 2 LINESTRING (4 4, 4 1)
# 3 3 LINESTRING (1 1)
## leaving the 'id' field blank
sf_polygon(df)
# Simple feature collection with 1 feature and 1 field
# geometry type: POLYGON
# dimension: XYZ
# bbox: xmin: 1 ymin: 1 xmax: 3 ymax: 4
# z_range: zmin: 1 zmax: 4
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# id geometry
# 1 1 POLYGON Z ((1 1 1, 1 1 4, 2...
In all these examples I haven’t needed to specify the geometry columns because, other than the id field, all the other columns are used for the coordinates.
If your data.frame has other, non-geometry and non-id columns, you must specify at least the `x` and `y` parameters.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
## add a non-coordinate column: one letter per id
df[["val"]] <- letters[df[["id"]]]
## naming x and y restricts the coordinates to those two columns
sf_point(df, x = "x", y = "y")
# Simple feature collection with 5 features and 0 fields
# geometry type: POINT
# dimension: XY
# bbox: xmin: 1 ymin: 1 xmax: 4 ymax: 4
# z_range: zmin: NA zmax: NA
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# geometry
# 1 POINT (1 1)
# 2 POINT (1 4)
# 3 POINT (4 4)
# 4 POINT (4 1)
# 5 POINT (1 1)
sf_linestring(df, x = "x", y = "y", linestring_id = "id")
# Simple feature collection with 3 features and 1 field
# geometry type: LINESTRING
# dimension: XY
# bbox: xmin: 1 ymin: 1 xmax: 4 ymax: 4
# z_range: zmin: NA zmax: NA
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# id geometry
# 1 1 LINESTRING (1 1, 1 4)
# 2 2 LINESTRING (4 4, 4 1)
# 3 3 LINESTRING (1 1)
If you want to keep all the other columns, set `keep = TRUE`:
## keep = TRUE carries the 'val' column through to the result
sf_linestring(
  df,
  x = "x",
  y = "y",
  linestring_id = "id",
  keep = TRUE
)
# Simple feature collection with 3 features and 2 fields
# geometry type: LINESTRING
# dimension: XY
# bbox: xmin: 1 ymin: 1 xmax: 4 ymax: 4
# z_range: zmin: NA zmax: NA
# m_range: mmin: NA mmax: NA
# epsg (SRID): NA
# proj4string: NA
# id val geometry
# 1 1 a LINESTRING (1 1, 1 4)
# 2 2 b LINESTRING (4 4, 4 1)
# 3 3 c LINESTRING (1 1)
As of version 1.0 you can now convert from `sfg`, `sfc` and `sf` objects to data.frames.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
df[["val"]] <- letters[df[["id"]]]
## build an sf object first, then flatten it back to one row per coordinate
sf <- sf_linestring(
  df,
  x = "x",
  y = "y",
  linestring_id = "id",
  keep = TRUE
)
sf_to_df(sf)
# sfg_id linestring_id x y
# 1 1 1 1 1
# 2 1 1 1 4
# 3 2 2 4 4
# 4 2 2 4 1
# 5 3 3 1 1
And if you want to keep all the other columns and fill them down each row of the data.frame, set fill = TRUE
## fill = TRUE repeats the 'id' and 'val' fields on every coordinate row
sf_to_df(sf, fill = TRUE)
# id val sfg_id linestring_id x y
# 1 1 a 1 1 1 1
# 2 1 a 1 1 1 4
# 3 2 b 2 2 4 4
# 4 2 b 2 2 4 1
# 5 3 c 3 3 1 1
Here’s a quick benchmark showing how well this library performs
## Benchmark: build one LINESTRING per id (5 coordinates each) in two ways:
##   dt        - sf constructors called once per group through data.table
##   sfheaders - a single sfheaders::sf_linestring() call on the data.frame
n <- 1e5
df <- data.frame(
  id = rep(1:(n / 5), each = 5),
  x = rnorm(n),
  y = rnorm(n)
)
library(data.table)
library(microbenchmark)
dt <- as.data.table(df)
microbenchmark(
  dt = {
    sf <- dt[
      , {
        ## c(x, y) holds every x followed by every y, so the matrix must be
        ## filled column-wise (the default) for column 1 to be x and column 2
        ## to be y. Filling with byrow = TRUE would pair up consecutive
        ## x values instead and build the wrong geometry.
        geometry <- sf::st_linestring(x = matrix(c(x, y), ncol = 2))
        geometry <- sf::st_sf(geometry = sf::st_sfc(geometry))
      }
      , by = id
    ]
    sf <- sf::st_as_sf(sf)
  },
  sfheaders = {
    sfh <- sfheaders::sf_linestring(
      obj = df,
      linestring_id = "id"
    )
  },
  times = 5
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# dt 6599.67479 6654.12357 6779.23543 6750.19807 6833.46262 7058.71809 5
# sfheaders 21.07775 21.30438 23.20592 23.21665 25.06429 25.36654 5