Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement osmdata_data_frame supporting out meta; and adiff queries #285

Merged
merged 18 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export(osm_points)
export(osm_poly2line)
export(osm_polygons)
export(osmdata)
export(osmdata_data_frame)
export(osmdata_sc)
export(osmdata_sf)
export(osmdata_sp)
Expand Down
303 changes: 296 additions & 7 deletions R/get-osmdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ fill_overpass_data <- function (obj, doc, quiet = TRUE, encoding = "UTF-8") {
doc <- xml2::read_xml (doc)
}
obj <- get_metadata (obj, doc)
doc <- as.character (doc)
}

list (obj = obj, doc = doc)
Expand Down Expand Up @@ -223,6 +222,18 @@ get_metadata <- function (obj, doc) {
meta$datetime_to <- x [2]
meta$query_type <- "date"

} else if (grepl ("adiff", x [1])) {

if (length (x) < 2) {
stop ("unrecongised query format")
}
meta$datetime_from <- x [2]
meta$datetime_to <- x [4]
if (!is_datetime (meta$datetime_to)) { # adiff opq without datetime2
meta$datetime_to <- xml2::xml_text (xml2::xml_find_all (doc, "//meta/@osm_base"))
}
meta$query_type <- "adiff"

} else if (grepl ("diff", x [1])) {

if (length (x) < 4) {
Expand All @@ -233,19 +244,55 @@ get_metadata <- function (obj, doc) {
meta$query_type <- "diff"
}

} else {
} else if (inherits (q, "overpass_query")) {

if (!is.null (attr (q, "datetime2"))) {

meta$datetime_to <- attr (q, "datetime2")
meta$datetime_from <- attr (q, "datetime")
meta$query_type <- "diff"
meta$datetime_to <- attr (q, "datetime2")

if (grepl ("adiff", q$prefix) ||
"action" %in% xml2::xml_name (xml2::xml_children (doc))) {
meta$query_type <- "adiff"
} else {
meta$query_type <- "diff"
}

} else if (!is.null (attr (q, "datetime"))) {

meta$datetime_to <- attr (q, "datetime")
meta$query_type <- "date"
if (grepl ("adiff", q$prefix) ||
"action" %in% xml2::xml_name (xml2::xml_children (doc))) {
meta$datetime_from <- attr (q, "datetime")
meta$datetime_to <- xml2::xml_text (xml2::xml_find_all (doc, "//meta/@osm_base"))
meta$query_type <- "adiff"
} else {
meta$datetime_to <- attr (q, "datetime")
meta$query_type <- "date"
}

}

} else { # is.null (q)

if ("action" %in% xml2::xml_name (xml2::xml_children (doc))) {
osm_actions <- xml2::xml_find_all (doc, ".//action")
action_type <- xml2::xml_attr (osm_actions, attr = "type")
# Adiff have <new> for deleted objects, but diff have not.
if (length(sel_del <- which (action_type %in% "delete")) > 0) {
if ("new" %in% xml2::xml_name (xml2::xml_children (osm_actions[sel_del[1]]))) {
meta$query_type <- "adiff"
} else {
meta$query_type <- "diff"
}
} else {
meta$query_type <- "diff"
warning ("OSM data is ambiguous and can correspond either to a diff ",
"or an adiff query. As \"q\" parameter is missing, it's ",
"not possible to distinguish.\n\tAssuming diff.")
}

}

}

obj$meta <- meta
Expand All @@ -256,6 +303,12 @@ get_metadata <- function (obj, doc) {
return (obj)
}

is_datetime <- function (x) {
jmaspons marked this conversation as resolved.
Show resolved Hide resolved

ptn <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z]$"
mpadge marked this conversation as resolved.
Show resolved Hide resolved
grepl (ptn, x)
}

#' Make an 'sf' object from an 'sfc' list and associated data matrix returned
#' from 'rcpp_osmdata_sf'
#'
Expand Down Expand Up @@ -478,7 +531,7 @@ osmdata_sc <- function (q, doc, quiet = TRUE) {
message ("converting OSM data to sc format")
}

res <- rcpp_osmdata_sc (paste0 (temp$doc))
res <- rcpp_osmdata_sc (paste0 (doc))

if (nrow (res$object_link_edge) > 0L) {
res$object_link_edge$native_ <- TRUE
Expand Down Expand Up @@ -526,3 +579,239 @@ getbb_sc <- function (x) {
apply (x$vertex [, 1:2], 2, range) %>%
bbox_to_string ()
}

#' Return an OSM Overpass query as a \link{data.frame} object.
#'
# TODO: recommend queries with `out tags;` when implemented.
#'
#' @inheritParams osmdata_sp
#' @param q An object of class `overpass_query` constructed with
#' \link{opq} and \link{add_osm_feature}. May be be omitted,
#' in which case the attributes of the \link{data.frame} will not include
#' the query.
#' @param stringsAsFactors Should character strings in the 'data.frame' be
#' coerced to factors?
#' @return A `data.frame` with id, type and tags of the the objects from the query.
#'
#' @family extract
#' @export
#'
#' @examples
#' \dontrun{
#' hampi_df <- opq ("hampi india") %>%
#' add_osm_feature (key = "historic", value = "ruins") %>%
#' osmdata_data_frame ()
#' attr (hampi_df, "bbox")
#' attr (hampi_df, "overpass_call")
#' attr (hampi_df, "meta")
#' }
osmdata_data_frame <- function (q, doc, quiet = TRUE, stringsAsFactors = FALSE) {

obj <- osmdata () # uses class def

if (missing (q)) {
if (missing (doc)) {
stop ('arguments "q" and "doc" are missing, with no default. ',
"At least one must be provided.")
}
if (!quiet) {
message ("q missing: osmdata object will not include query")
}
} else if (is (q, "overpass_query")) {
obj$bbox <- q$bbox
obj$overpass_call <- opq_string_intern (q, quiet = quiet)
} else if (is.character (q)) {
obj$overpass_call <- q
} else {
stop ("q must be an overpass query or a character string")
}

temp <- fill_overpass_data (obj, doc, quiet = quiet)
obj <- temp$obj
doc <- temp$doc

if (!quiet) {
message ("converting OSM data to a data.frame")
}

if (isTRUE (obj$meta$query_type == "adiff")) {
datetime_from <- obj$meta$datetime_from
if (is.null(datetime_from)) datetime_from <- "old"
datetime_to <- obj$meta$datetime_to
if (is.null(datetime_to)) datetime_to <- "new"
df <- xml_adiff_to_df (doc, datetime_from = datetime_from, datetime_to = datetime_to,
stringsAsFactors = stringsAsFactors)
} else {
df <- xml_to_df (doc, stringsAsFactors = stringsAsFactors)
if (isTRUE (obj$meta$query_type == "diff")) {
df <- unique (df)
}
}
attr (df, "bbox") <- obj$bbox
attr (df, "overpass_call") <- obj$overpass_call
attr (df, "meta") <- obj$meta

return (df)
}

xml_to_df <- function (doc, stringsAsFactors = FALSE) {

osm_obj <- xml2::xml_find_all (doc, ".//node|.//way|.//relation")

if (length (osm_obj) == 0) {
return (data.frame (osm_type = character (), osm_id = character (),
stringsAsFactors = stringsAsFactors))
}

osm_type <- xml2::xml_name (osm_obj)
osm_id <- xml2::xml_attr (osm_obj, attr = "id")

tags <- xml2::xml_find_all (osm_obj, xpath = ".//tag", flatten = FALSE)
tagsL <- lapply (tags, function(x) {
tag <- xml2::xml_attrs (x)
## Improvement for geometries (many nodes without tags) but worst for `out tags;`
# if (length (tag) == 0) return (list2DF (nrow = 1))
tag <- structure (lapply (tag, function (y) y["v"]),
names = lapply (tag, function (y) y["k"]))
list2DF (tag, nrow = 1)
})

df <- do.call (rbind_add_columns, c (tagsL, list (stringsAsFactors = stringsAsFactors)))
df <- df[, order (names (df))]

if (all (xml2::xml_has_attr (osm_obj,
c ("version", "timestamp", "changeset", "uid", "user")))) {

osm_version <- xml2::xml_attr (osm_obj, attr = "version")
osm_timestamp <- xml2::xml_attr (osm_obj, attr = "timestamp")
osm_changeset <- xml2::xml_attr (osm_obj, attr = "changeset")
osm_uid <- xml2::xml_attr (osm_obj, attr = "uid")
osm_user <- xml2::xml_attr (osm_obj, attr = "user")

df <- data.frame (osm_type, osm_id, osm_version, osm_timestamp,
osm_changeset, osm_uid, osm_user, df,
stringsAsFactors = stringsAsFactors, check.names = FALSE)

} else {
df <- data.frame (osm_type, osm_id, df,
stringsAsFactors = stringsAsFactors, check.names = FALSE)
}

return (df)
}


xml_adiff_to_df <- function (doc, datetime_from, datetime_to, stringsAsFactors=FALSE) {

osm_actions <- xml2::xml_find_all (doc, ".//action")

if (length (osm_actions) == 0) {
return (data.frame (osm_type = character (), osm_id = character (),
stringsAsFactors = stringsAsFactors))
}

action_type <- xml2::xml_attr (osm_actions, attr = "type")

dfL <- mapply (function (action, type) {
osm_obj <- xml2::xml_find_all (action, ".//node|.//way|.//relation")
osm_type <- xml2::xml_name (osm_obj)
osm_id <- xml2::xml_attr (osm_obj, attr = "id")

if (type == "modify") {

dates <- c (datetime_from, datetime_to)
tags <- xml2::xml_find_all (osm_obj, xpath = ".//tag", flatten = FALSE)
tagsL <- mapply (function(x, adiff_date) {
tag <- xml2::xml_attrs (x)
tag <- structure (lapply (tag, function (y) y["v"]),
names = lapply (tag, function (y) y["k"]))
list2DF (c (list (adiff_action = "modify",
adiff_date = adiff_date), tag), nrow = 1)
}, x = tags, adiff_date = dates, SIMPLIFY = FALSE)
df <- do.call (rbind_add_columns, c (tagsL, list (stringsAsFactors = stringsAsFactors)))

} else if (type == "delete") {

dates <- c (datetime_from, datetime_to)
osm_visible <- xml2::xml_attr (osm_obj, attr = "visible")
tags <- xml2::xml_find_all (osm_obj, xpath = ".//tag", flatten = FALSE)
tagsL <- mapply (function(x, adiff_date, adiff_visible) {
tag <- xml2::xml_attrs (x)
if (length (tag) == 0) return (list2DF (nrow = 1))
tag <- structure (lapply (tag, function (y) y["v"]),
names = lapply (tag, function (y) y["k"]))
list2DF (c (list (adiff_action = "delete", adiff_date = adiff_date,
adiff_visible = adiff_visible), tag), nrow = 1)
}, x = tags, adiff_date = dates, adiff_visible = osm_visible, SIMPLIFY = FALSE)
df <- do.call (rbind_add_columns, c (tagsL, list (stringsAsFactors = stringsAsFactors)))

} else if (type == "create") {

tags <- xml2::xml_find_all (osm_obj, xpath = ".//tag", flatten = TRUE)
tag <- xml2::xml_attrs (tags)
tag <- structure (lapply (tag, function (y) y["v"]),
names = lapply (tag, function (y) y["k"]))
df <- list2DF (c (list (adiff_action = "create",
adiff_date = datetime_to), tag), nrow = 1)

}

meta <- all (xml2::xml_has_attr (xml2::xml_find_all (osm_actions, ".//node|.//way|.//relation"),
c ("version", "timestamp", "changeset", "uid", "user")))
if (meta) {
osm_version <- xml2::xml_attr (osm_obj, attr = "version")
osm_timestamp <- xml2::xml_attr (osm_obj, attr = "timestamp")
osm_changeset <- xml2::xml_attr (osm_obj, attr = "changeset")
osm_uid <- xml2::xml_attr (osm_obj, attr = "uid")
osm_user <- xml2::xml_attr (osm_obj, attr = "user")

df <- data.frame (osm_type, osm_id, osm_version, osm_timestamp,
osm_changeset, osm_uid, osm_user, df,
stringsAsFactors = stringsAsFactors, check.names = FALSE)
} else {
df <- data.frame (osm_id, osm_type, df,
stringsAsFactors = stringsAsFactors, check.names = FALSE)
}

return (df)
}, action = osm_actions, type = action_type)

df <- do.call (rbind_add_columns, c (dfL, list (stringsAsFactors = stringsAsFactors)))
sel_FALSE <- which (df$adiff_visible == "false")
sel_TRUE <- which (df$adiff_visible == "true")
df$adiff_visible <- NA
df$adiff_visible[sel_FALSE]<- FALSE
df$adiff_visible[sel_TRUE]<- TRUE

ord_cols <- intersect (c ("adiff_action", "adiff_date", "adiff_visible",
"osm_type", "osm_id", "osm_version", "osm_timestamp",
"osm_changeset", "osm_uid", "osm_user"),
names(df))

ord_cols <- c (ord_cols, setdiff (sort(names (df)), ord_cols))
df <- df[, ord_cols]

return (df)
}

rbind_add_columns <- function (..., deparse.level = 0, make.row.names = FALSE,
stringsAsFactors=FALSE) {

input <- list(...)
col_names <- unique (unlist (lapply (input, names)))

res <- lapply (input, function (x) {
mis_cols <- setdiff (col_names, names (x))
mis_cols <- structure (as.list ( rep (list (rep (NA, nrow (x))),
length (mis_cols))), names = mis_cols)
out <- list2DF (c (x, mis_cols))
out <- out[, col_names]
})

res <- c (res, list (deparse.level = deparse.level,
make.row.names = make.row.names,
stringsAsFactors = stringsAsFactors))
res <- do.call (rbind, res)

return (res)
}
Loading