Skip to content

Commit

Permalink
Adding parser for timespan from iso8601 (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
dgkf committed Jul 21, 2022
1 parent abb3edb commit 1e44667
Show file tree
Hide file tree
Showing 22 changed files with 445 additions and 88 deletions.
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ S3method(as.timespan,default)
S3method(c,partial_time)
S3method(definitely,partial_time_logical)
S3method(dim,partial_time)
S3method(end,timespan)
S3method(format,partial_time)
S3method(format,partial_time_logical)
S3method(format,pillar_shaft_partial_time)
Expand All @@ -32,6 +33,7 @@ S3method(impute_time,partial_time)
S3method(includes,partial_time)
S3method(includes.partial_time,partial_time)
S3method(is.na,partial_time)
S3method(is.na,timespan)
S3method(lubridate::hour,partial_time)
S3method(lubridate::mday,partial_time)
S3method(lubridate::minute,partial_time)
Expand All @@ -57,6 +59,9 @@ S3method(print,partial_difftime)
S3method(print,partial_time_logical)
S3method(rep,partial_time)
S3method(second,partial_time)
S3method(start,timespan)
S3method(stats::end,timespan)
S3method(stats::start,timespan)
S3method(to_gmt,array)
S3method(to_gmt,matrix)
S3method(to_gmt,partial_time)
Expand All @@ -75,6 +80,7 @@ S3method(vec_cast.logical,partial_time)
S3method(vec_cast.partial_time,character)
S3method(vec_cast.partial_time,default)
S3method(vec_cast.partial_time,matrix)
S3method(vec_cast.timespan,character)
S3method(vec_cast.timespan,default)
S3method(vec_cast.timespan,double)
S3method(vec_cast.timespan,numeric)
Expand Down Expand Up @@ -154,6 +160,7 @@ importFrom(pillar,style_na)
importFrom(pillar,style_subtle)
importFrom(pillar,type_sum)
importFrom(utils,head)
importFrom(utils,packageName)
importFrom(utils,tail)
importFrom(vctrs,obj_print_data)
importFrom(vctrs,obj_print_footer)
Expand Down
22 changes: 1 addition & 21 deletions R/class_partial_time_coercion.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,27 +129,7 @@ vec_cast.partial_time.character <- function(x, to, ..., format = parse_iso8601)
parse_to_parttime_matrix("")[NULL, , drop = FALSE]
}

if (is.parttime(pttm_mat)) {
return(pttm_mat)
}

if (!all(datetime_parts %in% colnames(pttm_mat))) {
pttm_mat <- complete_parsed_parttime_matrix(pttm_mat)
}

storage.mode(pttm_mat) <- "numeric"
tzhour_na <- is.na(pttm_mat[, "tzhour"])
all_na <- apply(is.na(pttm_mat), 1, all)

if (any(tzhour_na)) {
gmt_offset <- interpret_tz(getOption("parttime.assume_tz_offset", NA))
pttm_mat[!all_na & tzhour_na, "tzhour"] <- gmt_offset %/% 60
pttm_mat[!all_na & tzhour_na, "tzmin"] <- gmt_offset %% 60
}

# when tzhour is available
pttm_mat[!tzhour_na & is.na(pttm_mat[, "tzmin"]), "tzmin"] <- 0

pttm_mat <- clean_parsed_parttime_matrix(pttm_mat)
as.parttime(pttm_mat)
}

Expand Down
1 change: 0 additions & 1 deletion R/class_timespan.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ methods::setClass("timespan")
timespan <- function(start, end, inclusive = c(TRUE, FALSE)) {
common_size <- vctrs::vec_size_common(start, end)


inclusive <- t(matrix(
rep_len(inclusive, common_size * 2),
nrow = 2)
Expand Down
29 changes: 26 additions & 3 deletions R/class_timespan_coercion.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
#' Cast an object to a timespan
#'
#' @param x an object to cast
#' @inheritParams as.parttime
#'
#' @export
as.timespan <- function(x) {
as.timespan <- function(x, ..., format = parse_iso8601_as_timespan) {
UseMethod("as.timespan")
}



#' @export
as.timespan.default <- function(x) {
vctrs::vec_cast(x, structure(0L, class = "timespan"))
as.timespan.default <- function(x, ...) {
vctrs::vec_cast(x, structure(0L, class = "timespan"), ...)
}



#' Cast to timespan object
#'
#' @inheritParams vctrs::vec_cast
#'
#' @importFrom vctrs vec_cast
#' @exportS3Method vec_cast timespan
#'
vec_cast.timespan <- function(x, to, ...) {
if (is.timespan(x)) return(x)
UseMethod("vec_cast.timespan")
Expand All @@ -30,15 +34,34 @@ vec_cast.timespan <- function(x, to, ...) {
#' Default handler for casting to a timespan
#'
#' @inheritParams vctrs::vec_cast
#'
#' @importFrom vctrs stop_incompatible_cast vec_recycle
#' @exportS3Method vec_cast.timespan default
#'
vec_cast.timespan.default <- function(x, to, ...) {
if (!all(is.na(x) | is.null(x))) vctrs::stop_incompatible_cast(x, to)
vctrs::vec_recycle(timespan(NA), size = length(x))
}



#' Cast partial time to timespan, representing uncertainty as a range
#'
#' @inheritParams vctrs::vec_cast
#' @inheritParams as.timespan
#'
#' @exportS3Method vec_cast.timespan character
#'
vec_cast.timespan.character <- function(
x, to, ...,
format = parse_iso8601_as_timespan
) {
as.timespan(format(x, ...))
}




#' Cast partial time to timespan, representing uncertainty as a range
#'
#' @inheritParams vctrs::vec_cast
Expand Down
47 changes: 47 additions & 0 deletions R/class_timespan_compat_stats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' start S3 generic
#'
#' A generic method to retrieve the start of an object
#'
#' @param x An object to retrieve the start from
#' @param ... Additional arguments passed to methods
#'
start <- function(x, ...) {
UseMethod("start")
}

#' @importFrom utils head
#' @rawNamespace S3method(stats::start,timespan)
#' @export
start.timespan <- function(x, ...) {
cols <- utils::head(dimnames(vctrs::field(x, "tmspn_arr"))[[2]], -1L)
dns <- dimnames(vctrs::field(x, "tmspn_arr"))
as.parttime(array(
vctrs::field(x, "tmspn_arr")[, cols, "lb"],
dim = c(length(x), length(cols)),
dimnames = list(dns[[1L]], cols)
))
}

#' end S3 generic
#'
#' A generic method to retrieve the end of an object
#'
#' @param x An object to retrieve the end from
#' @param ... Additional arguments passed to methods
#'
end <- function(x, ...) {
UseMethod("end")
}

#' @importFrom utils head
#' @rawNamespace S3method(stats::end,timespan)
#' @export
end.timespan <- function(x, ...) {
cols <- utils::head(dimnames(vctrs::field(x, "tmspn_arr"))[[2]], -1L)
dns <- dimnames(vctrs::field(x, "tmspn_arr"))
as.parttime(array(
vctrs::field(x, "tmspn_arr")[, cols, "ub"],
dim = c(length(x), length(cols)),
dimnames = list(dns[[1L]], cols)
))
}
13 changes: 13 additions & 0 deletions R/class_timespan_is.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
#' Check if elements of a partial time vector is NA
#'
#' @param x partial_time vector to test
#' @param ... additional arguments unused
#'
#' @export
is.na.timespan <- function(x, ...) {
cols <- head(dimnames(vctrs::field(x, "tmspn_arr"))[[2L]], -1L)
unname(apply(is.na(vctrs::field(x, "tmspn_arr")[, cols, , drop = FALSE]), 1, all))
}



#' Shorthand for checking timespan inheritance
#'
#' @param x object to test
Expand Down
166 changes: 114 additions & 52 deletions R/parse_iso8601.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,89 @@ re_iso8601 <- paste0(
)



#' Parse iso8601 datetime strings as parttime matrix
#'
#' @param x A \code{character} vector of iso8601 datetime strings
#' @param warn A \code{logical} indicating whether to warn when information
#' would be loss when coercing to a \code{parttime} matrix.
#' @param ... Additional arguments unused
#'
#' @keywords internal
#' @rdname parse_parttime
parse_iso8601 <- function(dates, warn = TRUE) {
#'
parse_iso8601 <- function(x, warn = TRUE, ...) {
if (is.character(x)) {
x <- parse_iso8601_matrix(x)
}

# warn when week is specified without weekday, leading to loss of information
if (warn) warn_repr_data_loss(x, includes = "week", excludes = "weekday")

# add month, day when week, weekday available
i <- is_iso8601_weekday(x)
x[i, c("month", "day")] <- recalc_md_from_weekday(x[i, , drop = FALSE])

# add month, day when yearday available
i <- is_iso8601_yearday(x)
x[i, c("month", "day")] <- recalc_md_from_yearday(x[i, , drop = FALSE])

# add sec, secfrac when frac (minfrac) is available
i <- is_iso8601_minfrac(x)
x[i, c("sec", "secfrac")] <- recalc_sec_from_minfrac(x[i, , drop = FALSE])

# fill secfrac when sec is provided
i <- is.na(x[, "secfrac"]) & !is.na(x[, "sec"])
x[i, "secfrac"] <- 0

# drop iso8601-specific columns
x[, datetime_parts, drop = FALSE]
}


#'
#' @keywords internal
#' @rdname parse_timespan
#'
parse_iso8601_as_timespan <- function(x, ...) {
tmspn_arr <- array(
NA_real_,
dim = c(length(x), length(datetime_parts) + 1L, 2L),
dimnames = list(x, c(datetime_parts, "inclusive"), c("lb", "ub"))
)

m <- parse_iso8601_matrix(x)

# user parttime handler where possible, uniquely handle yearweek format
i <- matrix_field_cond(m, includes = "week", excludes = "weekday")
m[!i, datetime_parts] <- parse_iso8601(m[!i, , drop = FALSE])

# impute yearweek + weekday with first day of the week for start
m[i, datetime_parts] <- parse_iso8601(paste0(x[i], "-1"))
tmspn_arr[, datetime_parts, "lb"] <- clean_parsed_parttime_matrix(m)
tmspn_arr[, "inclusive", "lb"] <- 1

# impute yearweek + weekday with last day of the week for end
m[i, datetime_parts] <- parse_iso8601(paste0(x[i], "-7"))
tmspn_arr[, datetime_parts, "ub"] <- minimally_increment(clean_parsed_parttime_matrix(m))
tmspn_arr[, "inclusive", "ub"] <- 0

tmspn_arr
}



#' Parse an iso8601 datetime to a parttime-like matrix
#'
#' @note In addition to parttime matrix fields, the returned matrix has
#' additional columns for alternative iso8601 formats such as \code{yearday},
#' \code{yearweek} and code{weakday}.
#'
#' @inheritParams parse_iso8601
#'
#' @keywords internal
#'
parse_iso8601_matrix <- function(dates) {
match_m <- parse_to_parttime_matrix(dates, regex = re_iso8601)

# fix missing tzhour, tzmin when tz is available
Expand All @@ -61,61 +141,43 @@ parse_iso8601 <- function(dates, warn = TRUE) {
match_m <- match_m[, fields, drop = FALSE]
storage.mode(match_m) <- "numeric"

# warn when week is specified without weekday, leading to loss of information
i <- !is.na(match_m[, "week"]) & is.na(match_m[, "weekday"])
if (warn && any(i)) {
warning(call. = FALSE, paste0(collapse = "\n", strwrap(paste0(
"Date strings using a week field, but lacking weekday will produce ",
"missing months. To avoid loss of datetime resolution, such partial ",
"dates are best represented as timespans. See `?timespan`."
))))
}
match_m
}

# add month, day when week, weekday available
i <- apply(!is.na(match_m[, c("year", "week", "weekday"), drop = FALSE]), 1, all)
if (any(i)) {
fields <- c("year", "week", "weekday")
dates <- strptime(
apply(match_m[i, fields, drop = FALSE], 1, paste, collapse = "-"),
format = "%Y-%U-%u"
)

match_m[i, "month"] <- dates$mon + 1
match_m[i, "day"] <- dates$mday
}
is_iso8601_form <- function(x, fields) {
apply(!is.na(x[, fields, drop = FALSE]), 1, all)
}

# add month, day when yearday available
i <- apply(!is.na(match_m[, c("year", "yearday"), drop = FALSE]), 1, all)
if (any(i)) {
fields <- c("year", "yearday")
dates <- strptime(
apply(match_m[i, fields, drop = FALSE], 1, paste, collapse = "-"),
format = "%Y-%j"
)

match_m[i, "month"] <- dates$mon + 1
match_m[i, "day"] <- dates$mday
}
is_iso8601_weekday <- function(x) {
is_iso8601_form(x, c("year", "week", "weekday"))
}

# fill secfrac when sec is provided
i <- is.na(match_m[, "secfrac"]) & !is.na(match_m[, "sec"])
match_m[i, "secfrac"] <- 0
is_iso8601_yearday <- function(x) {
is_iso8601_form(x, c("year", "yearday"))
}

# fill frac (minfrac) when sec and secfrac are provided
i <- !apply(is.na(match_m[, c("sec", "secfrac"), drop = FALSE]), 1, any)
if (any(i)) {
match_m[i, "frac"] <- (match_m[i, "sec", drop = FALSE] + match_m[i, "secfrac", drop = FALSE]) / 60
}
is_iso8601_minfrac <- function(x) {
is_iso8601_form(x, "frac")
}

# fill sec and secfrac when frac (minfrac) is provided
i <- !apply(is.na(match_m[, c("frac"), drop = FALSE]), 1, any)
if (any(i)) {
match_m[i, "sec"] <- (match_m[i, c("frac"), drop = FALSE] * 60) %/% 1
match_m[i, "secfrac"] <- (match_m[i, c("frac"), drop = FALSE] * 60) %% 1
}
recalc_md_from_weekday <- function(x) {
dates <- strptime(
paste(x[,"year"], x[,"week"], x[,"weekday"] - 1L, sep = "-"),
format = "%Y-%U-%w"
)

cbind(month = dates$mon + 1L, day = dates$mday)
}

recalc_md_from_yearday <- function(x) {
dates <- strptime(
paste(x[,"year"], x[,"yearday"], sep = "-"),
format = "%Y-%j"
)

cbind(month = dates$mon + 1L, day = dates$mday)
}

# reduce to minimum set of columns
# order of fields should be in decreasing resolution. the order is used for
# matrix operations when handling operator behaviors
match_m[, datetime_parts, drop = FALSE]
recalc_sec_from_minfrac <- function(x) {
cbind(sec = (x[, "frac"] * 60) %/% 1, secfrac = (x[, "frac"] * 60) %% 1)
}
Loading

0 comments on commit 1e44667

Please sign in to comment.