From 94c196aa42edd948288200f150acb9e2829b2300 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 18 Aug 2020 17:16:09 +0200 Subject: [PATCH 1/8] Add list record helper class --- tests/testthat/helper-s3.R | 34 ++++++++++++++++++++++++++++++++- tests/testthat/test-type-rcrd.R | 16 ++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/tests/testthat/helper-s3.R b/tests/testthat/helper-s3.R index 27f6bf691..e5ce9cf47 100644 --- a/tests/testthat/helper-s3.R +++ b/tests/testthat/helper-s3.R @@ -139,7 +139,7 @@ foobar_df_ptype2 <- function(x, y, ...) { foobar_df_cast <- function(x, y, ...) { foobar(df_cast(x, y, ...)) } -local_foobar_df_methods <- function(expr, frame = caller_env()) { +local_foobar_df_methods <- function(frame = caller_env()) { local_methods( .frame = frame, vec_ptype2.vctrs_foobar.vctrs_foobar = foobar_df_ptype2, @@ -154,3 +154,35 @@ with_foobar_df_methods <- function(expr) { local_foobar_df_methods() expr } + +# List that caches the size of its elements in a record field +new_list_rcrd <- function(x, sizes = NULL) { + stopifnot( + vec_is_list(x), + is_null(sizes) || is_integer(sizes, n = length(x)) + ) + + x <- vec_data(x) + + if (is_null(sizes)) { + sizes <- list_sizes(x) + } else { + missing <- is.na(sizes) + sizes[missing] <- list_sizes(x[missing]) + } + + structure(x, sizes = sizes, class = c("vctrs_list_rcrd", "list")) +} +local_list_rcrd_methods <- function(frame = caller_env()) { + local_methods( + .frame = frame, + vec_proxy.vctrs_list_rcrd = function(x, ...) data_frame(data = unclass(x), sizes = attr(x, "sizes")), + vec_restore.vctrs_list_rcrd = function(x, to, ...) new_list_rcrd(x$data, x$sizes), + vec_ptype2.vctrs_list_rcrd.vctrs_list_rcrd = function(x, y, ...) x, + vec_ptype2.vctrs_list_rcrd.list = function(x, y, ...) x, + vec_ptype2.list.vctrs_list_rcrd = function(x, y, ...) y, + vec_cast.vctrs_list_rcrd.vctrs_list_rcrd = function(x, to, ...) x, + vec_cast.list.vctrs_list_rcrd = function(x, to, ...) vec_data(x), + vec_cast.vctrs_list_rcrd.list = function(x, to, ...) new_list_rcrd(x, sizes = vec_init(int(), length(x))) + ) +} diff --git a/tests/testthat/test-type-rcrd.R b/tests/testthat/test-type-rcrd.R index fc612f47c..0832bc735 100644 --- a/tests/testthat/test-type-rcrd.R +++ b/tests/testthat/test-type-rcrd.R @@ -225,3 +225,19 @@ test_that("dots are forwarded", { test_that("records are restored after slicing the proxy", { expect_identical(new_rcrd(list(x = 1:2))[1], new_rcrd(list(x = 1L))) }) + + +# helper classes ---------------------------------------------------------- + +test_that("list_rcrd caches sizes", { + local_list_rcrd_methods() + x <- new_list_rcrd(list(1L, 2:3, 4:6)) + + out <- vec_slice(x, 2:3) + exp <- new_list_rcrd(list(2:3, 4:6), sizes = c(2L, 3L)) + expect_identical(out, exp) + + out <- vec_assign(x, 2:3, list(1:5, 2L)) + exp <- new_list_rcrd(list(1L, 1:5, 2L), sizes = c(1L, 5L, 1L)) + expect_identical(out, exp) +}) From b7b245b4bd46d81a6b67b3a5b7a07ded8dc60ee6 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 17 Aug 2020 16:05:16 +0200 Subject: [PATCH 2/8] Draft `vec_chop2()` --- R/slice-chop.R | 14 +++++++++++ tests/testthat/test-slice-chop.R | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/R/slice-chop.R b/R/slice-chop.R index 5cef57052..3d18b5710 100644 --- a/R/slice-chop.R +++ b/R/slice-chop.R @@ -105,3 +105,17 @@ vec_chop_seq <- function(x, starts, sizes, increasings = TRUE) { args <- vec_recycle_common(starts, sizes, increasings) .Call(vctrs_chop_seq, x, args[[1]], args[[2]], args[[3]]) } + +vec_chop2 <- function(x) { + if (vec_is_list(x)) { + # Lists necessarily have list storage and so don't require any + # genericity for extracting elements + out <- unstructure(x) + } else { + # Zap inner names of atomic vectors. They become outer names. + out <- vec_set_names(x, NULL) + out <- vec_chop(out) + } + + vec_set_names(out, vec_names(x)) +} diff --git a/tests/testthat/test-slice-chop.R b/tests/testthat/test-slice-chop.R index ad2a3b155..7c9d47f31 100644 --- a/tests/testthat/test-slice-chop.R +++ b/tests/testthat/test-slice-chop.R @@ -229,6 +229,47 @@ test_that("can chop S3 objects using the fallback method with compact seqs", { expect_equal(vec_chop_seq(x, 2L, 2L), list(vec_slice(x, 3:4))) }) + +# vec_chop2 --------------------------------------------------------------- + +test_that("vec_chop2() transforms inner names to outer names", { + x <- c(a = 1, b = 2) + expect_identical( + vec_chop2(x), + list(a = 1, b = 2) + ) + + x <- data.frame(x = 1:2, row.names = c("foo", "bar")) + exp <- list( + foo = data.frame(x = 1L), + bar = data.frame(x = 2L) + ) + expect_identical(vec_chop2(x), exp) + + x <- matrix(1:4, 2) + row.names(x) <- c("foo", "bar") + + # Fails because of #1221 + # exp <- list( + # foo = matrix(c(1L, 3L), 1), + # bar = matrix(c(2L, 4L), 1) + # ) + # expect_identical(vec_chop2(x), exp) + + out <- vec_chop2(x) + expect_null(row.names(out[[1]])) + expect_null(row.names(out[[2]])) +}) + +test_that("vec_chop2() preserves names of lists", { + x <- list(a = 1, b = 2) + expect_identical(vec_chop2(x), x) + + s3 <- new_vctr(x) + expect_identical(vec_chop2(s3), x) +}) + + # vec_unchop -------------------------------------------------------------- test_that("`x` must be a list", { From 711a4266bc497fe761ac6d7be8ea0ac9767eb15e Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 17 Aug 2020 16:28:28 +0200 Subject: [PATCH 3/8] Implement `vec_chop2()` in C --- R/slice-chop.R | 12 +---------- src/init.c | 2 ++ src/slice-chop.c | 34 ++++++++++++++++++++++++++++++++ src/slice.h | 2 ++ tests/testthat/test-slice-chop.R | 4 +++- 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/R/slice-chop.R b/R/slice-chop.R index 3d18b5710..ea147c1ec 100644 --- a/R/slice-chop.R +++ b/R/slice-chop.R @@ -107,15 +107,5 @@ vec_chop_seq <- function(x, starts, sizes, increasings = TRUE) { } vec_chop2 <- function(x) { - if (vec_is_list(x)) { - # Lists necessarily have list storage and so don't require any - # genericity for extracting elements - out <- unstructure(x) - } else { - # Zap inner names of atomic vectors. They become outer names. - out <- vec_set_names(x, NULL) - out <- vec_chop(out) - } - - vec_set_names(out, vec_names(x)) + .External(vctrs_chop2, x) } diff --git a/src/init.c b/src/init.c index 91f6df292..f19421d6d 100644 --- a/src/init.c +++ b/src/init.c @@ -278,6 +278,7 @@ extern SEXP vctrs_rbind(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_cbind(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_c(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_new_data_frame(SEXP); +extern SEXP vctrs_chop2(SEXP); static const R_ExternalMethodDef ExtEntries[] = { {"vctrs_type_common", (DL_FUNC) &vctrs_type_common, 1}, @@ -290,6 +291,7 @@ static const R_ExternalMethodDef ExtEntries[] = { {"vctrs_cbind", (DL_FUNC) &vctrs_cbind, 3}, {"vctrs_c", (DL_FUNC) &vctrs_c, 3}, {"vctrs_new_data_frame", (DL_FUNC) &vctrs_new_data_frame, -1}, + {"vctrs_chop2", (DL_FUNC) &vctrs_chop2, 1}, {NULL, NULL, 0} }; diff --git a/src/slice-chop.c b/src/slice-chop.c index 95698da80..45724c727 100644 --- a/src/slice-chop.c +++ b/src/slice-chop.c @@ -414,3 +414,37 @@ SEXP vec_as_indices(SEXP indices, R_len_t n, SEXP names) { UNPROTECT(1); return indices; } + +// ----------------------------------------------------------------------------- + +// [[ register(external = TRUE) ]] +SEXP vctrs_chop2(SEXP args) { + return vec_chop2(CADR(args)); +} + +// [[ include("slice.h") ]] +SEXP vec_chop2(SEXP x) { + SEXP out; + + if (vec_is_list(x)) { + // Lists necessarily have list storage and so don't require any + // genericity for extracting elements + out = PROTECT(r_clone_referenced(x)); + SET_ATTRIB(out, R_NilValue); + SET_OBJECT(out, 0); + + UNPROTECT(1); + } else { + // Zap inner names of atomic vectors. They become outer names. + out = PROTECT(vec_set_names(x, R_NilValue)); + out = vec_chop(out, R_NilValue); + UNPROTECT(1); + } + PROTECT(out); + + SEXP names = PROTECT(vec_names(x)); + out = vec_set_names(out, names); + + UNPROTECT(2); + return out; +} diff --git a/src/slice.h b/src/slice.h index 83e7256eb..b1b9eadd8 100644 --- a/src/slice.h +++ b/src/slice.h @@ -11,5 +11,7 @@ SEXP vec_slice_fallback(SEXP x, SEXP subscript); bool vec_is_restored(SEXP x, SEXP to); +SEXP vec_chop2(SEXP x); + #endif diff --git a/tests/testthat/test-slice-chop.R b/tests/testthat/test-slice-chop.R index 7c9d47f31..af558f221 100644 --- a/tests/testthat/test-slice-chop.R +++ b/tests/testthat/test-slice-chop.R @@ -266,7 +266,9 @@ test_that("vec_chop2() preserves names of lists", { expect_identical(vec_chop2(x), x) s3 <- new_vctr(x) - expect_identical(vec_chop2(s3), x) + out <- vec_chop2(s3) + expect_identical(out, x) + expect_false(is.object(out)) }) From bb9fe4451de24aadcfecad17d6ee6260621722b4 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 18 Aug 2020 18:16:35 +0200 Subject: [PATCH 4/8] Allow setting names of `vctrs_rcrd` to `NULL` Would previously zap the field names --- NAMESPACE | 1 + R/type-rcrd.R | 7 +++++++ tests/testthat/test-slice-chop.R | 11 +++++++++++ tests/testthat/test-type-rcrd.R | 13 +++++++++++++ 4 files changed, 32 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 76dbfe0df..dd3adb018 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ S3method("length<-",vctrs_rcrd) S3method("length<-",vctrs_vctr) S3method("levels<-",vctrs_sclr) S3method("levels<-",vctrs_vctr) +S3method("names<-",vctrs_rcrd) S3method("names<-",vctrs_sclr) S3method("names<-",vctrs_vctr) S3method("|",vctrs_vctr) diff --git a/R/type-rcrd.R b/R/type-rcrd.R index 49ae6e312..c2643a5a5 100644 --- a/R/type-rcrd.R +++ b/R/type-rcrd.R @@ -70,6 +70,13 @@ length.vctrs_rcrd <- function(x) { names.vctrs_rcrd <- function(x) { NULL } +#' @export +`names<-.vctrs_rcrd` <- function(x, value) { + if (!is_null(value)) { + abort("Can't set names of record vectors.") + } + x +} #' @export format.vctrs_rcrd <- function(x, ...) { diff --git a/tests/testthat/test-slice-chop.R b/tests/testthat/test-slice-chop.R index af558f221..b8beeb6e5 100644 --- a/tests/testthat/test-slice-chop.R +++ b/tests/testthat/test-slice-chop.R @@ -271,6 +271,17 @@ test_that("vec_chop2() preserves names of lists", { expect_false(is.object(out)) }) +test_that("can chop record vectors", { + x <- new_rcrd(list(x = 1:2, y = 3:4)) + exp <- list( + new_rcrd(list(x = 1L, y = 3L)), + new_rcrd(list(x = 2L, y = 4L)) + ) + + expect_identical(vec_chop(x), exp) + expect_identical(vec_chop2(x), exp) +}) + # vec_unchop -------------------------------------------------------------- diff --git a/tests/testthat/test-type-rcrd.R b/tests/testthat/test-type-rcrd.R index 0832bc735..f4dcf3a2f 100644 --- a/tests/testthat/test-type-rcrd.R +++ b/tests/testthat/test-type-rcrd.R @@ -28,6 +28,19 @@ test_that("vec_proxy() transforms records to data frames", { ) }) +test_that("can set names of record vectors to NULL", { + x <- new_rcrd(list(a = 1)) + + orig <- x + names(x) <- NULL + expect_identical(x, orig) + + expect_identical(vec_set_names(x, NULL), x) + + expect_error(names(x) <- "foo", "Can't") + expect_error(vec_set_names(x, "foo"), "Can't") +}) + # coercion ---------------------------------------------------------------- From 09150af46a47612061f3543188e0758b35d5d457 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 19 Aug 2020 10:43:29 +0200 Subject: [PATCH 5/8] Add `vec_chop2()` tests for generic vectors --- tests/testthat/test-slice-chop.R | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-slice-chop.R b/tests/testthat/test-slice-chop.R index b8beeb6e5..dc0b808fe 100644 --- a/tests/testthat/test-slice-chop.R +++ b/tests/testthat/test-slice-chop.R @@ -271,17 +271,34 @@ test_that("vec_chop2() preserves names of lists", { expect_false(is.object(out)) }) -test_that("can chop record vectors", { +test_that("vec_chop2() works with generic atomic vectors", { + x <- set_names(new_vctr(1:2), letters[1:2]) + exp <- list( + a = new_vctr(1L), + b = new_vctr(2L) + ) + expect_identical(vec_chop2(x), exp) + + # TODO: Test with names once rcrd vectors support them x <- new_rcrd(list(x = 1:2, y = 3:4)) exp <- list( new_rcrd(list(x = 1L, y = 3L)), new_rcrd(list(x = 2L, y = 4L)) ) - expect_identical(vec_chop(x), exp) + expect_identical(vec_chop(x), exp) # FIXME: Only because rcrd is unnamed expect_identical(vec_chop2(x), exp) }) +test_that("vec_chop2() works with generic lists", { + x <- list(a = c(foo = 1:2), b = c(bar = 1:3)) + + expect_identical(vec_chop2(new_vctr(x)), x) + + local_list_rcrd_methods() + expect_identical(vec_chop2(new_list_rcrd(x)), x) +}) + # vec_unchop -------------------------------------------------------------- From b56cd90c16305ab73d58b3defc7bbf5a06d7608f Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 19 Aug 2020 14:13:40 +0200 Subject: [PATCH 6/8] Use `vec_chop2()` in `as.list.vctrs_vctr()` --- R/type-vctr.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/R/type-vctr.R b/R/type-vctr.R index 97b510f21..e3b2c015a 100644 --- a/R/type-vctr.R +++ b/R/type-vctr.R @@ -298,13 +298,7 @@ as.character.vctrs_vctr <- function(x, ...) { #' @export as.list.vctrs_vctr <- function(x, ...) { - out <- vec_chop(x) - - if (vec_is_list(x)) { - out <- lapply(out, `[[`, 1) - } - - out + vec_chop2(x) } #' @export From 09aab2756cccfdb0ead21902b96df8e24c6a3271 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 19 Aug 2020 14:27:13 +0200 Subject: [PATCH 7/8] Add memory footprint test for `vec_chop2()` --- .../testthat/performance/test-slice-chop.txt | 51 +++++++++++++++++++ tests/testthat/test-slice-chop.R | 42 +++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 tests/testthat/performance/test-slice-chop.txt diff --git a/tests/testthat/performance/test-slice-chop.txt b/tests/testthat/performance/test-slice-chop.txt new file mode 100644 index 000000000..c2dba7238 --- /dev/null +++ b/tests/testthat/performance/test-slice-chop.txt @@ -0,0 +1,51 @@ +> local_list_rcrd_methods() +> n <- 100 +> # Atomic vector +> vec <- rep(c(a = 1L, b = 2L), n) +> with_memory_prof(vec_chop2(vec)) +[1] 2.44KB + +> # S3 atomic vector +> vec_s3 <- new_vctr(vec) +> with_memory_prof(vec_chop2(vec_s3)) +[1] 1.61KB + +> # Record vector +> vec_rcrd <- rep(new_rcrd(list(a = 1:2, b = 3:4)), n) +> with_memory_prof(vec_chop2(vec_rcrd)) +[1] 4.83KB + +> # Data frame +> df <- vec_rep(data.frame(x = 1, y = 2), n) +> with_memory_prof(vec_chop2(df)) +[1] 2.48KB + +> # S3 data frame +> tib <- vec_rep(tibble(x = 1, y = 2), n) +> with_memory_prof(vec_chop2(tib)) +[1] 2.48KB + + +List +==== + +> list <- rep(list(a = c(foo = 1:2), b = c(bar = 1:3)), n) +> with_memory_prof(vec_chop2(list)) +[1] 1.61KB + + +S3 list +======= + +> vctr <- new_vctr(list) +> with_memory_prof(vec_chop2(vctr)) +[1] 1.61KB + + +S3 record list +============== + +> list_rcrd <- new_list_rcrd(list) +> with_memory_prof(vec_chop2(list_rcrd)) +[1] 1.61KB + diff --git a/tests/testthat/test-slice-chop.R b/tests/testthat/test-slice-chop.R index dc0b808fe..6a6b27f23 100644 --- a/tests/testthat/test-slice-chop.R +++ b/tests/testthat/test-slice-chop.R @@ -845,6 +845,9 @@ test_that("vec_unchop() fails if foreign classes are not homogeneous and there i ) }) + +# Golden tests ------------------------------------------------------------ + test_that("vec_unchop() has informative error messages", { verify_output(test_path("error", "test-unchop.txt"), { "# vec_unchop() errors on unsupported location values" @@ -885,3 +888,42 @@ test_that("vec_unchop() has informative error messages", { ) }) }) + +test_that("chop functions have expected memory footprint", { + verify_output(test_path("performance", "test-slice-chop.txt"), { + local_list_rcrd_methods() + n <- 1e2 + + "Atomic vector" + vec <- rep(c(a = 1L, b = 2L), n) + with_memory_prof(vec_chop2(vec)) + + "S3 atomic vector" + vec_s3 <- new_vctr(vec) + with_memory_prof(vec_chop2(vec_s3)) + + "Record vector" + vec_rcrd <- rep(new_rcrd(list(a = 1:2, b = 3:4)), n) + with_memory_prof(vec_chop2(vec_rcrd)) + + "Data frame" + df <- vec_rep(data.frame(x = 1, y = 2), n) + with_memory_prof(vec_chop2(df)) + + "S3 data frame" + tib <- vec_rep(tibble(x = 1, y = 2), n) + with_memory_prof(vec_chop2(tib)) + + "# List" + list <- rep(list(a = c(foo = 1:2), b = c(bar = 1:3)), n) + with_memory_prof(vec_chop2(list)) + + "# S3 list" + vctr <- new_vctr(list) + with_memory_prof(vec_chop2(vctr)) + + "# S3 record list" + list_rcrd <- new_list_rcrd(list) + with_memory_prof(vec_chop2(list_rcrd)) + }) +}) From 8103a290e945d59a68a4c9424c862c0058b66b23 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 19 Aug 2020 15:14:48 +0200 Subject: [PATCH 8/8] `vec_chop2()` on a bare list is a no-op --- src/slice-chop.c | 4 ++++ src/utils.c | 18 ++++++++++++++++++ src/utils.h | 2 ++ tests/testthat/performance/test-slice-chop.txt | 2 +- tests/testthat/test-slice-chop.R | 4 ++++ 5 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/slice-chop.c b/src/slice-chop.c index 45724c727..06961116e 100644 --- a/src/slice-chop.c +++ b/src/slice-chop.c @@ -424,6 +424,10 @@ SEXP vctrs_chop2(SEXP args) { // [[ include("slice.h") ]] SEXP vec_chop2(SEXP x) { + if (r_is_bare_list(x)) { + return x; + } + SEXP out; if (vec_is_list(x)) { diff --git a/src/utils.c b/src/utils.c index 77e05d1a9..73389e512 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1623,6 +1623,24 @@ void stop_internal(const char* fn, const char* fmt, ...) { #undef FMT_BUFSIZE +bool r_is_bare_list(SEXP x) { + if (TYPEOF(x) != VECSXP) { + return false; + } + + SEXP attrib = r_attrib(x); + + while (attrib != r_null) { + if (r_node_tag(attrib) != r_syms_names) { + return false; + } + attrib = r_node_cdr(attrib); + } + + return true; +} + + bool vctrs_debug_verbose = false; SEXP vctrs_ns_env = NULL; diff --git a/src/utils.h b/src/utils.h index a936b8a60..bee0a128c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -494,6 +494,8 @@ r_ssize r_ssize_add(r_ssize x, r_ssize y) { SEXP chr_c(SEXP x, SEXP y); +bool r_is_bare_list(SEXP x); + extern SEXP vctrs_ns_env; extern SEXP vctrs_shared_empty_str; diff --git a/tests/testthat/performance/test-slice-chop.txt b/tests/testthat/performance/test-slice-chop.txt index c2dba7238..3f2fa412f 100644 --- a/tests/testthat/performance/test-slice-chop.txt +++ b/tests/testthat/performance/test-slice-chop.txt @@ -31,7 +31,7 @@ List > list <- rep(list(a = c(foo = 1:2), b = c(bar = 1:3)), n) > with_memory_prof(vec_chop2(list)) -[1] 1.61KB +[1] 0B S3 list diff --git a/tests/testthat/test-slice-chop.R b/tests/testthat/test-slice-chop.R index 6a6b27f23..ddd6a6d1c 100644 --- a/tests/testthat/test-slice-chop.R +++ b/tests/testthat/test-slice-chop.R @@ -299,6 +299,10 @@ test_that("vec_chop2() works with generic lists", { expect_identical(vec_chop2(new_list_rcrd(x)), x) }) +test_that("vec_chop2() zaps attributes", { + expect_identical(vec_chop2(structure(list(), foo = TRUE)), list()) +}) + # vec_unchop --------------------------------------------------------------