Skip to content

Commit

Permalink
Label group names correctly and check for balanced data in `pivot_lon…
Browse files Browse the repository at this point in the history
…ger()` (#230)

* Label group names correctly and check for balanced data. Closes #228, closes #229

* Update tests
  • Loading branch information
markfairbanks committed Mar 23, 2021
1 parent f8ef87a commit e56d8a6
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 24 deletions.
27 changes: 24 additions & 3 deletions R/step-call-pivot_longer.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,21 +90,40 @@ pivot_longer.dtplyr_step <- function(data,

if (uses_dot_value) {
if (!is.null(names_sep)) {
.value <- str_separate(measure_vars, into = names_to, sep = names_sep)$.value
names_to_setup <- str_separate(measure_vars, into = names_to, sep = names_sep)
} else if (!is.null(names_pattern)) {
.value <- str_extract(measure_vars, into = names_to, names_pattern)$.value
names_to_setup <- str_extract(measure_vars, into = names_to, names_pattern)
} else {
abort("If you use '.value' in `names_to` you must also supply
`names_sep' or `names_pattern")
}

.value <- names_to_setup$.value

v_fct <- factor(.value, levels = unique(.value))
measure_vars <- split(measure_vars, v_fct)
values_to <- names(measure_vars)
names(measure_vars) <- NULL

if (multiple_names_to) {
variable_name <- names_to[!names_to == ".value"]

.value_ids <- split(names_to_setup[[variable_name]], v_fct)
.value_id <- .value_ids[[1]]

# Make sure data is "balanced"
# https://github.com/Rdatatable/data.table/issues/2575
# The list passed to measure.vars also needs the same number of column names per element
equal_ids <- vapply(
.value_ids[-1],
function(.x) isTRUE(all.equal(.value_id, .x)),
logical(1)
)
if (all(equal_ids)) {
.value_id <- vctrs::vec_rep_each(.value_id, length(pull(data)))
} else {
abort("`data.table::melt()` doesn't currently support melting of unbalanced datasets.")
}
}
} else if (multiple_names_to) {
if (is.null(names_sep) && is.null(names_pattern)) {
Expand Down Expand Up @@ -152,7 +171,9 @@ pivot_longer.dtplyr_step <- function(data,
out <- mutate(out, !!variable_name := gsub(paste0("^", names_prefix), "", !!sym(variable_name)))
}

if (multiple_names_to && !uses_dot_value) {
if (multiple_names_to && uses_dot_value) {
out <- mutate(out, !!variable_name := !!.value_id)
} else if (multiple_names_to && !uses_dot_value) {
if (!is.null(names_sep)) {
into_cols <- str_separate(pull(out, !!sym(variable_name)), names_to, sep = names_sep)
} else {
Expand Down
10 changes: 9 additions & 1 deletion tests/testthat/_snaps/step-call-pivot_longer.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,15 @@
Output
melt(DT, measure.vars = list(c("x1", "x2", "x3", "x4"), c("y1",
"y2", "y3", "y4")), variable.name = "set", value.name = c("x",
"y"), variable.factor = FALSE)
"y"), variable.factor = FALSE)[, `:=`(set = c("1", "1", "2",
"2", "3", "3", "4", "4"))]

# errors on unbalanced datasets

Code
pivot_longer(dt, everything(), names_to = c(".value", "id"), names_sep = "_")
Error <rlang_error>
`data.table::melt()` doesn't currently support melting of unbalanced datasets.

# informative errors on unsupported features

Expand Down
30 changes: 10 additions & 20 deletions tests/testthat/test-step-call-pivot_longer.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,26 +44,6 @@ test_that("can drop missing values", {
expect_equal(out$value, c(1, 2))
})

test_that("can handle missing combinations", {
tbl <- tribble(
~id, ~x_1, ~x_2, ~y_2,
"A", 1, 2, "a",
"B", 3, 4, "b",
)
dt <- lazy_dt(tbl, "DT")
step <- pivot_longer(dt, -id, names_to = c(".value", "n"), names_sep = "_")
out <- collect(step)

expect_equal(
show_query(step),
expr(melt(DT, measure.vars = !!list(c("x_1", "x_2"), "y_2"), variable.name = "n",
value.name = !!c("x", "y"), variable.factor = FALSE))
)
expect_equal(step$vars, c("id", "n", "x", "y"))
expect_equal(out$x, c(1, 3, 2, 4))
expect_equal(out$y, c("a", "b", NA, NA))
})

test_that("can pivot to multiple measure cols", {
dt <- lazy_dt(head(anscombe, 2), "DT")
step <- pivot_longer(
Expand Down Expand Up @@ -105,6 +85,15 @@ test_that(".value can be at any position in `names_to`", {
expect_identical(value_first, value_second)
})

test_that("errors on unbalanced datasets", {
tbl <- tibble(x_1 = 1, x_2 = 1, y_3 = 1, y_4 = 1)
dt <- lazy_dt(tbl, "DT")

expect_snapshot(error = TRUE,
pivot_longer(dt, everything(), names_to = c(".value", "id"), names_sep = "_")
)
})

test_that("can use names_prefix", {
tbl <- tibble(x_x = 1:2, x_y = 3:4)
dt <- lazy_dt(tbl, "DT")
Expand Down Expand Up @@ -161,3 +150,4 @@ test_that("informative errors on unsupported features", {
})

})

0 comments on commit e56d8a6

Please sign in to comment.