Forget me not - dplyr::ungroup()
If you forget to `ungroup()` after a `group_by()` operation,
it can lead to unwanted behaviour.
Question: What if we didn’t have to remember to ungroup after every chain?
My “just be annoying” non-solution - Part 2
Since I almost never want to leave grouped data in my workspace, just make a
noisy version of the pipe operator (`%>%`) which
- warns if you try and operate on a dataset which is already grouped.
- warns if the end result of an operation still has groups.
library(magrittr)
library(dplyr)
#-----------------------------------------------------------------------------
# Define a new version of the pipe that always checks for grouped data.
# This is a slightly modified clone of `magrittr::%>%`
#-----------------------------------------------------------------------------
#' Noisy replacement for the magrittr pipe that warns about grouped data.
#'
#' Evaluates the chain the same way as the magrittr pipe it was cloned from,
#' and additionally warns when the left-hand side is already grouped/rowwise
#' and when the final value of a non-compound chain is still grouped/rowwise.
#'
#' @param lhs Value (or the `.` placeholder) that is fed into the chain.
#' @param rhs Function call or expression the value is piped into.
#' @return The value of the chain with its visibility preserved; the
#'   functional sequence itself when `lhs` is the placeholder; for a compound
#'   pipe, the result of assigning the value back to `lhs` in the caller.
`%>%` <- function(lhs, rhs) {
  # NOTE(review): this relies on unexported magrittr helpers reached via `:::`
  # (`split_chain`, `wrap_function`, `is_placeholder`, `is_compound_pipe`);
  # confirm they exist in the installed magrittr version.
  parent <- parent.frame()
  env <- new.env(parent = parent)

  # TRUE when `x` carries grouping. The class test works for any object.
  # `groups()` is only a fallback for other table types: its result is
  # measured with `length()` so that both `NULL` and an empty list count as
  # "no groups", and objects without a `groups()` method count as ungrouped
  # instead of aborting the whole pipe.
  has_groups <- function(x) {
    inherits(x, c("grouped_df", "rowwise_df")) ||
      length(tryCatch(dplyr::groups(x), error = function(e) NULL)) > 0L
  }

  chain_parts <- magrittr:::split_chain(match.call(), env = env)
  pipes <- chain_parts[["pipes"]]
  rhss <- chain_parts[["rhss"]]
  lhs <- chain_parts[["lhs"]]

  # One wrapped function per right-hand side, evaluated in the caller's frame.
  env[["_function_list"]] <- lapply(
    seq_along(rhss),
    function(i) magrittr:::wrap_function(rhss[[i]], pipes[[i]], parent)
  )

  # The functional sequence is created inside `env` so that it can see
  # `_function_list` and `freduce` at call time.
  env[["_fseq"]] <- `class<-`(
    eval(quote(function(value) freduce(value, `_function_list`)), env, env),
    c("fseq", "function")
  )
  env[["freduce"]] <- magrittr::freduce

  if (magrittr:::is_placeholder(lhs)) {
    # `. %>% f %>% g`: return the functional sequence without running it.
    env[["_fseq"]]
  } else {
    env[["_lhs"]] <- eval(lhs, parent, parent)

    if (has_groups(env[["_lhs"]])) {
      warning("This data.frame already has groups - did you forget to call `ungroup()` earlier?", call. = FALSE)
    }

    result <- withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))

    if (magrittr:::is_compound_pipe(pipes[[1L]])) {
      # Compound assignment: write the value back to `lhs` in the caller.
      # The end-result check is not applied on this branch.
      eval(call("<-", lhs, result[["value"]]), parent, parent)
    } else {
      if (has_groups(result[["value"]])) {
        warning("The end result of this operation still has groups - did you mean to call `ungroup()` as well?", call. = FALSE)
      }

      if (result[["visible"]]) {
        result[["value"]]
      } else {
        invisible(result[["value"]])
      }
    }
  }
}
#-----------------------------------------------------------------------------
# Use this new pipe instead of the original magrittr pipe and you get
# a warning if the end result is still grouped
#-----------------------------------------------------------------------------
# The chain ends while still grouped by `cyl`, so the pipe warns.
res1 <- mtcars %>% group_by(cyl)
Warning: The end result of this operation still has groups - did you mean to
call `ungroup()` as well?
#-----------------------------------------------------------------------------
# Get a warning when the initial data.frame is already grouped
#-----------------------------------------------------------------------------
# `res1` is still grouped by `cyl`, so the pipe warns about its input.
res2 <- res1 %>%
  mutate(mean_mpg = mean(mpg)) %>%
  select(cyl, mean_mpg) %>%
  ungroup()
Warning: This data.frame already has groups - did you forget to call `ungroup()`
earlier?
#-----------------------------------------------------------------------------
# But don't get any warning if it isn't grouped input or output
#-----------------------------------------------------------------------------
# Ungrouped input and ungrouped output: no warning is expected here.
res3 <- mtcars %>%
  mutate(mean_mpg = mean(mpg)) %>%
  select(cyl, mean_mpg) %>%
  ungroup()