ggdebug - a package for debugging ggplot stats

ggdebug

ggdebug is a package for debugging ggplot2 stats.

I created this package to help in developing new ggplot2 stats, and trying to decipher and understand how existing ones work.

What’s in the box:

  • create_stat_with_caching()
    • Capture arguments and the return values internal to Stat methods
  • inject_data_into_stat_function()
    • Inject arguments and return values directly into Stat methods

This package wouldn’t be possible without:

Installation

You can install the development version from GitHub with:

# install.packages("remotes")
remotes::install_github("coolbutuseless/ggdebug")

Debug geom_histogram + StatBin

The following example shows how ggdebug can be used to capture the data during creation of geom_histogram().

geom_histogram() uses a default Stat of StatBin. Instead we create a debugging version of this Stat and use it explicitly.

The captured data is available by calling ggdebug::get_cache_data().

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create a new stat which is identical to StatBin, but captures the
# data for all the methods.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
StatBinDebug <- ggdebug::create_stat_with_caching(
  StatBin
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create the plot using the new Stat
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ggplot(mtcars) +
  geom_histogram(aes(mpg, fill = as.factor(cyl)), stat = StatBinDebug, bins = 10) +
  theme_bw() +
  facet_wrap(~cyl)


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get the data we captured from this process
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
cdata <- ggdebug::get_data_cache()

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Interrogate the captured data. 
# Each method has its only entry in the data cache, and 
# within each entry there are both the `args` and the `return` value
# for this method
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
names(cdata)
[1] "parameters"    "compute_group" "compute_panel" "compute_layer"
[5] "finish_layer" 

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# arguments and the return value for the 'parameters()' method
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
cdata$parameters
$args
$args$extra
[1] FALSE


$return
 [1] "binwidth" "bins"     "center"   "boundary" "closed"   "pad"     
 [7] "breaks"   "origin"   "right"    "drop"     "width"   

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The 'data' argument passed in to the 'compute_layer()' method
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
head(cdata$compute_layer$args$data)
     x fill PANEL group
1 21.0    6     2     2
2 21.0    6     2     2
3 22.8    4     1     1
4 21.4    6     2     2
5 18.7    8     3     3
6 18.1    6     2     2

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The return value from the 'finish_layer()' method
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
head(cdata$finish_layer$return)
     fill y count        x      xmin     xmax   density    ncount  ndensity
1 #F8766D 0     0 10.44444  9.138889 11.75000 0.0000000 0.0000000 0.0000000
2 #F8766D 0     0 13.05556 11.750000 14.36111 0.0000000 0.0000000 0.0000000
3 #F8766D 0     0 15.66667 14.361111 16.97222 0.0000000 0.0000000 0.0000000
4 #F8766D 0     0 18.27778 16.972222 19.58333 0.0000000 0.0000000 0.0000000
5 #F8766D 2     2 20.88889 19.583333 22.19444 0.0696325 0.6666667 0.6666667
6 #F8766D 3     3 23.50000 22.194444 24.80556 0.1044487 1.0000000 1.0000000
  PANEL group ymin ymax colour size linetype alpha
1     1     1    0    0     NA  0.5        1    NA
2     1     1    0    0     NA  0.5        1    NA
3     1     1    0    0     NA  0.5        1    NA
4     1     1    0    0     NA  0.5        1    NA
5     1     1    0    2     NA  0.5        1    NA
6     1     1    0    3     NA  0.5        1    NA

Inject data into geom_histogram + StatBin

ggdebug also allows you to inject arguments and override return values within methods inside a Stat.

In this example, we take the above histogram, and at the last stage (finish_layer) we inject a modified return value to override the real one.

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# From the above debugging example, grab the return value from `finish_layer`
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
finish_layer_return_value <- cdata$finish_layer$return

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Change the visual properties for the bar with the greatest height
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
finish_layer_return_value <- finish_layer_return_value %>% 
  group_by(group) %>%
  mutate(
    fill   = ifelse(count == max(count), '#000000', fill  ),
    colour = ifelse(count == max(count), '#ff0000', colour),
    size   = ifelse(count == max(count), 1        , size  )
  ) %>%
  ungroup()

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create a modified version of StatBin which only returns our version of the 
# dataset from `finishlayer`
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
StatBinInject <- inject_data_into_stat_function(
  StatBin, 'finish_layer',
  return_value = finish_layer_return_value
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Recreate the plot using the injected data as the return from `finish_layer`
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ggplot(mtcars) +
  geom_histogram(aes(mpg, fill = as.factor(cyl)), stat = StatBinInject, bins = 5) +
  theme_bw() +
  facet_wrap(~cyl) +
  labs(title = "Use injected data to highlight the maximum bar within each group")

Inject tetris pieces into geom_histogram + StatBin

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create 3 tetris pieces
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tetris <- tribble(
  ~fill    , ~xmin, ~xmax, ~ymin, ~ymax,
  '#ff0000', 10, 19, 6, 7,
  '#ff0000', 13, 16, 6, 8,
  '#00ff00', 25, 28, 6, 8,
  '#00ff00', 28, 31, 7, 9,
  '#0000ff', 25, 34, 6, 7,
  '#0000ff', 28, 31, 6, 8
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Assign each piece to a different panel
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tetris <- tetris %>% mutate(
  PANEL    = factor(c(1, 1, 2, 2, 3, 3), levels = 1:3), 
  group    = 1,
  linetype = 1,
  x        = xmax,
  y        = ymax
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Add the tetris data to the original data returned by `finish_layer`
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
finish_layer_return_value <- bind_rows(
  tetris, 
  cdata$finish_layer$return
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create a StatBin that only returns this modified data from `finish_layer`
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
StatBinInject <- inject_data_into_stat_function(
  StatBin, 'finish_layer',
  return_value = finish_layer_return_value
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create the plot to generate tetris pieces injected into the data
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ggplot(mtcars) +
  geom_histogram(aes(mpg, fill = as.factor(cyl)), stat = StatBinInject, bins = 5) +
  theme_bw() +
  facet_wrap(~cyl) +
  labs(title = "Use injected data to inject tetris pieces")