default interval -> median_qi, closes #46

mjskay · Jul 1, 2018 · 36b88d5 · 36b88d5
1 parent 0f92426
commit 36b88d5
Show file tree

Hide file tree

Showing 45 changed files with 210 additions and 206 deletions.
diff --git a/R/compare_levels.R b/R/compare_levels.R
@@ -107,7 +107,7 @@ comparison_types = within(list(), {
 #'   spread_samples(b[i,j]) %>%
 #'   filter(i %in% 1:3, j == 1) %>%
 #'   compare_levels(b, by = i) %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' # Or let's plot all comparisons against the first level (control):
 #' RankCorr %>%

diff --git a/R/gather_emmeans_samples.R b/R/gather_emmeans_samples.R
@@ -67,14 +67,14 @@ gather_lsmeans_samples = function(...) {
 #' m %>%
 #'   emmeans(~ condition) %>%
 #'   gather_emmeans_samples() %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' # or we could get pairwise differences:
 #' m %>%
 #'   emmeans( ~ condition) %>%
 #'   contrast(method = "pairwise") %>%
 #'   gather_emmeans_samples() %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' # see the documentation of emmeans() for more examples of types of
 #' # contrasts supported by that package.

diff --git a/R/gather_terms.R b/R/gather_terms.R
@@ -73,13 +73,13 @@ globalVariables(c("term", "estimate"))
 #' RankCorr %>%
 #'   spread_samples(b[i,v], tau[i]) %>%
 #'   gather_terms() %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' # the first three lines below are roughly equivalent to ggmcmc::ggs(RankCorr)
 #' RankCorr %>%
 #'   as_sample_tibble() %>%
 #'   gather_terms() %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' }
 #' @importFrom stringi stri_detect_regex

diff --git a/R/geom_eye.R b/R/geom_eye.R
@@ -20,8 +20,8 @@
 #' a violin plot, point estimate, and credible interval.
 #'
 #' The vertical form, \code{geom_eye}, is equivalent to  \code{geom_violin() + stat_pointinterval()}
-#' with some reasonable defaults, including color choices and the use of mean with 95\%
-#' and 6\% quantile intervals.
+#' with some reasonable defaults, including color choices and the use of median with 95\%
+#' and 66\% quantile intervals.
 #'
 #' The horizontal form, \code{geom_eyeh()}, is equivalent to \code{geom_violinh() + stat_pointintervalh()}.
 #'
@@ -107,7 +107,7 @@ geom_eye = function(
   ...,
 
   #stat_summaryh properties
-  point_interval = mean_qi,
+  point_interval = median_qi,
   fun.data = NULL,
   fun.args = list(),
   .prob = c(.66, .95),

diff --git a/R/geom_eyeh.R b/R/geom_eyeh.R
@@ -19,7 +19,7 @@ geom_eyeh = function(
   ...,
 
   #stat_summaryh properties
-  point_interval = mean_qi,
+  point_interval = median_qi,
   fun.data = NULL,
   fun.args = list(),
   .prob = c(.66, .95),

diff --git a/R/geom_halfeyeh.R b/R/geom_halfeyeh.R
@@ -18,7 +18,7 @@
 #' distributions in Bayesian inference. This instantiation is a combination of
 #' a density plot, point estimate, and credible interval. \code{geom_halfeyeh()} is
 #' equivalent to \code{geom_density_ridges() + stat_summaryh()} with some reasonable
-#' defaults, including color choices and the use of mean with 95\% quantile
+#' defaults, including color choices and the use of median with 95\% and 66\% quantile
 #' intervals.
 #'
 #' @param mapping The aesthetic mapping, usually constructed with
@@ -98,7 +98,7 @@ geom_halfeyeh = function(
   ...,
 
   #stat_summaryh properties
-  point_interval = mean_qi,
+  point_interval = median_qi,
   fun.data = NULL,
   fun.args = list(),
   .prob = c(.66, .95),

diff --git a/R/geom_interval.R b/R/geom_interval.R
@@ -18,7 +18,8 @@ globalVariables(c("conf.low", "conf.high", ".prob"))
 #' a modified version of \code{\link{geom_linerangeh}}. These geoms set some default aesthetics equal
 #' to the \code{conf.low}, \code{conf.high}, and \code{.prob} columns generated by the \code{point_interval} family
 #' of functions, making them often more convenient than vanilla \code{\link{geom_linerange}} or
-#' \code{\link{geom_linerangeh}} when used with functions like \code{\link{mean_qi}}, \code{\link{median_qi}}, etc.
+#' \code{\link{geom_linerangeh}} when used with functions like \code{\link{median_qi}}, \code{\link{mean_qi}},
+#' \code{\link{mode_hdi}}, etc.
 #'
 #' Specifically, \code{geom_interval} acts as if its default aesthetics are
 #' \code{aes(ymin = conf.low, ymax = conf.high, color = fct_rev(ordered(.prob)))}. \code{geom_intervalh} acts as if
@@ -52,14 +53,14 @@ globalVariables(c("conf.low", "conf.high", ".prob"))
 #'
 #' RankCorr %>%
 #'   spread_samples(u_tau[i]) %>%
-#'   mean_qi(.prob = c(.5, .8, .95, .99)) %>%
+#'   median_qi(.prob = c(.5, .8, .95, .99)) %>%
 #'   ggplot(aes(y = i, x = u_tau)) +
 #'   geom_intervalh() +
 #'   scale_color_brewer()
 #'
 #' RankCorr %>%
 #'   spread_samples(u_tau[i]) %>%
-#'   mean_qi(.prob = c(.5, .8, .95, .99)) %>%
+#'   median_qi(.prob = c(.5, .8, .95, .99)) %>%
 #'   ggplot(aes(x = i, y = u_tau)) +
 #'   geom_interval() +
 #'   scale_color_brewer()

diff --git a/R/geom_pointinterval.R b/R/geom_pointinterval.R
@@ -18,7 +18,8 @@ globalVariables(c("conf.low", "conf.high", ".prob"))
 #' a modified version of \code{\link{geom_pointrangeh}}. These geoms set some default aesthetics equal
 #' to the \code{conf.low}, \code{conf.high}, and \code{.prob} columns generated by the \code{point_interval} family
 #' of functions, making them often more convenient than vanilla \code{\link{geom_pointrange}} or
-#' \code{\link{geom_pointrangeh}} when used with functions like \code{\link{mean_qi}}, \code{\link{median_qi}}, etc.
+#' \code{\link{geom_pointrangeh}} when used with functions like \code{\link{median_qi}}, \code{\link{mean_qi}},
+#' \code{\link{mode_hdi}}, etc.
 #'
 #' Specifically, \code{geom_pointinterval} acts as if its default aesthetics are
 #' \code{aes(ymin = conf.low, ymax = conf.high, size = -.prob)}. \code{geom_pointintervalh} acts as if its default
@@ -67,13 +68,13 @@ globalVariables(c("conf.low", "conf.high", ".prob"))
 #'
 #' RankCorr %>%
 #'   spread_samples(u_tau[i]) %>%
-#'   mean_qi(.prob = c(.8, .95)) %>%
+#'   median_qi(.prob = c(.8, .95)) %>%
 #'   ggplot(aes(y = i, x = u_tau)) +
 #'   geom_pointintervalh()
 #'
 #' RankCorr %>%
 #'   spread_samples(u_tau[i]) %>%
-#'   mean_qi(.prob = c(.8, .95)) %>%
+#'   median_qi(.prob = c(.8, .95)) %>%
 #'   ggplot(aes(x = i, y = u_tau)) +
 #'   geom_pointinterval()
 #'

diff --git a/R/point_interval.R b/R/point_interval.R
@@ -37,11 +37,11 @@ globalVariables(c("y", "ymin", "ymax"))
 #' \code{y} (the point estimate), \code{ymin} (the lower end of the interval),
 #' \code{ymax} (the upper end of the interval), and \code{.prob}, the probability
 #' corresponding to the interval. This behavior allows \code{point_interval}
-#' and its derived functions (like \code{mean_qi}, \code{median_qi}, etc)
+#' and its derived functions (like \code{median_qi}, \code{mean_qi}, \code{mode_hdi}, etc)
 #' to be easily used to plot intervals in ggplot using methods like
 #' \code{\link{geom_eye}}, \code{\link{geom_eyeh}}, or \code{\link{stat_summary}}.
 #'
-#' The functions ending in \code{h} (e.g., \code{point_intervalh}, \code{mean_qih})
+#' The functions ending in \code{h} (e.g., \code{point_intervalh}, \code{median_qih})
 #' behave identically to the function without the h, except that when passed a vector,
 #' they return a data frame with \code{x}/\code{xmin}/\code{xmax} instead of
 #' \code{y}/\code{ymin}/\code{ymax}. This allows them to be used as values of the
@@ -51,8 +51,8 @@ globalVariables(c("y", "ymin", "ymax"))
 #' \code{\link{stat_pointintervalh}}, \code{\link{geom_halfeyeh}}, etc), as
 #' these automatically adjust the function output to match their required aesthetics.
 #'
-#' \code{mean_qi}, \code{mode_qi}, etc are short forms for
-#' \code{point_interval(..., .point = mean, .interval = qi)}, etc.
+#' \code{median_qi}, \code{mode_hdi}, etc are short forms for
+#' \code{point_interval(..., .point = median, .interval = qi)}, etc.
 #'
 #' \code{qi} yields the quantile interval (also known as the percentile interval or
 #' equi-tailed interval) as a 1x2 matrix.
@@ -90,7 +90,7 @@ globalVariables(c("y", "ymin", "ymax"))
 #' set.seed(123)
 #'
 #' rnorm(1000) %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' data.frame(x = rnorm(1000)) %>%
 #'   median_qi(x, .prob = c(.50, .80, .95))
@@ -110,7 +110,7 @@ globalVariables(c("y", "ymin", "ymax"))
 #'     group = "b")
 #'   ) %>%
 #'   group_by(group) %>%
-#'   mean_qi(.prob = c(.50, .80, .95))
+#'   median_qi(.prob = c(.50, .80, .95))
 #'
 #' multimodal_samples = data.frame(
 #'     x = c(rnorm(5000, 0, 1), rnorm(2500, 4, 1))
@@ -128,13 +128,13 @@ globalVariables(c("y", "ymin", "ymax"))
 #' @importFrom stringi stri_startswith_fixed
 #' @importFrom rlang set_names quos quos_auto_name eval_tidy as_quosure
 #' @export
-point_interval = function(.data, ..., .prob=.95, .point = mean, .interval = qi, .broom = TRUE) {
+point_interval = function(.data, ..., .prob=.95, .point = median, .interval = qi, .broom = TRUE) {
   UseMethod("point_interval")
 }
 
 #' @rdname point_interval
 #' @export
-point_interval.default = function(.data, ..., .prob=.95, .point = mean, .interval = qi, .broom = TRUE) {
+point_interval.default = function(.data, ..., .prob=.95, .point = median, .interval = qi, .broom = TRUE) {
   data = .data    # to avoid conflicts with tidy eval's `.data` pronoun
   col_exprs = quos(..., .named = TRUE)
 
@@ -218,7 +218,7 @@ point_interval.default = function(.data, ..., .prob=.95, .point = mean, .interva
 #' @rdname point_interval
 #' @importFrom dplyr rename
 #' @export
-point_interval.numeric = function(.data, ..., .prob = .95, .point = mean, .interval = qi, .broom = FALSE) {
+point_interval.numeric = function(.data, ..., .prob = .95, .point = median, .interval = qi, .broom = FALSE) {
   data = .data    # to avoid conflicts with tidy eval's `.data` pronoun
 
   result = map_df(.prob, function(p) {

diff --git a/R/recover_types.R b/R/recover_types.R
@@ -118,7 +118,7 @@ apply_prototypes = function(...) {
 #' # will be an integer:
 #' m %>%
 #'   spread_samples(condition_mean[condition]) %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' # If we apply recover_types() first, subsequent calls to other tidybayes functions will
 #' # automatically back-convert factors so that they are labeled with their original levels
@@ -128,7 +128,7 @@ apply_prototypes = function(...) {
 #' # now the `condition` column will be a factor with levels "A", "B", "C", ...
 #' m %>%
 #'   spread_samples(condition_mean[condition]) %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' }
 #' @export

diff --git a/R/spread_samples.R b/R/spread_samples.R
@@ -220,7 +220,7 @@ tidy_samples = function(...) {
 #'
 #' RankCorr %>%
 #'   gather_samples(tau[i], typical_r) %>%
-#'   mean_qi()
+#'   median_qi()
 #'
 #' @aliases extract_samples tidy_samples
 #' @importFrom lazyeval lazy_dots

diff --git a/R/stat_interval.R b/R/stat_interval.R
@@ -71,7 +71,7 @@ globalVariables(c("...prob.."))
 stat_interval <- function(mapping = NULL, data = NULL,
   geom = "interval", position = "identity",
   ...,
-  point_interval = mean_qi,
+  point_interval = median_qi,
   fun.data = NULL,
   .prob = c(.50, .80, .95),
   fun.args = list(),

diff --git a/R/stat_intervalh.R b/R/stat_intervalh.R
@@ -9,7 +9,7 @@
 stat_intervalh <- function(mapping = NULL, data = NULL,
   geom = "intervalh", position = "identity",
   ...,
-  point_interval = mean_qi,
+  point_interval = median_qi,
   fun.data = NULL,
   .prob = c(.50, .80, .95),
   fun.args = list(),

diff --git a/R/stat_pointinterval.R b/R/stat_pointinterval.R
@@ -69,7 +69,7 @@ globalVariables(c("...prob.."))
 stat_pointinterval <- function(mapping = NULL, data = NULL,
   geom = "pointinterval", position = "identity",
   ...,
-  point_interval = mean_qi,
+  point_interval = median_qi,
   fun.data = NULL,
   .prob = c(.66, .95),
   fun.args = list(),
@@ -117,7 +117,7 @@ stat_pointinterval <- function(mapping = NULL, data = NULL,
 
 #' @importFrom plyr defaults
 StatPointinterval <- ggproto("StatPointinterval", StatSummary,
-  compute_panel = function(data, scales, fun.data = mean_qi, .prob = c(.66, .95),
+  compute_panel = function(data, scales, fun.data = median_qi, .prob = c(.66, .95),
     fun.args = list(), na.rm = FALSE
   ) {
 

diff --git a/R/stat_pointintervalh.R b/R/stat_pointintervalh.R
@@ -9,7 +9,7 @@
 stat_pointintervalh <- function(mapping = NULL, data = NULL,
   geom = "pointintervalh", position = "identity",
   ...,
-  point_interval = mean_qi,
+  point_interval = median_qi,
   fun.data = NULL,
   .prob = c(.66, .95),
   fun.args = list(),
@@ -57,7 +57,7 @@ stat_pointintervalh <- function(mapping = NULL, data = NULL,
 
 #' @importFrom plyr defaults
 StatPointintervalh <- ggproto("StatPointintervalh", StatSummary,
-  compute_panel = function(data, scales, fun.data = mean_qih, .prob = c(.66, .95),
+  compute_panel = function(data, scales, fun.data = median_qih, .prob = c(.66, .95),
     fun.args = list(), na.rm = FALSE
   ) {
 

diff --git a/README.Rmd b/README.Rmd
@@ -58,7 +58,7 @@ and visualization tasks common to many models:
   it straightforward to generate arbitrary fit lines from a model.
 
 * __Summarizing posterior distributions__ from models. The `point_interval`
-  family of functions (`mean_qi`, `median_qi`, `mode_hdi`, etc) are methods 
+  family of functions (`median_qi`, `mean_qi`, `mode_hdi`, etc) are methods 
   for generating estimates and intervals that are designed with tidy workflows 
   in mind. They can generate estimates plus an arbitrary number of probability 
   intervals *from* tidy data frames of samples, they *return* tidy data frames,
@@ -239,7 +239,7 @@ The condition numbers are automatically turned back into text ("A", "B", "C", ..
 
 ### Plotting posteriors as eye plots: `geom_eye` / `geom_eyeh`
 
-Automatic splitting of indices into columns makes it easy to plot the condition means here. We will employ the `tidybayes::geom_eyeh` geom (horizontal version of `tidybayes::geom_eye`), which combines a violin plot of the posterior density, mean, and 95% quantile interval to give an "eye plot" of the posterior. The point and interval types are customizable using the `point_interval` family of functions. A "half-eye" plot (non-mirrored density) is also available as `tidybayes::geom_halfeyeh`.
+Automatic splitting of indices into columns makes it easy to plot the condition means here. We will employ the `tidybayes::geom_eyeh` geom (horizontal version of `tidybayes::geom_eye`), which combines a violin plot of the posterior density with the median and 66% and 95% quantile intervals to give an "eye plot" of the posterior. The point and interval types are customizable using the `point_interval` family of functions. A "half-eye" plot (non-mirrored density) is also available as `tidybayes::geom_halfeyeh`.
 
 ```{r}
 m %>%
@@ -275,7 +275,7 @@ The idea is to get away from thinking about the posterior as indicating one cano
 
 ### Model comparison via compatibility with `broom`
 
-The output of the `tidybayes::mean_qi` function (and other `point_interval` functions) is compatible with `broom::tidy`, so we can compare parameter estimates easily to models supported by `broom`.
+The output of the `tidybayes::median_qi` function (and other `point_interval` functions) is compatible with `broom::tidy`, so we can compare parameter estimates easily to models supported by `broom`.
 
 For example, let's compare to ordinary least squares (OLS) regression:
 
@@ -288,12 +288,12 @@ linear_estimates =
 linear_estimates
 ```
 
-The output from `mean_qi` when given a single parameter uses `conf.low` and `conf.high` for interval names so that it lines up with `tidy`:
+The output from `median_qi` when given a single parameter uses `conf.low` and `conf.high` for interval names so that it lines up with `tidy`:
 
 ```{r}
 bayes_estimates = m %>%
   spread_samples(condition_mean[condition]) %>%
-  mean_qi(estimate = condition_mean) %>%
+  median_qi(estimate = condition_mean) %>%
   mutate(model = "Bayes")
 bayes_estimates
 ```
@@ -318,7 +318,7 @@ bind_rows(linear_estimates, bayes_estimates) %>%
 
 ### Posterior prediction and complex custom plots
 
-The tidy data format returned by `spread_samples` also facilitates additional computation on parameters followed by the construction of more complex custom plots. For example, we can generate posterior predictions easily, and use the `.prob` argument (passed intervally to `mean_qi`) to generate any number of intervals from the posterior predictions, then plot them alongside parameter estimates and the data:
+The tidy data format returned by `spread_samples` also facilitates additional computation on parameters followed by the construction of more complex custom plots. For example, we can generate posterior predictions easily, and use the `.prob` argument (passed internally to `median_qi`) to generate any number of intervals from the posterior predictions, then plot them alongside parameter estimates and the data:
 
 ```{r}
 m %>%
@@ -330,14 +330,14 @@ m %>%
   stat_intervalh(aes(x = pred), .prob = c(.5, .8, .95)) +
   scale_color_brewer() +
   
-  # mean and quantile intervals of condition mean
+  # median and quantile intervals of condition mean
   stat_pointintervalh(aes(x = condition_mean), .prob = c(.66, .95), position = position_nudge(y = -0.2)) +
   
   # data
   geom_point(aes(x = response), data = ABC)
 ```
 
-This plot shows 66% and 95% quantile credible intervals of posterior mean for each condition (point + black line); 95%, 80%, and 50% posterior predictive intervals (blue); and the data.
+This plot shows the posterior median with 66% and 95% quantile credible intervals for each condition mean (point + black line); 95%, 80%, and 50% posterior predictive intervals (blue); and the data.
 
 
 ### Fit curves

diff --git a/man/compare_levels.Rd b/man/compare_levels.Rd
diff --git a/man/gather_emmeans_samples.Rd b/man/gather_emmeans_samples.Rd