From f6a6123a051b218ac6f9f09c7297502c7925c7f7 Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Tue, 10 May 2022 11:18:31 -0400
Subject: [PATCH 1/9] Import unexported `vec_order_base/radix()`

---
 R/zzz.r | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/R/zzz.r b/R/zzz.r
index d1e85b385b..4780deb58c 100644
--- a/R/zzz.r
+++ b/R/zzz.r
@@ -1,4 +1,6 @@
 .onLoad <- function(libname, pkgname) {
+  ns_dplyr <- ns_env(pkgname)
+
   op <- options()
   op.dplyr <- list(
     dplyr.show_progress = TRUE
@@ -6,7 +8,7 @@
   toset <- !(names(op.dplyr) %in% names(op))
   if (any(toset)) options(op.dplyr[toset])
 
-  .Call(dplyr_init_library, ns_env("dplyr"), ns_env("vctrs"), ns_env("rlang"))
+  .Call(dplyr_init_library, ns_dplyr, ns_env("vctrs"), ns_env("rlang"))
 
   has_dbplyr <- is_installed("dbplyr")
   if (!has_dbplyr || !exists("count.tbl_sql", ns_env("dbplyr"))) {
@@ -14,6 +16,16 @@
     s3_register("dplyr::tally", "tbl_sql")
   }
 
+  # TODO: For `arrange()`, `group_by()`, and `with_order()` until vctrs changes
+  # `vec_order()` to the new ordering algorithm and officially exports
+  # `vec_order_base()`, at which point we should use `vec_order()` and
+  # `vec_order_base()` directly and remove this.
+  env_bind(
+    .env = ns_dplyr,
+    vec_order_base = import_vctrs("vec_order_base"),
+    vec_order_radix = import_vctrs("vec_order_radix")
+  )
+
   run_on_load()
 
   invisible()
@@ -34,3 +46,16 @@
 .onDetach <- function(libpath) {
   setHook(packageEvent("plyr", "attach"), NULL, "replace")
 }
+
+import_vctrs <- function(name) {
+  import_from(name, "vctrs")
+}
+import_from <- function(name, package) {
+  ns <- getNamespace(package)
+
+  if (!exists(name, mode = "function", envir = ns, inherits = FALSE)) {
+    abort(sprintf("No such '%s' function: `%s()`.", package, name))
+  }
+
+  get(name, mode = "function", envir = ns, inherits = FALSE)
+}

From 7693d0f972a315a7e80eacea1b6d5ef5538b96da Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Tue, 10 May 2022 11:44:26 -0400
Subject: [PATCH 2/9] Use `vec_order_base()` in `with_order()` and `group_by()`
 internals

This allows us to isolate changes in this PR to just `arrange()`, keeping other usage of `vec_order()` the same for now.
---
 R/grouped-df.r | 2 +-
 R/order-by.R   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/grouped-df.r b/R/grouped-df.r
index 0aaa53c465..e80c9233ce 100644
--- a/R/grouped-df.r
+++ b/R/grouped-df.r
@@ -298,7 +298,7 @@ vec_split_id_order <- function(x) {
   split_id <- vec_group_loc(x)
   split_id$loc <- new_list_of(split_id$loc, ptype = integer())
 
-  vec_slice(split_id, vec_order(split_id$key))
+  vec_slice(split_id, vec_order_base(split_id$key))
 }
 
 group_intersect <- function(x, new) {
diff --git a/R/order-by.R b/R/order-by.R
index 59fd8b9676..7902e000a1 100644
--- a/R/order-by.R
+++ b/R/order-by.R
@@ -61,7 +61,7 @@ order_by <- function(order_by, call) {
 #' @keywords internal
 #' @export
 with_order <- function(order_by, fun, x, ...) {
-  ord <- vec_order(order_by)
+  ord <- vec_order_base(order_by)
   undo <- vec_match(seq_along(order_by), ord)
 
   out <- fun(vec_slice(x, ord), ...)

From 580774316dc31edbab00ef4e87a8271d3f8d7ed5 Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Tue, 10 May 2022 11:51:10 -0400
Subject: [PATCH 3/9] Add a `.locale` argument to `arrange()`, and implement
 `dplyr_locale()`

---
 DESCRIPTION                      |   1 +
 NAMESPACE                        |   1 +
 R/arrange.R                      |  95 ++++++++++++++++++++---------
 R/locale.R                       | 101 +++++++++++++++++++++++++++++++
 man/arrange.Rd                   |  24 ++++++--
 man/dplyr_locale.Rd              |  66 ++++++++++++++++++++
 tests/testthat/_snaps/arrange.md |  32 ++++++++++
 tests/testthat/_snaps/locale.md  |  20 ++++++
 tests/testthat/test-arrange.r    |  59 ++++++++++++++++--
 tests/testthat/test-locale.R     |  20 ++++++
 10 files changed, 380 insertions(+), 39 deletions(-)
 create mode 100644 R/locale.R
 create mode 100644 man/dplyr_locale.Rd
 create mode 100644 tests/testthat/_snaps/locale.md
 create mode 100644 tests/testthat/test-locale.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 3a6795b00d..b2ca1a6c2b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -60,6 +60,7 @@ Suggests:
     RMySQL,
     RPostgreSQL,
     RSQLite,
+    stringi (>= 1.7.6),
     testthat (>= 3.1.1),
     tidyr, 
     withr
diff --git a/NAMESPACE b/NAMESPACE
index d1d4d398a2..489e47fb7f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -270,6 +270,7 @@ export(distinct_prepare)
 export(do)
 export(do_)
 export(dplyr_col_modify)
+export(dplyr_locale)
 export(dplyr_reconstruct)
 export(dplyr_row_slice)
 export(ends_with)
diff --git a/R/arrange.R b/R/arrange.R
index 55055a3399..efc3130084 100644
--- a/R/arrange.R
+++ b/R/arrange.R
@@ -10,10 +10,6 @@
 #' once per data frame, not once per group.
 #'
 #' @details
-#' ## Locales
-#' The sort order for character vectors will depend on the collating sequence
-#' of the locale in use: see [locales()].
-#'
 #' ## Missing values
 #' Unlike base sorting with `sort()`, `NA` are:
 #' * always sorted to the end for local data, even when wrapped with `desc()`.
@@ -42,6 +38,21 @@
 #'   variables. Use [desc()] to sort a variable in descending order.
 #' @param .by_group If `TRUE`, will sort first by grouping variable. Applies to
 #'   grouped data frames only.
+#' @param .locale The locale to sort character vectors in.
+#'
+#'   - Defaults to [dplyr_locale()], which uses the American English locale
+#'     if the stringi package is installed and the global default locale
+#'     has not been altered. See the help page for [dplyr_locale()] for the
+#'     exact details.
+#'
+#'   - If a single string is supplied, then this will be used as the sorting
+#'     locale. For example, `"fr"` will sort with the French locale. This
+#'     requires the stringi package. Use [stringi::stri_locale_list()] to
+#'     generate a list of possible locale identifiers.
+#'
+#'   - If `"C"` is supplied, then character vectors will be sorted in the C
+#'     locale. This does not require stringi and is often much faster than
+#'     supplying a locale identifier.
 #' @family single table verbs
 #' @examples
 #' arrange(mtcars, cyl, disp)
@@ -68,25 +79,34 @@ arrange <- function(.data, ..., .by_group = FALSE) {
   UseMethod("arrange")
 }
 
+#' @rdname arrange
 #' @export
-arrange.data.frame <- function(.data, ..., .by_group = FALSE) {
+arrange.data.frame <- function(.data,
+                               ...,
+                               .by_group = FALSE,
+                               .locale = dplyr_locale()) {
   dots <- enquos(...)
 
   if (.by_group) {
     dots <- c(quos(!!!groups(.data)), dots)
   }
 
-  loc <- arrange_rows(.data, dots)
+  chr_proxy_collate <- locale_to_chr_proxy_collate(.locale)
+
+  loc <- arrange_rows(.data, dots, chr_proxy_collate)
   dplyr_row_slice(.data, loc)
 }
 
 # Helpers -----------------------------------------------------------------
 
-arrange_rows <- function(.data, dots, error_call = caller_env()) {
+arrange_rows <- function(data,
+                         dots,
+                         chr_proxy_collate,
+                         error_call = caller_env()) {
   error_call <- dplyr_error_call(error_call)
 
   if (length(dots) == 0L) {
-    out <- seq_len(nrow(.data))
+    out <- seq_len(nrow(data))
     return(out)
   }
 
@@ -94,6 +114,8 @@ arrange_rows <- function(.data, dots, error_call = caller_env()) {
     if(quo_is_call(quosure, "desc")) "desc" else "asc"
   })
 
+  na_values <- if_else(directions == "desc", "smallest", "largest")
+
   quosures <- map(dots, function(quosure) {
     if (quo_is_call(quosure, "desc", ns = c("", "dplyr"))) {
       expr <- quo_get_expr(quosure)
@@ -117,7 +139,7 @@ arrange_rows <- function(.data, dots, error_call = caller_env()) {
   #       revisit when we have something like mutate_one() to
   #       evaluate one quosure in the data mask
   data <- withCallingHandlers({
-    transmute(new_data_frame(.data), !!!quosures)
+    transmute(new_data_frame(data), !!!quosures)
   }, error = function(cnd) {
 
     if (inherits(cnd, "dplyr:::mutate_error")) {
@@ -144,24 +166,43 @@ arrange_rows <- function(.data, dots, error_call = caller_env()) {
 
   })
 
-  # we can't just use vec_compare_proxy(data) because we need to apply
-  # direction for each column, so we get a list of proxies instead
-  # and then mimic vctrs:::order_proxy
-  #
-  # should really be map2(quosures, directions, ...)
-  proxies <- map2(data, directions, function(column, direction) {
-    proxy <- vec_proxy_order(column)
-    desc <- identical(direction, "desc")
-    if (is.data.frame(proxy)) {
-      proxy <- order(vec_order(proxy,
-        direction = direction,
-        na_value = if(desc) "smallest" else "largest"
-      ))
-    } else if(desc) {
-      proxy <- desc(proxy)
+  vec_order_radix(
+    x = data,
+    direction = directions,
+    na_value = na_values,
+    chr_proxy_collate = chr_proxy_collate
+  )
+}
+
+locale_to_chr_proxy_collate <- function(locale,
+                                        ...,
+                                        has_stringi = has_minimum_stringi(),
+                                        error_call = caller_env()) {
+  check_dots_empty0(...)
+
+  if (identical(locale, "C")) {
+    return(NULL)
+  }
+
+  if (is_character(locale)) {
+    if (!is_string(locale)) {
+      abort("If `.locale` is a character vector, it must be a single string.", call = error_call)
+    }
+    if (!has_stringi) {
+      abort("stringi >=1.5.3 is required to arrange in a different locale.", call = error_call)
+    }
+    if (!locale %in% stringi::stri_locale_list()) {
+      abort("`.locale` must be one of the locales within `stringi::stri_locale_list()`.", call = error_call)
     }
-    proxy
-  })
 
-  exec("order", !!!unname(proxies), decreasing = FALSE, na.last = TRUE)
+    return(sort_key_generator(locale))
+  }
+
+  abort("`.locale` must be a string.", call = error_call)
+}
+
+sort_key_generator <- function(locale) {
+  function(x) {
+    stringi::stri_sort_key(x, locale = locale)
+  }
 }
diff --git a/R/locale.R b/R/locale.R
new file mode 100644
index 0000000000..1d64450c9f
--- /dev/null
+++ b/R/locale.R
@@ -0,0 +1,101 @@
+#' Locale used by dplyr
+#'
+#' @description
+#' `dplyr_locale()` returns a single string representing the default locale used
+#' by dplyr when ordering character vectors. It is used as the default value of
+#' `.locale` in [arrange()].
+#'
+#' ## Default locale
+#'
+#' - If stringi >=1.5.3 is installed, the default locale is set to American
+#'   English, represented by the locale identifier `"en"`.
+#'
+#' - If stringi is not installed or is older than 1.5.3, the default locale
+#'   falls back to the C locale, represented by `"C"`. When this occurs, a
+#'   warning will be thrown encouraging you to either install stringi or
+#'   replace usage of `dplyr_locale()` with `"C"` to explicitly force the C
+#'   locale.
+#'
+#' ## Global override
+#'
+#' To override the above default behavior, you can set the global option,
+#' `dplyr.locale`, to either `"C"` or a stringi locale identifier from
+#' [stringi::stri_locale_list()] to globally alter the default locale.
+#' Setting this option to anything other than `"C"` requires stringi >=1.5.3.
+#'
+#' We generally recommend that you set the `.locale` argument of [arrange()]
+#' explicitly rather than overriding the global locale, if possible.
+#'
+#' Another alternative is to only change the global locale within a limited
+#' scope through the use of [rlang::local_options()] or [rlang::with_options()].
+#' This can be useful when a package that you don't control calls `arrange()`
+#' internally.
+#' @export
+#' @examplesIf dplyr:::has_minimum_stringi()
+#' # Default locale is American English
+#' dplyr_locale()
+#'
+#' # This Danish letter is typically sorted after `z`
+#' df <- tibble(x = x <- c("o", "p", "\u00F8", "z"))
+#' df
+#'
+#' # The American English locale sorts it right after `o`
+#' arrange(df, x)
+#'
+#' # Explicitly override `.locale` to `"da"` for Danish ordering
+#' arrange(df, x, .locale = "da")
+#'
+#' # Or temporarily override the `dplyr.locale` global option, which is useful
+#' # if `arrange()` is called from a function you don't control
+#' col_sorter <- function(df) {
+#'   arrange(df, x)
+#' }
+#'
+#' rlang::with_options(dplyr.locale = "da", {
+#'   col_sorter(df)
+#' })
+dplyr_locale <- function() {
+  locale <- peek_option("dplyr.locale")
+
+  if (is_string(locale)) {
+    return(locale)
+  }
+  if (!is_null(locale)) {
+    abort("If set, the global option `dplyr.locale` must be a string.")
+  }
+
+  dplyr_locale_default()
+}
+
+dplyr_locale_default <- function(has_stringi = has_minimum_stringi()) {
+  if (has_stringi) {
+    "en"
+  } else {
+    warn_locale_fallback()
+    "C"
+  }
+}
+
+warn_locale_fallback <- function() {
+  header <- paste0(
+    "`dplyr_locale()` attempted to default to the American English locale (\"en\"), ",
+    "but the required package, stringi >=1.5.3, is not installed."
+  )
+
+  bullets <- c(
+    i = "Falling back to the C locale.",
+    i = paste0(
+      "Silence this warning by installing stringi or by ",
+      "explicitly replacing usage of `dplyr_locale()` with \"C\"."
+    )
+  )
+
+  warn(
+    message = c(header, bullets),
+    class = "dplyr_warn_locale_fallback"
+  )
+}
+
+has_minimum_stringi <- function() {
+  is_installed("stringi", version = "1.5.3")
+}
diff --git a/man/arrange.Rd b/man/arrange.Rd
index 132ec0bf9f..e2dbfbbcca 100644
--- a/man/arrange.Rd
+++ b/man/arrange.Rd
@@ -2,9 +2,12 @@
 % Please edit documentation in R/arrange.R
 \name{arrange}
 \alias{arrange}
+\alias{arrange.data.frame}
 \title{Arrange rows by column values}
 \usage{
 arrange(.data, ..., .by_group = FALSE)
+
+\method{arrange}{data.frame}(.data, ..., .by_group = FALSE, .locale = dplyr_locale())
 }
 \arguments{
 \item{.data}{A data frame, data frame extension (e.g. a tibble), or a
@@ -16,6 +19,21 @@ variables. Use \code{\link[=desc]{desc()}} to sort a variable in descending orde
 
 \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to
 grouped data frames only.}
+
+\item{.locale}{The locale to sort character vectors in.
+\itemize{
+\item Defaults to \code{\link[=dplyr_locale]{dplyr_locale()}}, which uses the American English locale
+if the stringi package is installed and the global default locale
+has not been altered. See the help page for \code{\link[=dplyr_locale]{dplyr_locale()}} for the
+exact details.
+\item If a single string is supplied, then this will be used as the sorting
+locale. For example, \code{"fr"} will sort with the French locale. This
+requires the stringi package. Use \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to
+generate a list of possible locale identifiers.
+\item If \code{"C"} is supplied, then character vectors will be sorted in the C
+locale. This does not require stringi and is often much faster than
+supplying a locale identifier.
+}}
 }
 \value{
 An object of the same type as \code{.data}. The output has the following
@@ -37,12 +55,6 @@ in order to group by them, and functions of variables are evaluated
 once per data frame, not once per group.
 }
 \details{
-\subsection{Locales}{
-
-The sort order for character vectors will depend on the collating sequence
-of the locale in use: see \code{\link[=locales]{locales()}}.
-}
-
 \subsection{Missing values}{
 
 Unlike base sorting with \code{sort()}, \code{NA} are:
diff --git a/man/dplyr_locale.Rd b/man/dplyr_locale.Rd
new file mode 100644
index 0000000000..cf322684cd
--- /dev/null
+++ b/man/dplyr_locale.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/locale.R
+\name{dplyr_locale}
+\alias{dplyr_locale}
+\title{Locale used by dplyr}
+\usage{
+dplyr_locale()
+}
+\description{
+\code{dplyr_locale()} returns a single string representing the default locale used
+by dplyr when ordering character vectors. It is used as the default value of
+\code{.locale} in \code{\link[=arrange]{arrange()}}.
+\subsection{Default locale}{
+\itemize{
+\item If stringi >=1.5.3 is installed, the default locale is set to American
+English, represented by the locale identifier \code{"en"}.
+\item If stringi is not installed or is older than 1.5.3, the default locale
+falls back to the C locale, represented by \code{"C"}. When this occurs, a
+warning will be thrown encouraging you to either install stringi or
+replace usage of \code{dplyr_locale()} with \code{"C"} to explicitly force the C
+locale.
+}
+}
+
+\subsection{Global override}{
+
+To override the above default behavior, you can set the global option,
+\code{dplyr.locale}, to either \code{"C"} or a stringi locale identifier from
+\code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to globally alter the default locale.
+Setting this option to anything other than \code{"C"} requires stringi >=1.5.3.
+
+We generally recommend that you set the \code{.locale} argument of \code{\link[=arrange]{arrange()}}
+explicitly rather than overriding the global locale, if possible.
+
+Another alternative is to only change the global locale within a limited
+scope through the use of \code{\link[rlang:local_options]{rlang::local_options()}} or \code{\link[rlang:local_options]{rlang::with_options()}}.
+This can be useful when a package that you don't control calls \code{arrange()}
+internally.
+}
+}
+\examples{
+\dontshow{if (dplyr:::has_minimum_stringi()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Default locale is American English
+dplyr_locale()
+
+# This Danish letter is typically sorted after `z`
+df <- tibble(x = x <- c("o", "p", "\u00F8", "z"))
+df
+
+# The American English locale sorts it right after `o`
+arrange(df, x)
+
+# Explicitly override `.locale` to `"da"` for Danish ordering
+arrange(df, x, .locale = "da")
+
+# Or temporarily override the `dplyr.locale` global option, which is useful
+# if `arrange()` is called from a function you don't control
+col_sorter <- function(df) {
+  arrange(df, x)
+}
+
+rlang::with_options(dplyr.locale = "da", {
+  col_sorter(df)
+})
+\dontshow{\}) # examplesIf}
+}
diff --git a/tests/testthat/_snaps/arrange.md b/tests/testthat/_snaps/arrange.md
index 43e2ec42c4..6d23a5e50c 100644
--- a/tests/testthat/_snaps/arrange.md
+++ b/tests/testthat/_snaps/arrange.md
@@ -26,6 +26,38 @@
       x Problem while computing `..1 = rep(x, 2)`.
       x `..1` must be size 1, not 2.
 
+# arrange errors if stringi is not installed and a locale identifier is used
+
+    Code
+      locale_to_chr_proxy_collate("fr", has_stringi = FALSE)
+    Condition
+      Error:
+      ! stringi >=1.5.3 is required to arrange in a different locale.
+
+# arrange validates `.locale`
+
+    Code
+      arrange(df, .locale = 1)
+    Condition
+      Error in `arrange()`:
+      ! `.locale` must be a string.
+
+---
+
+    Code
+      arrange(df, .locale = c("en_US", "fr_BF"))
+    Condition
+      Error in `arrange()`:
+      ! If `.locale` is a character vector, it must be a single string.
+
+---
+
+    Code
+      arrange(df, .locale = "x")
+    Condition
+      Error in `arrange()`:
+      ! `.locale` must be one of the locales within `stringi::stri_locale_list()`.
+
 # desc() inside arrange() checks the number of arguments (#5921)
 
     Code
diff --git a/tests/testthat/_snaps/locale.md b/tests/testthat/_snaps/locale.md
new file mode 100644
index 0000000000..99aeea8eb1
--- /dev/null
+++ b/tests/testthat/_snaps/locale.md
@@ -0,0 +1,20 @@
+# `dplyr_locale()` respects `dplyr.locale`
+
+    Code
+      dplyr_locale()
+    Condition
+      Error in `dplyr_locale()`:
+      ! If set, the global option `dplyr.locale` must be a string.
+
+# `dplyr_locale()` falls back to the C locale with a warning if stringi is not available
+
+    Code
+      dplyr_locale_default(has_stringi = FALSE)
+    Condition
+      Warning:
+      `dplyr_locale()` attempted to default to the American English locale ("en"), but the required package, stringi >=1.5.3, is not installed.
+      i Falling back to the C locale.
+      i Silence this warning by installing stringi or by explicitly replacing usage of `dplyr_locale()` with "C".
+    Output
+      [1] "C"
+
diff --git a/tests/testthat/test-arrange.r b/tests/testthat/test-arrange.r
index d022904046..7b7fcb2aee 100644
--- a/tests/testthat/test-arrange.r
+++ b/tests/testthat/test-arrange.r
@@ -82,14 +82,61 @@ test_that("arrange handles S4 classes (#1105)", {
   expect_equal(arrange(df, y), df[3:1, ])
 })
 
-test_that("arrange respects locale (#1280)", {
-  df2 <- tibble(words = c("casa", "\u00e1rbol", "zona", "\u00f3rgano"))
+# locale --------------------------------------------------------------
 
-  res <- df2 %>% arrange(words)
-  expect_equal(res$words, sort(df2$words))
+test_that("arrange defaults to the American English locale if stringi is installed", {
+  skip_if_not_installed("stringi", "1.5.3")
 
-  res <- df2 %>% arrange(desc(words))
-  expect_equal(res$words, sort(df2$words, decreasing = TRUE))
+  x <- c("A", "a", "b", "B")
+  df <- tibble(x = x)
+
+  res <- arrange(df, x)
+  expect_identical(res$x, c("a", "A", "b", "B"))
+
+  res <- arrange(df, desc(x))
+  expect_identical(res$x, rev(c("a", "A", "b", "B")))
+})
+
+test_that("locale can be set to C", {
+  x <- c("A", "a", "b", "B")
+  df <- tibble(x = x)
+
+  res <- arrange(df, x, .locale = "C")
+  expect_identical(res$x, c("A", "B", "a", "b"))
+})
+
+test_that("non-English locales can be used", {
+  skip_if_not_installed("stringi", "1.5.3")
+
+  # Danish `o` with `/` through it sorts after `z`
+  x <- c("o", "\u00F8", "p", "z")
+  df <- tibble(x = x)
+
+  res <- arrange(df, x)
+  expect_identical(res$x, x)
+
+  res <- arrange(df, x, .locale = "da")
+  expect_identical(res$x, x[c(1, 3, 4, 2)])
+})
+
+test_that("arrange errors if stringi is not installed and a locale identifier is used", {
+  expect_snapshot(error = TRUE, {
+    locale_to_chr_proxy_collate("fr", has_stringi = FALSE)
+  })
+})
+
+test_that("arrange validates `.locale`", {
+  df <- tibble()
+
+  expect_snapshot(error = TRUE, {
+    arrange(df, .locale = 1)
+  })
+  expect_snapshot(error = TRUE, {
+    arrange(df, .locale = c("en_US", "fr_BF"))
+  })
+  expect_snapshot(error = TRUE, {
+    arrange(df, .locale = "x")
+  })
 })
 
 # data ----------------------------------------------------------------
diff --git a/tests/testthat/test-locale.R b/tests/testthat/test-locale.R
new file mode 100644
index 0000000000..50271a6cd0
--- /dev/null
+++ b/tests/testthat/test-locale.R
@@ -0,0 +1,20 @@
+test_that("`dplyr_locale()` uses 'en' if stringi is available", {
+  skip_if_not_installed("stringi", "1.5.3")
+
+  expect_identical(dplyr_locale(), "en")
+})
+
+test_that("`dplyr_locale()` respects `dplyr.locale`", {
+  local_options(dplyr.locale = "fr")
+  expect_identical(dplyr_locale(), "fr")
+
+  local_options(dplyr.locale = 1)
+  expect_snapshot(error = TRUE, {
+    dplyr_locale()
+  })
+})
+
+test_that("`dplyr_locale()` falls back to the C locale with a warning if stringi is not available", {
+  expect_snapshot(dplyr_locale_default(has_stringi = FALSE))
+  expect_warning(dplyr_locale_default(has_stringi = FALSE), class = "dplyr_warn_locale_fallback")
+})

From 6f7612e638276e59120d2b2696c623a2e0cbc6e0 Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Tue, 10 May 2022 11:52:21 -0400
Subject: [PATCH 4/9] Give scoped `arrange()` variants a `.locale` argument

Even though these are superseded, this should help ease the transition a little, since without this argument it would be difficult to choose a different locale, and if stringi wasn't installed then you'd unconditionally get a warning you couldn't silence
---
 R/colwise-arrange.R                   | 26 +++++++++++++----
 man/arrange_all.Rd                    | 41 +++++++++++++++++++++++++--
 tests/testthat/test-colwise-arrange.R | 17 +++++++++++
 3 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/R/colwise-arrange.R b/R/colwise-arrange.R
index 73f5752e45..467db0a094 100644
--- a/R/colwise-arrange.R
+++ b/R/colwise-arrange.R
@@ -29,31 +29,45 @@
 #' arrange_all(df, desc)
 #' # ->
 #' arrange(df, across(everything(), desc))
-arrange_all <- function(.tbl, .funs = list(), ..., .by_group = FALSE) {
+arrange_all <- function(.tbl,
+                        .funs = list(),
+                        ...,
+                        .by_group = FALSE,
+                        .locale = dplyr_locale()) {
   lifecycle::signal_stage("superseded", "arrange_all()")
   funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_all")
   if (!length(funs)) {
     funs <- syms(tbl_vars(.tbl))
   }
-  arrange(.tbl, !!!funs, .by_group = .by_group)
+  arrange(.tbl, !!!funs, .by_group = .by_group, .locale = .locale)
 }
 #' @rdname arrange_all
 #' @export
-arrange_at <- function(.tbl, .vars, .funs = list(), ..., .by_group = FALSE) {
+arrange_at <- function(.tbl,
+                       .vars,
+                       .funs = list(),
+                       ...,
+                       .by_group = FALSE,
+                       .locale = dplyr_locale()) {
   lifecycle::signal_stage("superseded", "arrange_at()")
   funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_at")
   if (!length(funs)) {
     funs <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE)
   }
-  arrange(.tbl, !!!funs, .by_group = .by_group)
+  arrange(.tbl, !!!funs, .by_group = .by_group, .locale = .locale)
 }
 #' @rdname arrange_all
 #' @export
-arrange_if <- function(.tbl, .predicate, .funs = list(), ..., .by_group = FALSE) {
+arrange_if <- function(.tbl,
+                       .predicate,
+                       .funs = list(),
+                       ...,
+                       .by_group = FALSE,
+                       .locale = dplyr_locale()) {
   lifecycle::signal_stage("superseded", "arrange_if()")
   funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_if")
   if (!length(funs)) {
     funs <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE)
   }
-  arrange(.tbl, !!!funs, .by_group = .by_group)
+  arrange(.tbl, !!!funs, .by_group = .by_group, .locale = .locale)
 }
diff --git a/man/arrange_all.Rd b/man/arrange_all.Rd
index 4e139aff4a..3dd8d111d9 100644
--- a/man/arrange_all.Rd
+++ b/man/arrange_all.Rd
@@ -6,11 +6,31 @@
 \alias{arrange_if}
 \title{Arrange rows by a selection of variables}
 \usage{
-arrange_all(.tbl, .funs = list(), ..., .by_group = FALSE)
+arrange_all(
+  .tbl,
+  .funs = list(),
+  ...,
+  .by_group = FALSE,
+  .locale = dplyr_locale()
+)
 
-arrange_at(.tbl, .vars, .funs = list(), ..., .by_group = FALSE)
+arrange_at(
+  .tbl,
+  .vars,
+  .funs = list(),
+  ...,
+  .by_group = FALSE,
+  .locale = dplyr_locale()
+)
 
-arrange_if(.tbl, .predicate, .funs = list(), ..., .by_group = FALSE)
+arrange_if(
+  .tbl,
+  .predicate,
+  .funs = list(),
+  ...,
+  .by_group = FALSE,
+  .locale = dplyr_locale()
+)
 }
 \arguments{
 \item{.tbl}{A \code{tbl} object.}
@@ -23,6 +43,21 @@ arrange_if(.tbl, .predicate, .funs = list(), ..., .by_group = FALSE)
 \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to
 grouped data frames only.}
 
+\item{.locale}{The locale to sort character vectors in.
+\itemize{
+\item Defaults to \code{\link[=dplyr_locale]{dplyr_locale()}}, which uses the American English locale
+if the stringi package is installed and the global default locale
+has not been altered. See the help page for \code{\link[=dplyr_locale]{dplyr_locale()}} for the
+exact details.
+\item If a single string is supplied, then this will be used as the sorting
+locale. For example, \code{"fr"} will sort with the French locale. This
+requires the stringi package. Use \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to
+generate a list of possible locale identifiers.
+\item If \code{"C"} is supplied, then character vectors will be sorted in the C
+locale. This does not require stringi and is often much faster than
+supplying a locale identifier.
+}}
+
 \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}},
 a character vector of column names, a numeric vector of column
 positions, or \code{NULL}.}
diff --git a/tests/testthat/test-colwise-arrange.R b/tests/testthat/test-colwise-arrange.R
index 8beaa1b7b8..f698570412 100644
--- a/tests/testthat/test-colwise-arrange.R
+++ b/tests/testthat/test-colwise-arrange.R
@@ -65,3 +65,20 @@ test_that("scoped arrange respect .by_group (#3245)",{
     arrange(d, cyl, mpg, disp)
   )
 })
+
+test_that("scoped `arrange()` respects `.locale`", {
+  df <- tibble(x = c("A", "a", "b", "B"))
+
+  expect_identical(
+    arrange_all(df, .locale = "C"),
+    arrange(df, x, .locale = "C")
+  )
+  expect_identical(
+    arrange_if(df, is.character, .locale = "C"),
+    arrange(df, x, .locale = "C")
+  )
+  expect_identical(
+    arrange_at(df, vars(x), .locale = "C"),
+    arrange(df, x, .locale = "C")
+  )
+})

From 42d928b12798074f65e7150e3913e2a78a39fd7c Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Tue, 10 May 2022 11:53:21 -0400
Subject: [PATCH 5/9] NEWS bullet

---
 NEWS.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 4143c1ec24..bed32a1f78 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,13 @@
 # dplyr (development version)
 
+* `arrange()` now uses a faster algorithm for sorting character vectors,
+   which is heavily inspired by data.table's `forder()`. Additionally, the
+   default locale is now American English, which is a breaking change from
+   the previous behavior which utilized the system locale. The new `.locale`
+   argument can be used to adjust this. For a fuller explanation, refer to this
+   [tidyup](https://github.com/tidyverse/tidyups/blob/main/003-dplyr-radix-ordering.md)
+   which outlines and justifies this change (#4962).
+
 * `tbl_sum()` is no longer reexported from tibble (#6284).
 
 * `slice_sample()` now gives a more informative error when `replace = FALSE` and

From 5f78c31b1ea4b33da83bee4ab20568796217f397 Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Tue, 10 May 2022 12:27:46 -0400
Subject: [PATCH 6/9] Mark `dplyr_locale()` documentation as internal

With the intention being that it shouldn't show up on the pkgdown site reference page. We expect most people to get to this page through `arrange()`.
---
 R/locale.R          | 1 +
 man/dplyr_locale.Rd | 1 +
 2 files changed, 2 insertions(+)

diff --git a/R/locale.R b/R/locale.R
index 1d64450c9f..38577f3b91 100644
--- a/R/locale.R
+++ b/R/locale.R
@@ -31,6 +31,7 @@
 #' This can be useful when a package that you don't control calls `arrange()`
 #' internally.
 #' @export
+#' @keywords internal
 #' @examplesIf dplyr:::has_minimum_stringi()
 #' # Default locale is American English
 #' dplyr_locale()
diff --git a/man/dplyr_locale.Rd b/man/dplyr_locale.Rd
index cf322684cd..6590984caa 100644
--- a/man/dplyr_locale.Rd
+++ b/man/dplyr_locale.Rd
@@ -64,3 +64,4 @@ rlang::with_options(dplyr.locale = "da", {
 })
 \dontshow{\}) # examplesIf}
 }
+\keyword{internal}

From 0221c59ab0530bc4bf1a5f752b305e11dbd61d0b Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Fri, 13 May 2022 10:40:38 -0400
Subject: [PATCH 7/9] Default `dplyr_locale()` to C

This is reproducible across all R sessions and OSes, and is much faster, which makes it a good default.

It will also make the default of `arrange()` continue to align with what `group_by() + summarize()` returns, since that will also unconditionally use the C locale.
---
 NEWS.md                          | 17 +++---
 R/arrange.R                      | 21 ++++----
 R/locale.R                       | 88 +++++++++++++++-----------------
 man/arrange.Rd                   | 23 +++++----
 man/arrange_all.Rd               | 23 +++++----
 man/dplyr_locale.Rd              | 62 +++++++++++++++-------
 tests/testthat/_snaps/arrange.md |  2 +-
 tests/testthat/_snaps/locale.md  | 12 -----
 tests/testthat/test-arrange.r    | 40 +++++++++++----
 tests/testthat/test-locale.R     | 11 +---
 10 files changed, 165 insertions(+), 134 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index bed32a1f78..bc4aa8531f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,12 +1,15 @@
 # dplyr (development version)
 
-* `arrange()` now uses a faster algorithm for sorting character vectors,
-   which is heavily inspired by data.table's `forder()`. Additionally, the
-   default locale is now American English, which is a breaking change from
-   the previous behavior which utilized the system locale. The new `.locale`
-   argument can be used to adjust this. For a fuller explanation, refer to this
-   [tidyup](https://github.com/tidyverse/tidyups/blob/main/003-dplyr-radix-ordering.md)
-   which outlines and justifies this change (#4962).
+* `arrange()` now uses a faster algorithm for sorting character vectors, which
+  is heavily inspired by data.table's `forder()`. Additionally, the default
+  locale for sorting character vectors is now the C locale, which is a breaking
+  change from the previous behavior that utilized the system locale. The new
+  `.locale` argument can be used to adjust this to, say, the American English
+  locale, which is an optional feature that requires the stringi package. This
+  change improves reproducibility across R sessions and operating systems. For a
+  fuller explanation, refer to this
+  [tidyup](https://github.com/tidyverse/tidyups/blob/main/003-dplyr-radix-ordering.md)
+  which outlines and justifies this change (#4962).
 
 * `tbl_sum()` is no longer reexported from tibble (#6284).
 
diff --git a/R/arrange.R b/R/arrange.R
index efc3130084..670b12652e 100644
--- a/R/arrange.R
+++ b/R/arrange.R
@@ -40,19 +40,22 @@
 #'   grouped data frames only.
 #' @param .locale The locale to sort character vectors in.
 #'
-#'   - Defaults to [dplyr_locale()], which uses the American English locale
-#'     if the stringi package is installed and the global default locale
-#'     has not been altered. See the help page for [dplyr_locale()] for the
+#'   - Defaults to [dplyr_locale()], which uses the `"C"` locale unless this is
+#'     explicitly overriden. See the help page for [dplyr_locale()] for the
 #'     exact details.
 #'
-#'   - If a single string is supplied, then this will be used as the sorting
-#'     locale. For example, `"fr"` will sort with the French locale. This
-#'     requires the stringi package. Use [stringi::stri_locale_list()] to
-#'     generate a list of possible locale identifiers.
+#'   - If a single string from [stringi::stri_locale_list()] is supplied, then
+#'     this will be used as the locale to sort with. For example, `"en"` will
+#'     sort with the American English locale. This requires the stringi package.
 #'
-#'   - If `"C"` is supplied, then character vectors will be sorted in the C
-#'     locale. This does not require stringi and is often much faster than
+#'   - If `"C"` is supplied, then character vectors will always be sorted in the
+#'     C locale. This does not require stringi and is often much faster than
 #'     supplying a locale identifier.
+#'
+#'   The C locale is not the same as English locales, such as `"en"`,
+#'   particularly when it comes to case-sensitivity. This is explained in
+#'   more detail in the help page of [dplyr_locale()] under the `Default locale`
+#'   section.
 #' @family single table verbs
 #' @examples
 #' arrange(mtcars, cyl, disp)
diff --git a/R/locale.R b/R/locale.R
index 38577f3b91..9b76e7c0d7 100644
--- a/R/locale.R
+++ b/R/locale.R
@@ -7,21 +7,24 @@
 #'
 #' ## Default locale
 #'
-#' - If stringi >=1.5.3 is installed, the default locale is set to American
-#'   English, represented by the locale identifier `"en"`.
+#' The default locale returned by `dplyr_locale()` is the C locale, identical
+#' to explicitly supplying `.locale = "C"`.
 #'
-#' - If stringi is not installed or is older than 1.5.3, the default locale
-#'   falls back to the C locale, represented by `"C"`. When this occurs, a
-#'   warning will be thrown encouraging you to either install stringi or
-#'   replace usage of `dplyr_locale()` with `"C"` to explicitly force the C
-#'   locale.
+#' The C locale is not exactly the same as English locales, such as `"en"`. The
+#' main difference is that the C locale groups the English alphabet by _case_,
+#' while most English locales group the alphabet by _letter_. For example,
+#' `c("a", "b", "C", "B", "c")` will sort as `c("B", "C", "a", "b", "c")` in the
+#' C locale, with all uppercase letters coming before lowercase letters, but
+#' will sort as `c("a", "b", "B", "c", "C")` in an English locale. This often
+#' makes little practical difference during data analysis, because both return
+#' identical results when case is consistent between observations.
 #'
 #' ## Global override
 #'
 #' To override the above default behavior, you can set the global option,
-#' `dplyr.locale`, to either `"C"` or a stringi locale identifier from
-#' [stringi::stri_locale_list()] to globally alter the default locale.
-#' Setting this option to anything other than `"C"` requires stringi >=1.5.3.
+#' `dplyr.locale`, to a stringi locale identifier from
+#' [stringi::stri_locale_list()] to globally alter the default locale. This
+#' requires stringi >=1.5.3.
 #'
 #' We generally recommend that you set the `.locale` argument of [arrange()]
 #' explicitly rather than overriding the global locale, if possible.
@@ -30,21 +33,31 @@
 #' scope through the use of [rlang::local_options()] or [rlang::with_options()].
 #' This can be useful when a package that you don't control calls `arrange()`
 #' internally.
+#'
+#' ## Reproducibility
+#'
+#' The C locale has the benefit of being completely reproducible across all
+#' supported R versions and operating systems with no extra effort.
+#'
+#' If you set `.locale` to an option from [stringi::stri_locale_list()], then
+#' stringi must be installed by anyone who wants to run your code. If you
+#' utilize this in a package, then stringi should be placed in `Imports`.
 #' @export
 #' @keywords internal
 #' @examplesIf dplyr:::has_minimum_stringi()
-#' # Default locale is American English
+#' # Default locale is C
 #' dplyr_locale()
 #'
-#' # This Danish letter is typically sorted after `z`
-#' df <- tibble(x = x <- c("o", "p", "\u00F8", "z"))
+#' df <- tibble(x = c("a", "b", "C", "B", "c"))
 #' df
 #'
-#' # The American English locale sorts it right after `o`
+#' # The C locale groups the English alphabet by case, placing uppercase letters
+#' # before lowercase letters. This is the default.
 #' arrange(df, x)
 #'
-#' # Explicitly override `.locale` to `"da"` for Danish ordering
-#' arrange(df, x, .locale = "da")
+#' # The American English locale groups the alphabet by letter.
+#' # Explicitly override `.locale` with `"en"` for this ordering.
+#' arrange(df, x, .locale = "en")
 #'
 #' # Or temporarily override the `dplyr.locale` global option, which is useful
 #' # if `arrange()` is called from a function you don't control
@@ -52,9 +65,19 @@
 #'   arrange(df, x)
 #' }
 #'
-#' rlang::with_options(dplyr.locale = "da", {
+#' rlang::with_options(dplyr.locale = "en", {
 #'   col_sorter(df)
 #' })
+#'
+#' # This Danish letter is expected to sort after `z`
+#' df <- tibble(x = c("o", "p", "\u00F8", "z"))
+#' df
+#'
+#' # The American English locale sorts it right after `o`
+#' arrange(df, x, .locale = "en")
+#'
+#' # Using `"da"` for Danish ordering gives the expected result
+#' arrange(df, x, .locale = "da")
 dplyr_locale <- function() {
   locale <- peek_option("dplyr.locale")
 
@@ -65,36 +88,7 @@ dplyr_locale <- function() {
     abort("If set, the global option `dplyr.locale` must be a string.")
   }
 
-  dplyr_locale_default()
-}
-
-dplyr_locale_default <- function(has_stringi = has_minimum_stringi()) {
-  if (has_stringi) {
-    "en"
-  } else {
-    warn_locale_fallback()
-    "C"
-  }
-}
-
-warn_locale_fallback <- function() {
-  header <- paste0(
-    "`dplyr_locale()` attempted to default to the American English locale (\"en\"), ",
-    "but the required package, stringi >=1.5.3, is not installed."
-  )
-
-  bullets <- c(
-    i = "Falling back to the C locale.",
-    i = paste0(
-      "Silence this warning by installing stringi or by ",
-      "explicitly replacing usage of `dplyr_locale()` with \"C\"."
-    )
-  )
-
-  warn(
-    message = c(header, bullets),
-    class = "dplyr_warn_locale_fallback"
-  )
+  "C"
 }
 
 has_minimum_stringi <- function() {
diff --git a/man/arrange.Rd b/man/arrange.Rd
index e2dbfbbcca..87534d826f 100644
--- a/man/arrange.Rd
+++ b/man/arrange.Rd
@@ -22,18 +22,21 @@ grouped data frames only.}
 
 \item{.locale}{The locale to sort character vectors in.
 \itemize{
-\item Defaults to \code{\link[=dplyr_locale]{dplyr_locale()}}, which uses the American English locale
-if the stringi package is installed and the global default locale
-has not been altered. See the help page for \code{\link[=dplyr_locale]{dplyr_locale()}} for the
+\item Defaults to \code{\link[=dplyr_locale]{dplyr_locale()}}, which uses the \code{"C"} locale unless this is
+explicitly overriden. See the help page for \code{\link[=dplyr_locale]{dplyr_locale()}} for the
 exact details.
-\item If a single string is supplied, then this will be used as the sorting
-locale. For example, \code{"fr"} will sort with the French locale. This
-requires the stringi package. Use \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to
-generate a list of possible locale identifiers.
-\item If \code{"C"} is supplied, then character vectors will be sorted in the C
-locale. This does not require stringi and is often much faster than
+\item If a single string from \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} is supplied, then
+this will be used as the locale to sort with. For example, \code{"en"} will
+sort with the American English locale. This requires the stringi package.
+\item If \code{"C"} is supplied, then character vectors will always be sorted in the
+C locale. This does not require stringi and is often much faster than
 supplying a locale identifier.
-}}
+}
+
+The C locale is not the same as English locales, such as \code{"en"},
+particularly when it comes to case-sensitivity. This is explained in
+more detail in the help page of \code{\link[=dplyr_locale]{dplyr_locale()}} under the \verb{Default locale}
+section.}
 }
 \value{
 An object of the same type as \code{.data}. The output has the following
diff --git a/man/arrange_all.Rd b/man/arrange_all.Rd
index 3dd8d111d9..1d35075e05 100644
--- a/man/arrange_all.Rd
+++ b/man/arrange_all.Rd
@@ -45,18 +45,21 @@ grouped data frames only.}
 
 \item{.locale}{The locale to sort character vectors in.
 \itemize{
-\item Defaults to \code{\link[=dplyr_locale]{dplyr_locale()}}, which uses the American English locale
-if the stringi package is installed and the global default locale
-has not been altered. See the help page for \code{\link[=dplyr_locale]{dplyr_locale()}} for the
+\item Defaults to \code{\link[=dplyr_locale]{dplyr_locale()}}, which uses the \code{"C"} locale unless this is
+explicitly overriden. See the help page for \code{\link[=dplyr_locale]{dplyr_locale()}} for the
 exact details.
-\item If a single string is supplied, then this will be used as the sorting
-locale. For example, \code{"fr"} will sort with the French locale. This
-requires the stringi package. Use \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to
-generate a list of possible locale identifiers.
-\item If \code{"C"} is supplied, then character vectors will be sorted in the C
-locale. This does not require stringi and is often much faster than
+\item If a single string from \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} is supplied, then
+this will be used as the locale to sort with. For example, \code{"en"} will
+sort with the American English locale. This requires the stringi package.
+\item If \code{"C"} is supplied, then character vectors will always be sorted in the
+C locale. This does not require stringi and is often much faster than
 supplying a locale identifier.
-}}
+}
+
+The C locale is not the same as English locales, such as \code{"en"},
+particularly when it comes to case-sensitivity. This is explained in
+more detail in the help page of \code{\link[=dplyr_locale]{dplyr_locale()}} under the \verb{Default locale}
+section.}
 
 \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}},
 a character vector of column names, a numeric vector of column
diff --git a/man/dplyr_locale.Rd b/man/dplyr_locale.Rd
index 6590984caa..63b412c8f7 100644
--- a/man/dplyr_locale.Rd
+++ b/man/dplyr_locale.Rd
@@ -11,23 +11,26 @@ dplyr_locale()
 by dplyr when ordering character vectors. It is used as the default value of
 \code{.locale} in \code{\link[=arrange]{arrange()}}.
 \subsection{Default locale}{
-\itemize{
-\item If stringi >=1.5.3 is installed, the default locale is set to American
-English, represented by the locale identifier \code{"en"}.
-\item If stringi is not installed or is older than 1.5.3, the default locale
-falls back to the C locale, represented by \code{"C"}. When this occurs, a
-warning will be thrown encouraging you to either install stringi or
-replace usage of \code{dplyr_locale()} with \code{"C"} to explicitly force the C
-locale.
-}
+
+The default locale returned by \code{dplyr_locale()} is the C locale, identical
+to explicitly supplying \code{.locale = "C"}.
+
+The C locale is not exactly the same as English locales, such as \code{"en"}. The
+main difference is that the C locale groups the English alphabet by \emph{case},
+while most English locales group the alphabet by \emph{letter}. For example,
+\code{c("a", "b", "C", "B", "c")} will sort as \code{c("B", "C", "a", "b", "c")} in the
+C locale, with all uppercase letters coming before lowercase letters, but
+will sort as \code{c("a", "b", "B", "c", "C")} in an English locale. This often
+makes little practical difference during data analysis, because both return
+identical results when case is consistent between observations.
 }
 
 \subsection{Global override}{
 
 To override the above default behavior, you can set the global option,
-\code{dplyr.locale}, to either \code{"C"} or a stringi locale identifier from
-\code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to globally alter the default locale.
-Setting this option to anything other than \code{"C"} requires stringi >=1.5.3.
+\code{dplyr.locale}, to a stringi locale identifier from
+\code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to globally alter the default locale. This
+requires stringi >=1.5.3.
 
 We generally recommend that you set the \code{.locale} argument of \code{\link[=arrange]{arrange()}}
 explicitly rather than overriding the global locale, if possible.
@@ -37,21 +40,32 @@ scope through the use of \code{\link[rlang:local_options]{rlang::local_options()
 This can be useful when a package that you don't control calls \code{arrange()}
 internally.
 }
+
+\subsection{Reproducibility}{
+
+The C locale has the benefit of being completely reproducible across all
+supported R versions and operating systems with no extra effort.
+
+If you set \code{.locale} to an option from \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}}, then
+stringi must be installed by anyone who wants to run your code. If you
+utilize this in a package, then stringi should be placed in \code{Imports}.
+}
 }
 \examples{
 \dontshow{if (dplyr:::has_minimum_stringi()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-# Default locale is American English
+# Default locale is C
 dplyr_locale()
 
-# This Danish letter is typically sorted after `z`
-df <- tibble(x = x <- c("o", "p", "\u00F8", "z"))
+df <- tibble(x = c("a", "b", "C", "B", "c"))
 df
 
-# The American English locale sorts it right after `o`
+# The C locale groups the English alphabet by case, placing uppercase letters
+# before lowercase letters. This is the default.
 arrange(df, x)
 
-# Explicitly override `.locale` to `"da"` for Danish ordering
-arrange(df, x, .locale = "da")
+# The American English locale groups the alphabet by letter.
+# Explicitly override `.locale` with `"en"` for this ordering.
+arrange(df, x, .locale = "en")
 
 # Or temporarily override the `dplyr.locale` global option, which is useful
 # if `arrange()` is called from a function you don't control
@@ -59,9 +73,19 @@ col_sorter <- function(df) {
   arrange(df, x)
 }
 
-rlang::with_options(dplyr.locale = "da", {
+rlang::with_options(dplyr.locale = "en", {
   col_sorter(df)
 })
+
+# This Danish letter is expected to sort after `z`
+df <- tibble(x = c("o", "p", "\u00F8", "z"))
+df
+
+# The American English locale sorts it right after `o`
+arrange(df, x, .locale = "en")
+
+# Using `"da"` for Danish ordering gives the expected result
+arrange(df, x, .locale = "da")
 \dontshow{\}) # examplesIf}
 }
 \keyword{internal}
diff --git a/tests/testthat/_snaps/arrange.md b/tests/testthat/_snaps/arrange.md
index 6d23a5e50c..ed1f501106 100644
--- a/tests/testthat/_snaps/arrange.md
+++ b/tests/testthat/_snaps/arrange.md
@@ -50,7 +50,7 @@
       Error in `arrange()`:
       ! If `.locale` is a character vector, it must be a single string.
 
----
+# arrange validates that `.locale` must be one from stringi
 
     Code
       arrange(df, .locale = "x")
diff --git a/tests/testthat/_snaps/locale.md b/tests/testthat/_snaps/locale.md
index 99aeea8eb1..283d5e2c0f 100644
--- a/tests/testthat/_snaps/locale.md
+++ b/tests/testthat/_snaps/locale.md
@@ -6,15 +6,3 @@
       Error in `dplyr_locale()`:
       ! If set, the global option `dplyr.locale` must be a string.
 
-# `dplyr_locale()` falls back to the C locale with a warning if stringi is not available
-
-    Code
-      dplyr_locale_default(has_stringi = FALSE)
-    Condition
-      Warning:
-      `dplyr_locale()` attempted to default to the American English locale ("en"), but the required package, stringi >=1.5.3, is not installed.
-      i Falling back to the C locale.
-      i Silence this warning by installing stringi or by explicitly replacing usage of `dplyr_locale()` with "C".
-    Output
-      [1] "C"
-
diff --git a/tests/testthat/test-arrange.r b/tests/testthat/test-arrange.r
index 7b7fcb2aee..4ff67a7e6b 100644
--- a/tests/testthat/test-arrange.r
+++ b/tests/testthat/test-arrange.r
@@ -84,41 +84,54 @@ test_that("arrange handles S4 classes (#1105)", {
 
 # locale --------------------------------------------------------------
 
-test_that("arrange defaults to the American English locale if stringi is installed", {
-  skip_if_not_installed("stringi", "1.5.3")
-
+test_that("arrange defaults to the C locale", {
   x <- c("A", "a", "b", "B")
   df <- tibble(x = x)
 
   res <- arrange(df, x)
-  expect_identical(res$x, c("a", "A", "b", "B"))
+  expect_identical(res$x, c("A", "B", "a", "b"))
 
   res <- arrange(df, desc(x))
-  expect_identical(res$x, rev(c("a", "A", "b", "B")))
+  expect_identical(res$x, rev(c("A", "B", "a", "b")))
 })
 
-test_that("locale can be set to C", {
+test_that("locale can be set to an English locale", {
+  skip_if_not_installed("stringi", "1.5.3")
+
   x <- c("A", "a", "b", "B")
   df <- tibble(x = x)
 
-  res <- arrange(df, x, .locale = "C")
-  expect_identical(res$x, c("A", "B", "a", "b"))
+  res <- arrange(df, x, .locale = "en")
+  expect_identical(res$x, c("a", "A", "b", "B"))
 })
 
 test_that("non-English locales can be used", {
   skip_if_not_installed("stringi", "1.5.3")
 
-  # Danish `o` with `/` through it sorts after `z`
+  # Danish `o` with `/` through it sorts after `z` in Danish locale
   x <- c("o", "\u00F8", "p", "z")
   df <- tibble(x = x)
 
-  res <- arrange(df, x)
+  # American English locale puts it right after `o`
+  res <- arrange(df, x, .locale = "en")
   expect_identical(res$x, x)
 
   res <- arrange(df, x, .locale = "da")
   expect_identical(res$x, x[c(1, 3, 4, 2)])
 })
 
+test_that("arrange respects the `dplyr.locale` global option", {
+  skip_if_not_installed("stringi", "1.5.3")
+
+  local_options(dplyr.locale = "da")
+
+  x <- c("o", "\u00F8", "p", "z")
+  df <- tibble(x = x)
+
+  res <- arrange(df, x)
+  expect_identical(res$x, x[c(1, 3, 4, 2)])
+})
+
 test_that("arrange errors if stringi is not installed and a locale identifier is used", {
   expect_snapshot(error = TRUE, {
     locale_to_chr_proxy_collate("fr", has_stringi = FALSE)
@@ -134,6 +147,13 @@ test_that("arrange validates `.locale`", {
   expect_snapshot(error = TRUE, {
     arrange(df, .locale = c("en_US", "fr_BF"))
   })
+})
+
+test_that("arrange validates that `.locale` must be one from stringi", {
+  skip_if_not_installed("stringi", "1.5.3")
+
+  df <- tibble()
+
   expect_snapshot(error = TRUE, {
     arrange(df, .locale = "x")
   })
diff --git a/tests/testthat/test-locale.R b/tests/testthat/test-locale.R
index 50271a6cd0..a226a03e1f 100644
--- a/tests/testthat/test-locale.R
+++ b/tests/testthat/test-locale.R
@@ -1,7 +1,5 @@
-test_that("`dplyr_locale()` uses 'en' if stringi is available", {
-  skip_if_not_installed("stringi", "1.5.3")
-
-  expect_identical(dplyr_locale(), "en")
+test_that("`dplyr_locale()` uses the C locale by default", {
+  expect_identical(dplyr_locale(), "C")
 })
 
 test_that("`dplyr_locale()` respects `dplyr.locale`", {
@@ -13,8 +11,3 @@ test_that("`dplyr_locale()` respects `dplyr.locale`", {
     dplyr_locale()
   })
 })
-
-test_that("`dplyr_locale()` falls back to the C locale with a warning if stringi is not available", {
-  expect_snapshot(dplyr_locale_default(has_stringi = FALSE))
-  expect_warning(dplyr_locale_default(has_stringi = FALSE), class = "dplyr_warn_locale_fallback")
-})

From ea54714bb14ec14e65aeae6f79860d68108a8034 Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Mon, 13 Jun 2022 12:30:18 -0400
Subject: [PATCH 8/9] Avoid saying "case-sensitivity" because that is a little
 confusing

---
 R/arrange.R        | 6 +++---
 man/arrange.Rd     | 6 +++---
 man/arrange_all.Rd | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/R/arrange.R b/R/arrange.R
index 670b12652e..9730f6c5f1 100644
--- a/R/arrange.R
+++ b/R/arrange.R
@@ -53,9 +53,9 @@
 #'     supplying a locale identifier.
 #'
 #'   The C locale is not the same as English locales, such as `"en"`,
-#'   particularly when it comes to case-sensitivity. This is explained in
-#'   more detail in the help page of [dplyr_locale()] under the `Default locale`
-#'   section.
+#'   particularly when it comes to data containing a mix of upper and lower case
+#'   letters. This is explained in more detail in the help page of
+#'   [dplyr_locale()] under the `Default locale` section.
 #' @family single table verbs
 #' @examples
 #' arrange(mtcars, cyl, disp)
diff --git a/man/arrange.Rd b/man/arrange.Rd
index 87534d826f..86748e2619 100644
--- a/man/arrange.Rd
+++ b/man/arrange.Rd
@@ -34,9 +34,9 @@ supplying a locale identifier.
 }
 
 The C locale is not the same as English locales, such as \code{"en"},
-particularly when it comes to case-sensitivity. This is explained in
-more detail in the help page of \code{\link[=dplyr_locale]{dplyr_locale()}} under the \verb{Default locale}
-section.}
+particularly when it comes to data containing a mix of upper and lower case
+letters. This is explained in more detail in the help page of
+\code{\link[=dplyr_locale]{dplyr_locale()}} under the \verb{Default locale} section.}
 }
 \value{
 An object of the same type as \code{.data}. The output has the following
diff --git a/man/arrange_all.Rd b/man/arrange_all.Rd
index 1d35075e05..6f893bec85 100644
--- a/man/arrange_all.Rd
+++ b/man/arrange_all.Rd
@@ -57,9 +57,9 @@ supplying a locale identifier.
 }
 
 The C locale is not the same as English locales, such as \code{"en"},
-particularly when it comes to case-sensitivity. This is explained in
-more detail in the help page of \code{\link[=dplyr_locale]{dplyr_locale()}} under the \verb{Default locale}
-section.}
+particularly when it comes to data containing a mix of upper and lower case
+letters. This is explained in more detail in the help page of
+\code{\link[=dplyr_locale]{dplyr_locale()}} under the \verb{Default locale} section.}
 
 \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}},
 a character vector of column names, a numeric vector of column

From d9d78d213fbc9e08cdf1efef6aa34b63e8e7cd26 Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Mon, 13 Jun 2022 12:41:59 -0400
Subject: [PATCH 9/9] Move conversion of `locale` to `chr_proxy_collate` into
 `arrange_rows()`

This is a technical detail of `vec_order_radix()` so it is better to keep the conversion close to where we actually call it. Should make calling `arrange_rows()` on its own a little easier too.
---
 R/arrange.R | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/arrange.R b/R/arrange.R
index 9730f6c5f1..9b5c51f991 100644
--- a/R/arrange.R
+++ b/R/arrange.R
@@ -94,9 +94,7 @@ arrange.data.frame <- function(.data,
     dots <- c(quos(!!!groups(.data)), dots)
   }
 
-  chr_proxy_collate <- locale_to_chr_proxy_collate(.locale)
-
-  loc <- arrange_rows(.data, dots, chr_proxy_collate)
+  loc <- arrange_rows(.data, dots = dots, locale = .locale)
   dplyr_row_slice(.data, loc)
 }
 
@@ -104,10 +102,15 @@ arrange.data.frame <- function(.data,
 
 arrange_rows <- function(data,
                          dots,
-                         chr_proxy_collate,
+                         locale,
                          error_call = caller_env()) {
   error_call <- dplyr_error_call(error_call)
 
+  chr_proxy_collate <- locale_to_chr_proxy_collate(
+    locale = locale,
+    error_call = error_call
+  )
+
   if (length(dots) == 0L) {
     out <- seq_len(nrow(data))
     return(out)