Fix #26: replacement forms 'substr_ctl<-'

brodieG · Oct 24, 2021 · 300a7f9 · 300a7f9
2 parents 1624140 + 58bd703
commit 300a7f9
Show file tree

Hide file tree

Showing 45 changed files with 1,837 additions and 403 deletions.
diff --git a/DEVNOTES.Rmd b/DEVNOTES.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
+export("substr2_ctl<-")
+export("substr_ctl<-")
 export(close_state)
 export(fansi_lines)
+export(fwl)
 export(has_ctl)
 export(has_sgr)
 export(html_code_block)

diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,8 @@
 
 ### Features
 
+* [#26](https://github.com/brodieG/fansi/issues/26) Replacement forms of
+  `substr_cl` (i.e `substr_ctl<-`).
 * [#58](https://github.com/brodieG/fansi/issues/58) Add support for OSC-anchored
   URLs.
 * [#66](https://github.com/brodieG/fansi/issues/66) Improved handling of

diff --git a/R/fansi-package.R b/R/fansi-package.R
@@ -21,9 +21,9 @@
 #'
 #' @section Control Characters and Sequences:
 #'
-#' Control characters and sequences are non-printing inline characters that can
-#' be used to modify terminal display and behavior, for example by changing text
-#' color or cursor position.
+#' Control characters and sequences are non-printing inline characters or
+#' sequences initiated by them that can be used to modify terminal display and
+#' behavior, for example by changing text color or cursor position.
 #'
 #' We will refer to X3.64/ECMA-48/ISO-6429 control characters and sequences as
 #' "_Control Sequences_" hereafter.
@@ -54,17 +54,17 @@
 #' two characters long.  There are many more unimplemented ECMA-48
 #' specifications.
 #'
-#' In theory it is possible to encode CSI sequenes with a single byte
+#' In theory it is possible to encode CSI sequences with a single byte
 #' introducing character in the 0x40-0x5F range instead of the traditional
 #' "ESC&#91;".  Since this is rare and it conflicts with UTF-8 encoding, `fansi`
 #' does not support it.
 #'
 #' The special treatment of _Control Sequences_ is to compute their
 #' display/character width as zero.  For the SGR subset of the CSI sequences and
-#' OSC-anchored URLs,, `fansi` will also parse, interpret, and reapply the text
-#' the sequences as needed.  Whether a particular type of _Control Sequence_ is
-#' treated specially can be specified via the `ctl` parameter to the `fansi`
-#' functions that have it.
+#' OSC-anchored URLs, `fansi` will also parse, interpret, and reapply the
+#' sequences to the text as needed.  Whether a particular type of _Control
+#' Sequence_ is treated specially can be specified via the `ctl` parameter to
+#' the `fansi` functions that have it.
 #'
 #' @section CSI SGR Control Sequences:
 #'
@@ -131,9 +131,11 @@
 #' While we try to minimize changes across `fansi` versions in how SGR sequences
 #' are output, we focus on minimizing the changes to rendered output, not
 #' necessarily the specific SGR sequences used to produce it.  To maximize the
-#' odds of getting stable SGR output use [`normalize_state`] and set `term.cap` to
-#' a specific set of capabilities.  In general it is likely best not to rely on
-#' the exact SGR encoding of `fansi` output.
+#' odds of getting stable SGR output use [`normalize_state`] and set `term.cap`
+#' to a specific set of capabilities.
+#'
+#' **In general it is likely best not to rely on the exact SGR encoding of
+#' `fansi` output, particularly in tests.**
 #'
 #' Note that `width` calculations may also change across R versions, locales,
 #' etc. (see "Encodings / UTF-8" below).
@@ -151,37 +153,80 @@
 #' @section State Interactions:
 #'
 #' The cumulative nature of state as specified by SGR or OSC-anchored URLs means
-#' that SGR in strings that are spliced will interact with each other.
+#' that unterminated strings that are spliced will interact with each other.
 #' Additionally, a substring does not inherently contain all the information
-#' required to recreate its state as it appeared in the source string.
-#'
-#' One form of interaction to consider is how a character vector provided to
-#' `fansi` functions affect itself.  By default, `fansi` assumes that each
-#' element in an input character vector is independent, but this is incorrect if
-#' the input is a single document with each element a line in it.  In that
-#' situation state from each line should bleed into subsequent ones.  Setting
-#' `carry = TRUE` enables the "single document" interpretation.
-#'
-#' Another form of interaction is when substrings produced by `fansi` are
-#' spliced with or into other substrings.  By default `fansi` automatically
-#' terminates substrings it produces if they contain active formats or URLs.
-#' This prevents the state to bleed into external strings, which is useful e.g.
-#' when arranging text in columns.  We can allow the state to bleed into
-#' appended strings by setting `terminate = FALSE`.  `carry` is unaffected by
-#' `terminate` as `fansi` records the ending SGR state prior to termination
-#' internally.
-#'
-#' Finally, `fansi` strings will be affected by any active state in strings they
-#' are appended to.  There are no parameters to control what happens
-#' automatically in this case, but `fansi` provides several functions that can
-#' help the user get their desired outcome.  `state_at_end` computes the active
-#' state the end of a string, this can then be prepended onto the _input_ of
-#' `fansi` functions so that they are aware of the active style at the beginning
-#' of the string.  Alternatively, one could use `close_state(state_at_end(...))`
-#' and pre-pend that to the _output_ of `fansi` functions so they are unaffected
-#' by preceding SGR.  One could also just prepend "ESC[0m", but in some cases as
+#' required to recreate its state as it appeared in the source string.  The
+#' default `fansi` configuration terminates extracted substrings and prepends
+#' original state to them so they present on a stand alone basis as they did as
+#' part of the original string.
+#'
+#' To allow state in substrings to affect subsequent strings that may be spliced
+#' onto them set `terminate = FALSE`.  Generally you should use `terminate =
+#' TRUE` unless you are willing to deal with the resulting mess (see "Terminal
+#' Quirks") in exchange for fine control of state bleeding.
+#'
+#' By default, `fansi` assumes that each element in an input character vector is
+#' independent, but this is incorrect if the input is a single document with
+#' each element a line in it.  In that situation state from each line should
+#' bleed into subsequent ones.  Setting `carry = TRUE` enables the "single
+#' document" interpretation.
+#'
+#' To most closely approximate what `writeLines(x)` produces on your terminal,
+#' where `x` is a stateful string, use `writeLines(fansi_fun(x, carry=TRUE,
+#' terminate=FALSE))`.  `fansi_fun` is a stand-in for any of the `fansi` string
+#' manipulation functions.  Note that even with a "null-op" such as
+#' `substr_ctl(x, 1, nchar_ctl(x), carry=TRUE, terminate=FALSE)` the output
+#' control sequences may not match the input ones, but the output _should_ look
+#' the same if displayed to the terminal.  With these settings `fansi` will
+#' re-open active state on each new element even if a terminal would naturally
+#' carry them over.  This is to allow the user to manually terminate elements
+#' without losing carried state on the next element.
+#'
+#' `fansi` strings will be affected by any active state in strings they are
+#' appended to.  There are no parameters to control what happens in this case,
+#' but `fansi` provides functions that can help the user get the desired
+#' behavior.  `state_at_end` computes the active state the end of a string,
+#' which can then be prepended onto the _input_ of `fansi` functions so that
+#' they are aware of the active style at the beginning of the string.
+#' Alternatively, one could use `close_state(state_at_end(...))` and pre-pend
+#' that to the _output_ of `fansi` functions so they are unaffected by preceding
+#' SGR.  One could also just prepend "ESC&#91;0m", but in some cases as
 #' described in [`?normalize_state`][normalize_state] that is sub-optimal.
 #'
+#' If you intend to combine stateful `fansi` manipulated strings with your own,
+#' it may be best to set `normalize = TRUE` for best compatibility (see
+#' [`?normalize_state`][normalize_state].)
+#'
+#' @section Terminal Quirks:
+#'
+#' Some terminals (e.g. OS X terminal, ITerm2) will pre-paint the entirety of a
+#' new line with the currently active background before writing the contents of
+#' the line.  If there is a non-default active background color, any unwritten
+#' columns in the new line will keep the prior background color even if the new
+#' line changes the background color.  To avoid this be sure to use `terminate =
+#' TRUE` or to manually terminate each line with e.g.  "ESC&#91;0m".  The
+#' problem manifests as:
+#'
+#' ```
+#' " " = default background
+#' "#" = new background
+#' ">" = start new background
+#' "!" = restore default background
+#'
+#' +-----------+
+#' | abc\n     |
+#' |>###\n     |
+#' |!abc\n#####| <- trailing "#" after newline are from pre-paint
+#' | abc       |
+#' +-----------+
+#' ```
+#'
+#' The simplest way to avoid this problem is to split input strings by any
+#' newlines they contain, and use `terminate = TRUE` (the default).  A more
+#' complex solution is to pad with spaces to the terminal window width before
+#' emitting the newline to ensure the pre-paint is overpainted with the current
+#' line's prevailing background color.
+#'
 #' @section Encodings / UTF-8:
 #'
 #' `fansi` will convert any non-ASCII strings to UTF-8 before processing them,

diff --git a/R/internal.R b/R/internal.R
@@ -56,26 +56,39 @@ check_enc <- function(x, i) .Call(FANSI_check_enc, x, as.integer(i)[1])
 
 ctl_as_int <- function(x) .Call(FANSI_ctl_as_int, as.integer(x))
 
+## testing interface for bridging
+
+bridge <- function(
+  end, restart, term.cap=getOption("fansi.term.cap"),
+  normalize=getOption('fansi.normalize', FALSE)
+) {
+  VAL_IN_ENV(term.cap=term.cap)
+  .Call(FANSI_bridge_state, end, restart, TERM.CAP.INT, normalize)
+}
 ## Common argument validation and conversion.  Missing args okay.
 ##
-## DANGER: will modify values in calling environment!  Also may add `ctl.int`
-## and `term.cap.int` to them.
+## Converts common arguments to standardized forms if needed.
+##
+## DANGER: will modify values in calling environment!  Also may add variables
+## such as CTL.INT, X.LEN, etc. (these should all be in caps).
 
 VAL_IN_ENV <- function(...) {
   call <- sys.call(-1)
   par.env <- parent.frame()
   stop2 <- function(...) stop(simpleError(paste0(..., collapse=""), call))
   args <- list(...)
+  argnm <- names(args)
   if(
     !all(
-      names(args) %in%
+      argnm %in%
       c(
         'x', 'warn', 'term.cap', 'ctl', 'normalize', 'carry', 'terminate',
-        'tab.stops', 'tabs.as.spaces', 'strip.spaces'
+        'tab.stops', 'tabs.as.spaces', 'strip.spaces', 'round', 'type',
+        'start', 'stop'
   ) ) )
     stop("Internal Error: some arguments to validate unknown")
 
-  if('x' %in% names(args)) {
+  if('x' %in% argnm) {
     x <- args[['x']]
     if(!is.character(x)) x <- as.character(args[['x']])
     x <- enc2utf8(x)
@@ -91,20 +104,20 @@ VAL_IN_ENV <- function(...) {
       )
     args[['x']] <- x
   }
-  if('warn' %in% names(args)) {
+  if('warn' %in% argnm) {
     warn <- args[['warn']]
     if(!is.logical(warn)) warn <- as.logical(args[['warn']])
     if(length(warn) != 1L || is.na(warn))
       stop2("Argument `warn` must be TRUE or FALSE.")
     args[['warn']] <- warn
   }
-  if('normalize' %in% names(args)) {
+  if('normalize' %in% argnm) {
     normalize <- args[['normalize']]
     if(!isTRUE(normalize %in% c(FALSE, TRUE)))
       stop2("Argument `normalize` must be TRUE or FALSE.")
     args[['normalize']] <- as.logical(normalize)
   }
-  if('term.cap' %in% names(args)) {
+  if('term.cap' %in% argnm) {
     term.cap <- args[['term.cap']]
     if(!is.character(term.cap))
       stop2("Argument `term.cap` must be character.")
@@ -113,9 +126,9 @@ VAL_IN_ENV <- function(...) {
         "Argument `term.cap` may only contain values in ",
         deparse(VALID.TERM.CAP)
       )
-    args[['term.cap.int']] <- term.cap.int
+    args[['TERM.CAP.INT']] <- term.cap.int
   }
-  if('ctl' %in% names(args)) {
+  if('ctl' %in% argnm) {
     ctl <- args[['ctl']]
     if(!is.character(ctl))
       stop2("Argument `ctl` must be character.")
@@ -127,9 +140,9 @@ VAL_IN_ENV <- function(...) {
           "Argument `ctl` may contain only values in `", deparse(VALID.CTL), "`"
         )
     }
-    args[['ctl.int']] <- ctl.int
+    args[['CTL.INT']] <- ctl.int
   }
-  if('carry' %in% names(args)) {
+  if('carry' %in% argnm) {
     carry <- args[['carry']]
     if(length(carry) != 1L)
       stop2("Argument `carry` must be scalar.")
@@ -141,13 +154,13 @@ VAL_IN_ENV <- function(...) {
     if(is.logical(carry)) if(carry) carry <- "" else carry = NA_character_
     args[['carry']] <- carry
   }
-  if('terminate' %in% names(args)) {
+  if('terminate' %in% argnm) {
     terminate <- args[['terminate']]
     if(!isTRUE(terminate %in% c(TRUE, FALSE)))
       stop2("Argument `terminate` must be TRUE or FALSE")
     terminate <- as.logical(terminate)
   }
-  if('tab.stops' %in% names(args)) {
+  if('tab.stops' %in% argnm) {
     tab.stops <- args[['tab.stops']]
     if(
       !is.numeric(tab.stops) || !length(tab.stops) || any(tab.stops < 1) ||
@@ -159,20 +172,53 @@ VAL_IN_ENV <- function(...) {
       )
     args[['tab.stops']] <- as.integer(tab.stops)
   }
-  if('tabs.as.spaces' %in% names(args)) {
+  if('tabs.as.spaces' %in% argnm) {
     tabs.as.spaces <- args[['tabs.as.spaces']]
     if(!is.logical(tabs.as.spaces)) tabs.as.spaces <- as.logical(tabs.as.spaces)
     if(length(tabs.as.spaces) != 1L || is.na(tabs.as.spaces))
       stop2("Argument `tabs.as.spaces` must be TRUE or FALSE.")
     args[['tabs.as.spaces']] <- tabs.as.spaces
   }
-  if('strip.spaces' %in% names(args)) {
+  if('strip.spaces' %in% argnm) {
     strip.spaces <- args[['strip.spaces']]
     if(!is.logical(strip.spaces)) strip.spaces <- as.logical(strip.spaces)
     if(length(strip.spaces) != 1L || is.na(strip.spaces))
       stop2("Argument `strip.spaces` must be TRUE or FALSE.")
     args[['strip.spaces']] <- strip.spaces
   }
+  if('round' %in% argnm) {
+    valid.round <- c('start', 'stop', 'both', 'neither')
+    round <- args[['round']]
+    if(
+      !is.character(round) || length(round) != 1 ||
+      is.na(round.int <- pmatch(round, valid.round))
+    )
+      stop2("Argument `round` must partial match one of ", deparse(valid.round))
+    args[['round']] <- valid.round[round.int]
+    args[['ROUND.INT']] <- round.int
+  }
+  if('type' %in% argnm) {
+    valid.types <- c('chars', 'width')
+    type <- args[['type']]
+    if(
+      !is.character(type) || length(type) != 1 ||
+      is.na(type.int <- pmatch(type, valid.types))
+    )
+      stop2("Argument `type` must partial match one of ", deparse(valid.types))
+
+    args[['type']] <- valid.types[type.int]
+    args[['TYPE.INT']] <- type.int - 1L
+  }
+  if('start' %in% argnm || 'stop' %in% argnm) {
+    x.len <- length(args[['x']])
+    # Silently recycle start/stop like substr does
+    start <- rep(as.integer(args[['start']]), length.out=x.len)
+    stop <- rep(as.integer(args[['stop']]), length.out=x.len)
+    start[start < 1L] <- 1L
+    args[['start']] <- start
+    args[['stop']] <- stop
+    args[['X.LEN']] <- x.len
+  }
   # we might not have validated all, so we should be careful
   list2env(args, par.env)
 }

diff --git a/R/misc.R b/R/misc.R
@@ -495,4 +495,17 @@ size_buff_prot_test <- function() {
   res[['self']] <- match(res[['self']], addresses)
   res
 }
+#' Display Strings to Terminal
+#'
+#' Shortcut for [`writeLines`] with an additional terminating "ESC&#91;0m".
+#'
+#' @keywords internal
+#' @export
+#' @param ... character vectors to display.
+#' @param end character what to output after the primary inputs.
+#' @return whatever writeLines returns
+
+fwl <- function(..., end='<END>\033[0m') {
+  writeLines(c(..., end))
+}
 
diff --git a/R/nchar.R b/R/nchar.R
@@ -80,6 +80,7 @@ nchar_ctl <- function(
       "Argument `type` must partial match one of 'chars', 'width', or 'bytes'."
     )
 
+  ## modifies / creates NEW VARS in fun env
   VAL_IN_ENV(x=x, ctl=ctl, warn=warn)
   type <- valid.types[type.int]
   stripped <- strip_ctl(x, ctl=ctl, warn=warn)
@@ -92,13 +93,14 @@ nchar_ctl <- function(
 #' @rdname nchar_ctl
 
 nzchar_ctl <- function(x, keepNA=NA, ctl='all', warn=getOption('fansi.warn')) {
+  ## modifies / creates NEW VARS in fun env
   VAL_IN_ENV(x=x, ctl=ctl, warn=warn)
   if(!is.logical(keepNA)) keepNA <- as.logical(keepNA)
   if(length(keepNA) != 1L)
     stop("Argument `keepNA` must be a scalar logical.")
 
   term.cap.int <- 1L
-  .Call(FANSI_nzchar_esc, x, keepNA, warn, term.cap.int, ctl.int)
+  .Call(FANSI_nzchar_esc, x, keepNA, warn, term.cap.int, CTL.INT)
 }
 #' Control Sequence Aware Version of nchar
 #'

diff --git a/R/normalize.R b/R/normalize.R
@@ -85,8 +85,9 @@ normalize_state <- function(
   x, warn=getOption('fansi.warn'), term.cap=getOption('fansi.term.cap'),
   carry=getOption('fansi.carry', FALSE)
 ) {
+  ## modifies / creates NEW VARS in fun env
   VAL_IN_ENV(x=x, warn=warn, term.cap=term.cap, carry=carry)
-  .Call(FANSI_normalize_state, x, warn, term.cap.int, carry)
+  .Call(FANSI_normalize_state, x, warn, TERM.CAP.INT, carry)
 }
 # To reduce overhead of applying this in `strwrap_ctl`