From 339f7504a923c9b318325092a858db99c22b7258 Mon Sep 17 00:00:00 2001 From: brodieG Date: Mon, 11 Oct 2021 20:29:10 -0400 Subject: [PATCH 01/27] wordsmithing --- R/fansi-package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fansi-package.R b/R/fansi-package.R index b021d9d5..704d8dac 100644 --- a/R/fansi-package.R +++ b/R/fansi-package.R @@ -165,7 +165,7 @@ #' Another form of interaction is when substrings produced by `fansi` are #' spliced with or into other substrings. By default `fansi` automatically #' terminates substrings it produces if they contain active formats or URLs. -#' This prevents the state to bleed into external strings, which is useful e.g. +#' This prevents the state bleeding into external strings, which is useful e.g. #' when arranging text in columns. We can allow the state to bleed into #' appended strings by setting `terminate = FALSE`. `carry` is unaffected by #' `terminate` as `fansi` records the ending SGR state prior to termination From 8a601513b236b3e6e07c55c9a5a11b6f603ae7c5 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 16 Oct 2021 19:19:47 -0400 Subject: [PATCH 02/27] initial add of replace funs, untested --- R/internal.R | 54 ++++++++++++++++++++++++--------- R/sgr.R | 10 +++---- R/substr2.R | 84 +++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 109 insertions(+), 39 deletions(-) diff --git a/R/internal.R b/R/internal.R index 3ad6f74b..faac1def 100644 --- a/R/internal.R +++ b/R/internal.R @@ -58,24 +58,27 @@ ctl_as_int <- function(x) .Call(FANSI_ctl_as_int, as.integer(x)) ## Common argument validation and conversion. Missing args okay. ## -## DANGER: will modify values in calling environment! Also may add `ctl.int` -## and `term.cap.int` to them. +## Converts common arguments to standardized forms if needed. +## +## DANGER: will modify values in calling environment! Also may add variables +## ending in `.int` like `ctl.int`, `term.cap.int`, and others to them. 
VAL_IN_ENV <- function(...) { call <- sys.call(-1) par.env <- parent.frame() stop2 <- function(...) stop(simpleError(paste0(..., collapse=""), call)) args <- list(...) + argnm <- names(args) if( !all( - names(args) %in% + argnm %in% c( 'x', 'warn', 'term.cap', 'ctl', 'normalize', 'carry', 'terminate', - 'tab.stops', 'tabs.as.spaces', 'strip.spaces' + 'tab.stops', 'tabs.as.spaces', 'strip.spaces', 'round' ) ) ) stop("Internal Error: some arguments to validate unknown") - if('x' %in% names(args)) { + if('x' %in% argnm) { x <- args[['x']] if(!is.character(x)) x <- as.character(args[['x']]) x <- enc2utf8(x) @@ -91,20 +94,20 @@ VAL_IN_ENV <- function(...) { ) args[['x']] <- x } - if('warn' %in% names(args)) { + if('warn' %in% argnm) { warn <- args[['warn']] if(!is.logical(warn)) warn <- as.logical(args[['warn']]) if(length(warn) != 1L || is.na(warn)) stop2("Argument `warn` must be TRUE or FALSE.") args[['warn']] <- warn } - if('normalize' %in% names(args)) { + if('normalize' %in% argnm) { normalize <- args[['normalize']] if(!isTRUE(normalize %in% c(FALSE, TRUE))) stop2("Argument `normalize` must be TRUE or FALSE.") args[['normalize']] <- as.logical(normalize) } - if('term.cap' %in% names(args)) { + if('term.cap' %in% argnm) { term.cap <- args[['term.cap']] if(!is.character(term.cap)) stop2("Argument `term.cap` must be character.") @@ -115,7 +118,7 @@ VAL_IN_ENV <- function(...) { ) args[['term.cap.int']] <- term.cap.int } - if('ctl' %in% names(args)) { + if('ctl' %in% argnm) { ctl <- args[['ctl']] if(!is.character(ctl)) stop2("Argument `ctl` must be character.") @@ -129,7 +132,7 @@ VAL_IN_ENV <- function(...) { } args[['ctl.int']] <- ctl.int } - if('carry' %in% names(args)) { + if('carry' %in% argnm) { carry <- args[['carry']] if(length(carry) != 1L) stop2("Argument `carry` must be scalar.") @@ -141,13 +144,13 @@ VAL_IN_ENV <- function(...) 
{ if(is.logical(carry)) if(carry) carry <- "" else carry = NA_character_ args[['carry']] <- carry } - if('terminate' %in% names(args)) { + if('terminate' %in% argnm) { terminate <- args[['terminate']] if(!isTRUE(terminate %in% c(TRUE, FALSE))) stop2("Argument `terminate` must be TRUE or FALSE") terminate <- as.logical(terminate) } - if('tab.stops' %in% names(args)) { + if('tab.stops' %in% argnm) { tab.stops <- args[['tab.stops']] if( !is.numeric(tab.stops) || !length(tab.stops) || any(tab.stops < 1) || @@ -159,20 +162,43 @@ VAL_IN_ENV <- function(...) { ) args[['tab.stops']] <- as.integer(tab.stops) } - if('tabs.as.spaces' %in% names(args)) { + if('tabs.as.spaces' %in% argnm) { tabs.as.spaces <- args[['tabs.as.spaces']] if(!is.logical(tabs.as.spaces)) tabs.as.spaces <- as.logical(tabs.as.spaces) if(length(tabs.as.spaces) != 1L || is.na(tabs.as.spaces)) stop2("Argument `tabs.as.spaces` must be TRUE or FALSE.") args[['tabs.as.spaces']] <- tabs.as.spaces } - if('strip.spaces' %in% names(args)) { + if('strip.spaces' %in% argnm) { strip.spaces <- args[['strip.spaces']] if(!is.logical(strip.spaces)) strip.spaces <- as.logical(strip.spaces) if(length(strip.spaces) != 1L || is.na(strip.spaces)) stop2("Argument `strip.spaces` must be TRUE or FALSE.") args[['strip.spaces']] <- strip.spaces } + if('round', %in% argnm) { + valid.round <- c('start', 'stop', 'both', 'neither') + if( + !is.character(round) || length(round) != 1 || + is.na(round.int <- pmatch(round, valid.round)) + ) + stop("Argument `round` must partial match one of ", deparse(valid.round)) + args[['round']] <- valid.round['round.int'] + args[['round.int']] <- round.int + } + if('type' %in% argnm) { + valid.types <- c('chars', 'width') + type <- args[['type']] + if( + !is.character(type) || length(type) != 1 || + is.na(type.int <- pmatch(type, valid.types)) + ) + stop("Argument `type` must partial match one of ", deparse(valid.types)) + + args[['type']] <- valid.types[type.int] + args[['type.int']] <- type.int - 
1L + } + # we might not have validated all, so we should be careful list2env(args, par.env) } diff --git a/R/sgr.R b/R/sgr.R index 000774ec..d857b5c7 100644 --- a/R/sgr.R +++ b/R/sgr.R @@ -133,11 +133,11 @@ has_sgr <- function(x, warn=getOption('fansi.warn')) #' OSC-anchored URLs until the end of the string and outputs the active state at #' the end of it. `close_state` produces the sequence that closes active SGR #' and OSC-anchored URLs at the end of the input string. If `normalize = FALSE` -#' (default), it will close SGRs with the reset code "ESC[0m", so it is only -#' interesting for closing SGRs if `normalize = TRUE`. Unlike `state_at_end` -#' and other functions `close_state` has no concept of `carry`: it will only -#' close state activate within an element that is still active at the end of -#' that element. +#' (default), it will emit the reset code "ESC[0m" if any SGR is present. It is +#' more interesting for closing SGRs if `normalize = TRUE`. Unlike +#' `state_at_end` and other functions, `close_state` has no concept of `carry`: +#' it will only emit closing sequences for states activated within an element +#' that are still active at the end of that element. #' #' @export #' @inheritParams substr_ctl diff --git a/R/substr2.R b/R/substr2.R index 5d7500e2..5b05226d 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -126,6 +126,7 @@ #' active state closed to avoid it bleeding into other strings they may be #' prepended onto. See the "State Interactions" section of [`?fansi`][fansi] #' for details. +#' @param value a character vector or object that can be coerced to such. #' @return a character vector of the same length and with the same attributes as #' x (after possible coercion and re-encoding to UTF-8).
#' @examples @@ -181,28 +182,11 @@ substr2_ctl <- function( carry=getOption('fansi.carry', FALSE), terminate=getOption('fansi.terminate', TRUE) ) { - VAL_IN_ENV( + VAL_IN_ENV( ## modifies / creates NEW VARS in fun env x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate, tab.stops=tab.stops, tabs.as.spaces=tabs.as.spaces ) - valid.round <- c('start', 'stop', 'both', 'neither') - if( - !is.character(round) || length(round) != 1 || - is.na(round.int <- pmatch(round, valid.round)) - ) - stop("Argument `round` must partial match one of ", deparse(valid.round)) - - round <- valid.round[round.int] - - valid.types <- c('chars', 'width') - if( - !is.character(type) || length(type) != 1 || - is.na(type.int <- pmatch(type, valid.types)) - ) - stop("Argument `type` must partial match one of ", deparse(valid.types)) - - type.m <- type.int - 1L x.len <- length(x) # Silently recycle start/stop like substr does @@ -216,19 +200,79 @@ substr2_ctl <- function( res[no.na] <- substr_ctl_internal( x[no.na], start=start[no.na], stop=stop[no.na], - type.int=type.m, + type.int=type.int, tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, term.cap.int=term.cap.int, round.start=round == 'start' || round == 'both', round.stop=round == 'stop' || round == 'both', - x.len=length(x), + x.len=x.len, ctl.int=ctl.int, normalize=normalize, carry=carry, terminate=terminate ) res[!no.na] <- NA_character_ res } +#' @rdname substr_ctl +#' @export +`substr_ctl<-` <- function( + x, start, stop, value, + warn=getOption('fansi.warn'), + term.cap=getOption('fansi.term.cap'), + ctl='all', normalize=getOption('fansi.normalize', FALSE), + carry=getOption('fansi.carry', FALSE), + terminate=getOption('fansi.terminate', TRUE) +) { + substr2_ctl( + x=x, start=start, stop=stop, warn=warn, term.cap=term.cap, ctl=ctl, + normalize=normalize, carry=carry, terminate=terminate + ) <- value + x +} +#' @rdname substr_ctl +#' @export + +`substr2_ctl<-` <- function( + x, 
start, stop, value, type='chars', round='start', + tabs.as.spaces=getOption('fansi.tabs.as.spaces'), + tab.stops=getOption('fansi.tab.stops'), + warn=getOption('fansi.warn'), + term.cap=getOption('fansi.term.cap'), + ctl='all', normalize=getOption('fansi.normalize', FALSE), + carry=getOption('fansi.carry', FALSE), + terminate=getOption('fansi.terminate', TRUE) +) { + VAL_IN_ENV( ## modifies / creates NEW VARS in fun env + x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, + carry=carry, terminate=terminate, tab.stops=tab.stops, + tabs.as.spaces=tabs.as.spaces, round=round + ) + # Need to translate start/stop and remap round + round.a <- switch( + round, start='stop', stop='start', both='neither', neither='both' + ) + round.b <- round + + # Handle value termination + value <- enc2utf8(as.character(value)) + + x[] <- paste0( + substr_ctl_internal( + x, 1L, start - 1L, type.int=type.int, round=round.a, + tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, + term.cap=term.cap.int, ctl.int=ctl.int, normalize=normalize, + carry=carry, terminate=terminate + ), + rep(value, length.out=length(x)), + substr_ctl_internal( + x, stop + 1L, .Machine[['integer.max']], type=type, round=round.b, + tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, + term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, + terminate=terminate + ) + ) + x +} #' SGR Control Sequence Aware Version of substr #' #' These functions are deprecated in favor of the [`_ctl` flavors][substr_ctl]. From c2b280396b75249db7d26056691dd579ca38c907 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sun, 17 Oct 2021 08:53:56 -0400 Subject: [PATCH 03/27] update notes --- DEVNOTES.Rmd | 373 +++++++++++++++++++++++++++++---------------------- 1 file changed, 216 insertions(+), 157 deletions(-) diff --git a/DEVNOTES.Rmd b/DEVNOTES.Rmd index 2bd26407..e51d7146 100644 --- a/DEVNOTES.Rmd +++ b/DEVNOTES.Rmd @@ -26,9 +26,6 @@ These are internal developer notes. 
keep the origin always, and probably also need an alloc_len to know what size the previously allocated buffer is. -* Confirm invalid UTF-8 assumptions (do we consume the lead byte count, just on - byte?). Right now we consume the lead byte bytes. - * Add test that confirms were not messing up the integer overflow check for the start indices. Not sure how to do this. @@ -69,6 +66,12 @@ These are internal developer notes. ## Done +* Confirm invalid UTF-8 assumptions (do we consume the lead byte count, just on + byte?). Right now we consume the lead byte bytes. + +We checked way after the fact. Claim in logs this was done in 8ee665a, and code +seems to back it up. + * Is it okay to let `has_ctl` re-encode to UTF-8? Might be weird if it doesn't but `strip_ctl` does. Is it possible that after re-encoding something that wasn't an escape becomes one or vice versa? Yes, illegal bytes could get @@ -142,7 +145,8 @@ We combined the SGR and URL warnings. * Normalize must explicitly close and reopen? Probably no, as so long as at the * Make sure `strwrap` and others consume initial SGR (substr does so)? It looks - like stwrap doesn't: `strwrap_ctl(c("\033[31mhello world", "\033[42mgoodbye moon"), carry=TRUE, 10)` + like stwrap doesn't: `strwrap_ctl(c("\033[31mhello world", "\033[42mgoodbye + moon"), carry=TRUE, 10)` Now they do. @@ -245,162 +249,16 @@ unsigned one (I think). Currently takes STRSXP. A little awkward though, but we did it that way because we had the index. -## `find_esc` - -We want something that: - -* finds the start of the escape -* attempts to read it -* warns if it is a malformed illegal one -* returns the number of bytes in the escape - -Then, - -* has_ctl reports whether there was one. -* strip_ctl strips it. -* nzchar_ctl keeps looking for characters. - -## Wrap Strip Space - -I think the logic needed right now is to make sure that sequences of -tabs/spaces/newlines are replaced correctly: - -* " " -> " " -* ". " -> ". 
" -* " \n " -> " " -* " \n \n " -> "\n\n" - -So what do we do if an escape interrupts? - -* If another non-white space after escape? - * Nothing, everything is fine -* If another white space sequence after escape? - * Scan forward until find the first non-SGR non-whitespace (be sure to - read ESC with `FANSI_read_next`, accumulting any SGR ESC). - * Compute correct continuation as currently, and write it along with the - rest of the beginning of the string. - * Write out the SGR - * Other CTLs will not get moved, this should be documented. - -Odd cases to think about: +## `substr<-` -* SGR between two spaces following a periods. -* SGR between first and last newline. +Main question here is whether leading and trailing SGR are selected in the +replacement: -Special cases are: + substr_ctl("A\033[31mhello\033[42mB", 2, 6) <- "\033[32mgoodbye\033[0m" -* Two spaces after a period - * ESC between two spaces, stays there, otherwise at end. -* `n` newlines - * After first stays after first - * After second stays at end - -If there are different ESCs at different spots, then apply above rules to each. - -The above is too complicated, just put it at very end in all cases. - -## Grapheme Boundaries - -So we need to modify `readUTF8` to know when to continue reading when the next -char being zero width doesn't do it. - -* RI flags (even odd) (what about england/wales/scotland?) - -Exploring whether we can get away with a cheap implementation. - -Country flags are fine since each Regional Indicator (RI) on its own is 1 width, -but together they are two (ah, but we can't break in the middle). - -Prepend we probably can't handle, as they too are zero width (some are anyway). - -Do not break before extending characters. 
- -* Grapheme extend (non spacing and enclosing marks, ZWNJ) -* Emoji modifier - -Do not break before modifiers: - -* Emoji component 1F9B0..1F9B3 hair type -* Emoji modifier 1F3FB..1F3FF skin color - -Do no break within emoji modifier sequences - -In practice, I think this means: - -* Treat modifier/component as width zero -* Treat anything after a ZWJ as width zero -* Handle RI/flags U+1F1E6...U+1F1FF - -This should be usually right so long as the sequences are well formed. - -The pain is flags because we don't want to let substring in width mode cut in -the middle. But maybe that's okay? All the rounding business should work if we -just force the advance. We just need to check whether the next thing is as flag -and just advance extra. - -## Crayon Compatibility - -### Updating Crayon - -We could modify the code to add in addition to `st$open` and `st$close` a -`st$closedby`, where the last would be transformed into a regex that matches any -closing sequence. This will work even nested as in e.g. - -red('hello ', red('wor\033[0mld') , ' yo') - -Result would be: - - \033[32mhello \033[31mwo\033[0\033[32m\033[31mld\033[39m\033[32m yo\033[39m - -What happens is that the inner step is done first, adding its color after -`\033[0m`, but then the outer step happens, and adds its color in between the -`\033[0m` and the `\033[31m` just added, so the inner step dominates, which is -exactly what we want. - -Issue are: - -* We need to use regular expressions, not fixed, so might be a little slower. -* It will not deal with things like "\033[1;31m". - -## Interaction - -Is this the desired outcome: - - > strwrap_ctl("hello world\033[31m", 12) - [1] "hello world" - -Yes, if "isolate" is true, but if not we should emit the ending style. - -What was the issue with recycling carry? That you have to pick whether to wrap -your own carry, or whether take the external one? Indeed, what's the right -answer there? 
Ah, that's the ambiguity, carry and inherit are really distinct -but potentially mutually exclusive (where inherit means take a previously known -state). Inherit matters also beyond normalized mode as e.g. when we take -substrings we start the string with all the known states. - -Inherit doesn't really make sense for `strwrap`? I guess it does to begin, and -then carry does the rest (although `strwrap` always auto-carries per element, so -that's a different type of carry). - -So: - -* Inherit, a recycled vector of starting styles. -* Carry, TRUE or FALSE, or a single style string to start with. - * Mutually exclusive with inherit. -* Isolate, "start", "end", "both", "none/neither". - * Orthogonal to all the others. - * Both Carry and Inherit will be emitted? Or is inherit just so we know - what to close with isolate in normalized mode? - -Leaning more and more on it being user responsibility to handle interactions -with external strings by pre-pasting either the style-at-end of other strings, -or the required closing tags. - -Does `isolate` then just become `terminate`? What if we change our mind in the -future about wanting to terminate the beginning? - -So we're left with just `carry` and terminate, and instructions on how to do -things manually. +First reaction is that all the leading SGR should be consumed. The tricky +business starts when we have other zero width stuff, like an accent, or an emoji +ZWJ sequence with a color change in the middle. ## Consumption of Zero Width @@ -695,6 +553,57 @@ So we will walk the string until we pass all the cut points. * Char length in bytes (use 0 for sub-elements of UTF8 sequences)? * Display width (0 for ANSI, and 0 for sub-elements) +## substr_ctl<- (replace) + +Replacement seek should include all zero width leading? + +What about when `type=width`. + +* Leading: include all zero width. +* Trailing: include all zero width up to first CSI? + +Follow the round parameter when substrings requested in middle. 
+ +Do we need to reconcile the behavior of `substr` and `substr<-`? Currently the +behavior is to include the CTLs in both + + x <- sprintf("he\033[31mllo%s\033[42m world", "\u0301") + substr2_ctl(x, 3, 5, terminate=FALSE, type='width') + y <- sprintf("he\033[31m%sllo%s\033[42m world", "\u200c", "\u0301") + substr2_ctl(y, 3, 5, terminate=FALSE, type='width') + +Ah, but it's a bit tricky to interpret because active styles will naturally be +copied over into the output so it's hard to tell exactly what's going on. + +The most natural thing to do is use the same logic in both ends, keep accruing +all the zero widths. But if we do that and the insert, we lose the format +potentially affecting the rest of the string? + + x <- sprintf("ab\033[31m%sc", "\u200C") + substr(x, 2, 2) <- y + +Versus: + + y <- 'X' + paste0( + substr2_ctl(x, 1, 1, type='width'), + y, + substr2_ctl(x, 3, nchar(x), type='width') + ) + +What happens if we have "ab\033[31m\u200Cc"? + +In that case the zero widths get consumed. More generally, how do you treat the +prevailing styles? Do we need a terminate parameter? Probably, and that's +perhaps the way we do this. By default, the semantics are to terminate, for +better or worse. In the terminated case, styles just end and resume on other +side of insertion. But what if we don't terminate? Styles will bleed through +on the beginning. What about styles that change in what is being inserted? +Are those completely lost? Or is that handled with carry? + +Maybe we just implement as the paste business? + + ## Benchmarks ### Strip @@ -814,3 +723,153 @@ microbenchmark(times=1, ``` Just doesn't seem worth the hassle even if we could get better. + +# Old notes for reference + +Just so we can remember what we were thinking back in the day. Many of the docs +above should probably be moved into this section.
+ +## Interaction + +Is this the desired outcome: + + > strwrap_ctl("hello world\033[31m", 12) + [1] "hello world" + +Yes, if "isolate" is true, but if not we should emit the ending style. + +What was the issue with recycling carry? That you have to pick whether to wrap +your own carry, or whether take the external one? Indeed, what's the right +answer there? Ah, that's the ambiguity, carry and inherit are really distinct +but potentially mutually exclusive (where inherit means take a previously known +state). Inherit matters also beyond normalized mode as e.g. when we take +substrings we start the string with all the known states. + +Inherit doesn't really make sense for `strwrap`? I guess it does to begin, and +then carry does the rest (although `strwrap` always auto-carries per element, so +that's a different type of carry). + +So: + +* Inherit, a recycled vector of starting styles. +* Carry, TRUE or FALSE, or a single style string to start with. + * Mutually exclusive with inherit. +* Isolate, "start", "end", "both", "none/neither". + * Orthogonal to all the others. + * Both Carry and Inherit will be emitted? Or is inherit just so we know + what to close with isolate in normalized mode? + +Leaning more and more on it being user responsibility to handle interactions +with external strings by pre-pasting either the style-at-end of other strings, +or the required closing tags. + +Does `isolate` then just become `terminate`? What if we change our mind in the +future about wanting to terminate the beginning? + +So we're left with just `carry` and terminate, and instructions on how to do +things manually. + + +## Wrap Strip Space + +I think the logic needed right now is to make sure that sequences of +tabs/spaces/newlines are replaced correctly: + +* " " -> " " +* ". " -> ". " +* " \n " -> " " +* " \n \n " -> "\n\n" + +So what do we do if an escape interrupts? + +* If another non-white space after escape? 
+ * Nothing, everything is fine +* If another white space sequence after escape? + * Scan forward until we find the first non-SGR non-whitespace (be sure to + read ESC with `FANSI_read_next`, accumulating any SGR ESC). + * Compute correct continuation as currently, and write it along with the + rest of the beginning of the string. + * Write out the SGR + * Other CTLs will not get moved, this should be documented. + +Odd cases to think about: + +* SGR between two spaces following a period. +* SGR between first and last newline. + +Special cases are: + +* Two spaces after a period + * ESC between two spaces, stays there, otherwise at end. +* `n` newlines + * After first stays after first + * After second stays at end + +If there are different ESCs at different spots, then apply above rules to each. + +The above is too complicated, just put it at very end in all cases. + +## Grapheme Boundaries + +So we need to modify `readUTF8` to know when to continue reading when the next +char being zero width doesn't do it. + +* RI flags (even odd) (what about england/wales/scotland?) + +Exploring whether we can get away with a cheap implementation. + +Country flags are fine since each Regional Indicator (RI) on its own is 1 width, +but together they are two (ah, but we can't break in the middle). + +Prepend we probably can't handle, as they too are zero width (some are anyway). + +Do not break before extending characters. + +* Grapheme extend (non spacing and enclosing marks, ZWNJ) +* Emoji modifier + +Do not break before modifiers: + +* Emoji component 1F9B0..1F9B3 hair type +* Emoji modifier 1F3FB..1F3FF skin color + +Do not break within emoji modifier sequences + +In practice, I think this means: + +* Treat modifier/component as width zero +* Treat anything after a ZWJ as width zero +* Handle RI/flags U+1F1E6...U+1F1FF + +This should be usually right so long as the sequences are well formed.
+ +The pain is flags because we don't want to let substring in width mode cut in +the middle. But maybe that's okay? All the rounding business should work if we +just force the advance. We just need to check whether the next thing is a flag +and just advance extra. + +## Crayon Compatibility + +### Updating Crayon + +We could modify the code to add in addition to `st$open` and `st$close` a +`st$closedby`, where the last would be transformed into a regex that matches any +closing sequence. This will work even nested as in e.g. + +red('hello ', red('wor\033[0mld') , ' yo') + +Result would be: + + \033[32mhello \033[31mwo\033[0\033[32m\033[31mld\033[39m\033[32m yo\033[39m + +What happens is that the inner step is done first, adding its color after +`\033[0m`, but then the outer step happens, and adds its color in between the +`\033[0m` and the `\033[31m` just added, so the inner step dominates, which is +exactly what we want. + +Issues are: + +* We need to use regular expressions, not fixed, so might be a little slower. +* It will not deal with things like "\033[1;31m". + + From 46b1875611f3c0668e6e465eaed905d1bfd1e021 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sun, 17 Oct 2021 08:55:13 -0400 Subject: [PATCH 04/27] validation/regressions --- R/internal.R | 21 +++++++++++++---- R/sgr.R | 15 ++++++++++-- R/substr2.R | 64 ++++++++++++++++++++++++++++++++-------------------- 3 files changed, 68 insertions(+), 32 deletions(-) diff --git a/R/internal.R b/R/internal.R index faac1def..5c65aa79 100644 --- a/R/internal.R +++ b/R/internal.R @@ -74,7 +74,8 @@ VAL_IN_ENV <- function(...) { argnm %in% c( 'x', 'warn', 'term.cap', 'ctl', 'normalize', 'carry', 'terminate', - 'tab.stops', 'tabs.as.spaces', 'strip.spaces', 'round' + 'tab.stops', 'tabs.as.spaces', 'strip.spaces', 'round', 'type', + 'start', 'stop' ) ) ) stop("Internal Error: some arguments to validate unknown") @@ -176,13 +177,14 @@ VAL_IN_ENV <- function(...)
{ stop2("Argument `strip.spaces` must be TRUE or FALSE.") args[['strip.spaces']] <- strip.spaces } - if('round', %in% argnm) { + if('round' %in% argnm) { valid.round <- c('start', 'stop', 'both', 'neither') + round <- args[['round']] if( !is.character(round) || length(round) != 1 || is.na(round.int <- pmatch(round, valid.round)) ) - stop("Argument `round` must partial match one of ", deparse(valid.round)) + stop2("Argument `round` must partial match one of ", deparse(valid.round)) - args[['round']] <- valid.round['round.int'] + args[['round']] <- valid.round[round.int] args[['round.int']] <- round.int } @@ -193,12 +195,21 @@ VAL_IN_ENV <- function(...) { !is.character(type) || length(type) != 1 || is.na(type.int <- pmatch(type, valid.types)) ) - stop("Argument `type` must partial match one of ", deparse(valid.types)) + stop2("Argument `type` must partial match one of ", deparse(valid.types)) args[['type']] <- valid.types[type.int] args[['type.int']] <- type.int - 1L } - + if('start' %in% argnm || 'stop' %in% argnm) { + x.len <- length(args[['x']]) + # Silently recycle start/stop like substr does + start <- rep(as.integer(args[['start']]), length.out=x.len) + stop <- rep(as.integer(args[['stop']]), length.out=x.len) + start[start < 1L] <- 1L + args[['start']] <- start + args[['stop']] <- stop + args[['x.len']] <- x.len + } # we might not have validated all, so we should be careful list2env(args, par.env) } diff --git a/R/sgr.R b/R/sgr.R index d857b5c7..45b86188 100644 --- a/R/sgr.R +++ b/R/sgr.R @@ -161,7 +161,6 @@ state_at_end <- function( .Call( FANSI_state_at_end, x, - 0L, # character type warn, term.cap.int, ctl.int, @@ -169,7 +168,19 @@ state_at_end <- function( carry ) } - +## R-level carry for functions that do not implement it internally in C +## +## Arguments should already have been processed by VAL_IN_ENV + +carry_internal <- function(x, warn, term.cap.int, ctl.int, normalize, carry) { + if(!is.na(carry)) { + ends <- .Call( + FANSI_state_at_end, x, warn, term.cap.int, ctl.int, normalize, carry + ) +
x <- paste0(c(carry, ends[-length(ends)]), x) + } + x +} # Given an SGR, compute the sequence that closes it #' @export diff --git a/R/substr2.R b/R/substr2.R index 5b05226d..3ae55b6c 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -47,6 +47,13 @@ #' directly from Gábor Csárdi's `crayon` package, although the implementation of #' the calculation is different. #' +#' Replacement functions are implemented as two substring operations to select +#' the beginning and end of the final string, and a `paste` operation to stick +#' all the pieces back together. The `carry` parameter is applied separately to +#' the `value` and to the `x` parameter. Styles in `value` will only carry to +#' substrings in the result that were originally part of `value`, and vice +#' versa. +#' #' @note Non-ASCII strings are converted to and returned in UTF-8 encoding. #' Width calculations will not work properly in R < 3.2.2. #' @note If `stop` < `start`, the return value is always an empty string. @@ -118,7 +125,7 @@ #' @param carry TRUE, FALSE, or a scalar string, controls whether active SGR #' present at the end of an input vector element is carried into the next #' vector element. If FALSE each vector element is interpreted as if there -#' were no active state when they begin. If character, then the active +#' were no active state when it begins. If character, then the active #' state at the end of the `carry` string is carried into the first element of #' `x`. See the "State Interactions" section of [`?fansi`][fansi] for #' details. 
@@ -185,16 +192,9 @@ substr2_ctl <- function( VAL_IN_ENV( ## modifies / creates NEW VARS in fun env x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate, tab.stops=tab.stops, - tabs.as.spaces=tabs.as.spaces + tabs.as.spaces=tabs.as.spaces, type=type, round=round, + start=start, stop=stop ) - x.len <- length(x) - - # Silently recycle start/stop like substr does - - start <- rep(as.integer(start), length.out=x.len) - stop <- rep(as.integer(stop), length.out=x.len) - start[start < 1L] <- 1L - res <- x no.na <- !(is.na(x) | is.na(start & stop)) @@ -245,7 +245,8 @@ substr2_ctl <- function( VAL_IN_ENV( ## modifies / creates NEW VARS in fun env x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate, tab.stops=tab.stops, - tabs.as.spaces=tabs.as.spaces, round=round + tabs.as.spaces=tabs.as.spaces, round=round, start=start, stop=stop, + type=type ) # Need to translate start/stop and remap round round.a <- switch( @@ -253,22 +254,37 @@ substr2_ctl <- function( ) round.b <- round - # Handle value termination + # Handle value termination, this is not very efficient due to manual carry, + # etc. 
value <- enc2utf8(as.character(value)) - + if(terminate) { + value <- carry_internal( + value, warn=warn, term.cap.int=term.cap.int, ctl.int=ctl.int, + normalize=normalize, carry=carry + ) + value <- paste0( + value, + close_state(state_at_end(value), normalize=normalize) + ) + } + # Actual replacement operation as substr/paste x[] <- paste0( substr_ctl_internal( - x, 1L, start - 1L, type.int=type.int, round=round.a, + x, 1L, start - 1L, type.int=type.int, + round.start=round.a == 'start' || round.a == 'both', + round.stop=round.a == 'stop' || round.a == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, - term.cap=term.cap.int, ctl.int=ctl.int, normalize=normalize, + term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, carry=carry, terminate=terminate ), rep(value, length.out=length(x)), substr_ctl_internal( - x, stop + 1L, .Machine[['integer.max']], type=type, round=round.b, + x, stop + 1L, .Machine[['integer.max']], type.int=type.int, + round.start=round.b == 'start' || round.b == 'both', + round.stop=round.b == 'stop' || round.b == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, - term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, - terminate=terminate + term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, + carry=carry, terminate=terminate ) ) x @@ -333,19 +349,17 @@ substr_ctl_internal <- function( if(tabs.as.spaces) x <- .Call(FANSI_tabs_as_spaces, x, tab.stops, warn, term.cap.int, ctl.int) - res <- character(x.len) + res <- character(length(x)) s.s.valid <- stop >= start & stop # If we want to carry, we'll do this manually as too much work to try to do it # in C given the current structure using ordered indices into each string. # Do before `unique` as this to equal strings may become different. 
- if(!is.na(carry)) { - ends <- .Call( - FANSI_state_at_end, x, warn, term.cap.int, ctl.int, normalize, carry - ) - x <- paste0(c(carry, ends[-length(ends)]), x) - } + ends <- carry_internal( + x, warn=warn, term.cap.int=term.cap.int, ctl.int=ctl.int, + normalize=normalize, carry=carry + ) # We compute style at each start and stop position by getting all those # positions into a vector and then ordering them by position, keeping track of # original order and whether they are starting or ending positions (affects From 976c95fb7115ae272e8298290f4a110eddc4bbdf Mon Sep 17 00:00:00 2001 From: brodieG Date: Sun, 17 Oct 2021 18:53:50 -0400 Subject: [PATCH 05/27] don't allow overshoot --- R/substr2.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/substr2.R b/R/substr2.R index 3ae55b6c..f345c1ef 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -254,8 +254,10 @@ substr2_ctl <- function( ) round.b <- round - # Handle value termination, this is not very efficient due to manual carry, - # etc. 
+ # Adjust `stop` to be no longer than end of string + nc <- nchar_ctl(x, type=type, ctl=ctl, warn=warn) + stop <- pmin(stop, nc) + value <- enc2utf8(as.character(value)) if(terminate) { value <- carry_internal( From 1ba72394a6d120db49971ba996ad03d133d55ef4 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sun, 17 Oct 2021 18:56:08 -0400 Subject: [PATCH 06/27] trim 'value' to replacement size --- R/substr2.R | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/R/substr2.R b/R/substr2.R index f345c1ef..c155dfd9 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -259,16 +259,7 @@ substr2_ctl <- function( stop <- pmin(stop, nc) value <- enc2utf8(as.character(value)) - if(terminate) { - value <- carry_internal( - value, warn=warn, term.cap.int=term.cap.int, ctl.int=ctl.int, - normalize=normalize, carry=carry - ) - value <- paste0( - value, - close_state(state_at_end(value), normalize=normalize) - ) - } + # Actual replacement operation as substr/paste x[] <- paste0( substr_ctl_internal( @@ -279,13 +270,21 @@ substr2_ctl <- function( term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, carry=carry, terminate=terminate ), - rep(value, length.out=length(x)), + substr_ctl_internal( + rep(value, length.out=length(x)), 1L, stop - start + 1L, + type.int=type.int, + round.start=round == 'start' || round == 'both', + round.stop=round == 'stop' || round == 'both', + tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, + term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, + carry=carry, terminate=terminate + ), substr_ctl_internal( x, stop + 1L, .Machine[['integer.max']], type.int=type.int, round.start=round.b == 'start' || round.b == 'both', round.stop=round.b == 'stop' || round.b == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, - term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, + term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, carry=carry, terminate=terminate 
) ) From 5c0638dda139461c0b370b8042faec044a3d02f9 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sun, 17 Oct 2021 19:27:25 -0400 Subject: [PATCH 07/27] DOC --- NAMESPACE | 2 + man/fansi.Rd | 2 +- man/has_sgr.Rd | 28 +++++++ man/nchar_sgr.Rd | 52 +++++++++++++ man/state_at_end.Rd | 10 +-- man/strip_sgr.Rd | 32 ++++++++ man/strsplit_sgr.Rd | 97 +++++++++++++++++++++++ man/strtrim_sgr.Rd | 73 ++++++++++++++++++ man/strwrap_sgr.Rd | 131 +++++++++++++++++++++++++++++++ man/substr_ctl.Rd | 37 +++++++++ man/substr_sgr.Rd | 107 ++++++++++++++++++++++++++ man/to_html.Rd | 182 ++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 747 insertions(+), 6 deletions(-) create mode 100644 man/has_sgr.Rd create mode 100644 man/nchar_sgr.Rd create mode 100644 man/strip_sgr.Rd create mode 100644 man/strsplit_sgr.Rd create mode 100644 man/strtrim_sgr.Rd create mode 100644 man/strwrap_sgr.Rd create mode 100644 man/substr_sgr.Rd create mode 100644 man/to_html.Rd diff --git a/NAMESPACE b/NAMESPACE index 9db0bcbf..f75d8b9c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +export("substr2_ctl<-") +export("substr_ctl<-") export(close_state) export(fansi_lines) export(has_ctl) diff --git a/man/fansi.Rd b/man/fansi.Rd index f38f7e1f..d0019e86 100644 --- a/man/fansi.Rd +++ b/man/fansi.Rd @@ -162,7 +162,7 @@ situation state from each line should bleed into subsequent ones. Setting Another form of interaction is when substrings produced by \code{fansi} are spliced with or into other substrings. By default \code{fansi} automatically terminates substrings it produces if they contain active formats or URLs. -This prevents the state to bleed into external strings, which is useful e.g. +This prevents the state bleeding into external strings, which is useful e.g. when arranging text in columns. We can allow the state to bleed into appended strings by setting \code{terminate = FALSE}. 
\code{carry} is unaffected by \code{terminate} as \code{fansi} records the ending SGR state prior to termination diff --git a/man/has_sgr.Rd b/man/has_sgr.Rd new file mode 100644 index 00000000..c651c1f6 --- /dev/null +++ b/man/has_sgr.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sgr.R +\name{has_sgr} +\alias{has_sgr} +\title{Check for Presence of Control Sequences} +\usage{ +has_sgr(x, warn = getOption("fansi.warn")) +} +\arguments{ +\item{x}{a character vector or object that can be coerced to such.} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are encountered. These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} +} +\value{ +logical of same length as \code{x}; NA values in \code{x} result in NA values +in return +} +\description{ +This function is deprecated in favor of the \link[=has_ctl]{\verb{_ctl} flavor}. It +checks for CSI SGR and OSC-anchored URL sequences. 
+} +\keyword{internal} diff --git a/man/nchar_sgr.Rd b/man/nchar_sgr.Rd new file mode 100644 index 00000000..dc3313ef --- /dev/null +++ b/man/nchar_sgr.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nchar.R +\name{nchar_sgr} +\alias{nchar_sgr} +\alias{nzchar_sgr} +\title{Control Sequence Aware Version of nchar} +\usage{ +nchar_sgr( + x, + type = "chars", + allowNA = FALSE, + keepNA = NA, + warn = getOption("fansi.warn") +) + +nzchar_sgr(x, keepNA = NA, warn = getOption("fansi.warn")) +} +\arguments{ +\item{x}{a character vector or object that can be coerced to such.} + +\item{type}{character(1L) partial matching \code{c("chars", "width")}, although +\code{type="width"} only works correctly with R >= 3.2.2. See +\code{\link[base:nchar]{?nchar}}. With "width", the results might be affected by +locale changes, Unicode database updates, and logic changes for processing +of complex graphemes. Generally you should not rely on a specific output +e.g. by embedding it in unit tests. For the most part \code{fansi} (currently) +uses the internals of \code{base::nchar(type='width')}, but there are exceptions +and this may change in the future.} + +\item{allowNA}{logical: should \code{NA} be returned for invalid + multibyte strings or \code{"bytes"}-encoded strings (rather than + throwing an error)?} + +\item{keepNA}{logical: should \code{NA} be returned when + \code{x} is \code{\link[base]{NA}}? If false, \code{nchar()} returns + \code{2}, as that is the number of printing characters used when + strings are written to output, and \code{nzchar()} is \code{TRUE}. The + default for \code{nchar()}, \code{NA}, means to use \code{keepNA = TRUE} + unless \code{type} is \code{"width"}.} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are encountered. 
These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} +} +\description{ +These functions are deprecated in favor of the \link[=nchar_ctl]{\verb{_ctl} flavors}. +} +\keyword{internal} diff --git a/man/state_at_end.Rd b/man/state_at_end.Rd index 3b86c9f2..461b22f3 100644 --- a/man/state_at_end.Rd +++ b/man/state_at_end.Rd @@ -61,11 +61,11 @@ character vector same length as \code{x}. OSC-anchored URLs until the end of the string and outputs the active state at the end of it. \code{close_state} produces the sequence that closes active SGR and OSC-anchored URLs at the end of the input string. If \code{normalize = FALSE} -(default), it will close SGRs with the reset code "ESC[0m", so it is only -interesting for closing SGRs if \code{normalize = TRUE}. Unlike \code{state_at_end} -and other functions \code{close_state} has no concept of \code{carry}: it will only -close state activate within an element that is still active at the end of -that element. +(default), it will emit the reset code "ESC[0m" if any SGR is present. It is +more interesting for closing SGRs if \code{normalize = TRUE}. Unlike +\code{state_at_end} and other functions \code{close_state} has no concept of \code{carry}: +it will only emit closing sequences for states activated within an element +that are still active at the end of that element. 
} \examples{ x <- c("\033[44mhello", "\033[33mworld") diff --git a/man/strip_sgr.Rd b/man/strip_sgr.Rd new file mode 100644 index 00000000..159aff0b --- /dev/null +++ b/man/strip_sgr.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sgr.R +\name{strip_sgr} +\alias{strip_sgr} +\title{Strip Control Sequences} +\usage{ +strip_sgr(x, warn = getOption("fansi.warn")) +} +\arguments{ +\item{x}{a character vector or object that can be coerced to such.} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are encountered. These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} +} +\value{ +character vector of same length as x with ANSI escape sequences +stripped +} +\description{ +This function is deprecated in favor of the \link[=strip_ctl]{\verb{_ctl} flavor}. It +strips CSI SGR and OSC-anchored URL sequences. 
+} +\examples{ +## convenience function, same as `strip_ctl(ctl=c('sgr', 'url'))` +strip_sgr(string) +} +\keyword{internal} diff --git a/man/strsplit_sgr.Rd b/man/strsplit_sgr.Rd new file mode 100644 index 00000000..69f32555 --- /dev/null +++ b/man/strsplit_sgr.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/strsplit.R +\name{strsplit_sgr} +\alias{strsplit_sgr} +\title{Control Sequence Aware Version of strsplit} +\usage{ +strsplit_sgr( + x, + split, + fixed = FALSE, + perl = FALSE, + useBytes = FALSE, + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) +} +\arguments{ +\item{x}{a character vector, or, unlike \code{\link[base:strsplit]{base::strsplit}} an object that can +be coerced to character.} + +\item{split}{ + character vector (or object which can be coerced to such) + containing \link[base]{regular expression}(s) (unless \code{fixed = TRUE}) + to use for splitting. If empty matches occur, in particular if + \code{split} has length 0, \code{x} is split into single characters. + If \code{split} has length greater than 1, it is re-cycled along + \code{x}. + } + +\item{fixed}{ + logical. If \code{TRUE} match \code{split} exactly, otherwise + use regular expressions. Has priority over \code{perl}. + } + +\item{perl}{logical. Should Perl-compatible regexps be used?} + +\item{useBytes}{logical. If \code{TRUE} the matching is done + byte-by-byte rather than character-by-character, and inputs with + marked encodings are not converted. This is forced (with a warning) + if any input is found which is marked as \code{"bytes"} + (see \code{\link[base]{Encoding}}).} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are encountered. 
These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} + +\item{term.cap}{character a vector of the capabilities of the terminal, can +be any combination of "bright" (SGR codes 90-97, 100-107), "256" (SGR codes +starting with "38;5" or "48;5"), "truecolor" (SGR codes starting with +"38;2" or "48;2"), and "all". Changing this parameter changes how \code{fansi} +interprets escape sequences, so you should ensure that it matches your +terminal capabilities. See \code{\link{term_cap_test}} for details. "all" behaves as +it does for the \code{ctl} parameter: "all" combined with any other value means +all terminal capabilities except that one.} + +\item{normalize}{TRUE or FALSE (default) whether SGR sequence should be +normalized out such that there is one distinct sequence for each SGR code. +normalized strings will occupy more space (e.g. "\033[31;42m" becomes +"\033[31m\033[42m"), but will work better with code that assumes each SGR +code will be in its own escape as \code{crayon} does.} + +\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR +present at the end of an input vector element is carried into the next +vector element. If FALSE each vector element is interpreted as if there +were no active state when they begin. If character, then the active +state at the end of the \code{carry} string is carried into the first element of +\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for +details.} + +\item{terminate}{TRUE (default) or FALSE whether substrings should have +active state closed to avoid it bleeding into other strings they may be +prepended onto. 
See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} +} +\value{ +A list of the same length as \code{x}, the \code{i}-th element of which + contains the vector of splits of \code{x[i]}. + + If any element of \code{x} or \code{split} is declared to be in UTF-8 + (see \code{\link[base]{Encoding}}), all non-ASCII character strings in the + result will be in UTF-8 and have their encoding declared as UTF-8. + (This also holds if any element is declared to be Latin-1 except in a + Latin-1 locale.) + For \code{perl = TRUE, useBytes = FALSE} all non-ASCII strings in a + multibyte locale are translated to UTF-8. +} +\description{ +This function is deprecated in favor of the \link[=strsplit_ctl]{\verb{_ctl} flavor}. +} +\keyword{internal} diff --git a/man/strtrim_sgr.Rd b/man/strtrim_sgr.Rd new file mode 100644 index 00000000..44256466 --- /dev/null +++ b/man/strtrim_sgr.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/strtrim.R +\name{strtrim_sgr} +\alias{strtrim_sgr} +\alias{strtrim2_sgr} +\title{Control Sequence Aware Version of strtrim} +\usage{ +strtrim_sgr( + x, + width, + warn = getOption("fansi.warn"), + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) + +strtrim2_sgr( + x, + width, + warn = getOption("fansi.warn"), + tabs.as.spaces = getOption("fansi.tabs.as.spaces"), + tab.stops = getOption("fansi.tab.stops"), + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) +} +\arguments{ +\item{x}{a character vector, or an object which can be coerced to a + character vector by \code{\link[base]{as.character}}.} + +\item{width}{Positive integer values: recycled to the length of \code{x}.} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are 
encountered. These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} + +\item{normalize}{TRUE or FALSE (default) whether SGR sequence should be +normalized out such that there is one distinct sequence for each SGR code. +normalized strings will occupy more space (e.g. "\033[31;42m" becomes +"\033[31m\033[42m"), but will work better with code that assumes each SGR +code will be in its own escape as \code{crayon} does.} + +\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR +present at the end of an input vector element is carried into the next +vector element. If FALSE each vector element is interpreted as if there +were no active state when they begin. If character, then the active +state at the end of the \code{carry} string is carried into the first element of +\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for +details.} + +\item{terminate}{TRUE (default) or FALSE whether substrings should have +active state closed to avoid it bleeding into other strings they may be +prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} + +\item{tabs.as.spaces}{FALSE (default) or TRUE, whether to convert tabs to +spaces. This can only be set to TRUE if \code{strip.spaces} is FALSE.} + +\item{tab.stops}{integer(1:n) indicating position of tab stops to use +when converting tabs to spaces. If there are more tabs in a line than +defined tab stops the last tab stop is re-used. 
For the purposes of +applying tab stops, each input line is considered a line and the character +count begins from the beginning of the input line.} +} +\description{ +These functions are deprecated in favor of the \link[=strtrim_ctl]{\verb{_ctl} flavors}. +} +\keyword{internal} diff --git a/man/strwrap_sgr.Rd b/man/strwrap_sgr.Rd new file mode 100644 index 00000000..ea9cdc59 --- /dev/null +++ b/man/strwrap_sgr.Rd @@ -0,0 +1,131 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/strwrap.R +\name{strwrap_sgr} +\alias{strwrap_sgr} +\alias{strwrap2_sgr} +\title{Control Sequence Aware Version of strwrap} +\usage{ +strwrap_sgr( + x, + width = 0.9 * getOption("width"), + indent = 0, + exdent = 0, + prefix = "", + simplify = TRUE, + initial = prefix, + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) + +strwrap2_sgr( + x, + width = 0.9 * getOption("width"), + indent = 0, + exdent = 0, + prefix = "", + simplify = TRUE, + initial = prefix, + wrap.always = FALSE, + pad.end = "", + strip.spaces = !tabs.as.spaces, + tabs.as.spaces = getOption("fansi.tabs.as.spaces"), + tab.stops = getOption("fansi.tab.stops"), + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) +} +\arguments{ +\item{x}{a character vector, or an object which can be converted to a + character vector by \code{\link[base]{as.character}}.} + +\item{width}{a positive integer giving the target column for wrapping + lines in the output.} + +\item{indent}{a non-negative integer giving the indentation of the + first line in a paragraph.} + +\item{exdent}{a non-negative integer specifying the indentation of + subsequent lines in paragraphs.} + 
+\item{prefix}{a character string to be used as prefix for + each line except the first, for which \code{initial} is used.} + +\item{simplify}{a logical. If \code{TRUE}, the result is a single + character vector of line text; otherwise, it is a list of the same + length as \code{x} the elements of which are character vectors of + line text obtained from the corresponding element of \code{x}. + (Hence, the result in the former case is obtained by unlisting that + of the latter.)} + +\item{initial}{a character string to be used as prefix for + each line except the first, for which \code{initial} is used.} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are encountered. These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} + +\item{term.cap}{character a vector of the capabilities of the terminal, can +be any combination of "bright" (SGR codes 90-97, 100-107), "256" (SGR codes +starting with "38;5" or "48;5"), "truecolor" (SGR codes starting with +"38;2" or "48;2"), and "all". Changing this parameter changes how \code{fansi} +interprets escape sequences, so you should ensure that it matches your +terminal capabilities. See \code{\link{term_cap_test}} for details. "all" behaves as +it does for the \code{ctl} parameter: "all" combined with any other value means +all terminal capabilities except that one.} + +\item{normalize}{TRUE or FALSE (default) whether SGR sequence should be +normalized out such that there is one distinct sequence for each SGR code. +normalized strings will occupy more space (e.g. 
"\033[31;42m" becomes +"\033[31m\033[42m"), but will work better with code that assumes each SGR +code will be in its own escape as \code{crayon} does.} + +\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR +present at the end of an input vector element is carried into the next +vector element. If FALSE each vector element is interpreted as if there +were no active state when they begin. If character, then the active +state at the end of the \code{carry} string is carried into the first element of +\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for +details.} + +\item{terminate}{TRUE (default) or FALSE whether substrings should have +active state closed to avoid it bleeding into other strings they may be +prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} + +\item{wrap.always}{TRUE or FALSE (default), whether to hard wrap at requested +width if no word breaks are detected within a line. If set to TRUE then +\code{width} must be at least 2.} + +\item{pad.end}{character(1L), a single character to use as padding at the +end of each line until the line is \code{width} wide. This must be a printable +ASCII character or an empty string (default). If you set it to an empty +string the line remains unpadded.} + +\item{strip.spaces}{TRUE (default) or FALSE, if TRUE, extraneous white spaces +(spaces, newlines, tabs) are removed in the same way as \link[base:strwrap]{base::strwrap} +does. When FALSE, whitespaces are preserved, except for newlines as those +are implicit boundaries between output vector elements.} + +\item{tabs.as.spaces}{FALSE (default) or TRUE, whether to convert tabs to +spaces. This can only be set to TRUE if \code{strip.spaces} is FALSE.} + +\item{tab.stops}{integer(1:n) indicating position of tab stops to use +when converting tabs to spaces. If there are more tabs in a line than +defined tab stops the last tab stop is re-used. 
For the purposes of +applying tab stops, each input line is considered a line and the character +count begins from the beginning of the input line.} +} +\description{ +These functions are deprecated in favor of the \link[=strwrap_ctl]{\verb{_ctl} flavors}. +} +\keyword{internal} diff --git a/man/substr_ctl.Rd b/man/substr_ctl.Rd index c47bedb9..dc4b61e9 100644 --- a/man/substr_ctl.Rd +++ b/man/substr_ctl.Rd @@ -3,6 +3,8 @@ \name{substr_ctl} \alias{substr_ctl} \alias{substr2_ctl} +\alias{substr_ctl<-} +\alias{substr2_ctl<-} \title{Control Sequence Aware Version of substr} \usage{ substr_ctl( @@ -32,6 +34,34 @@ substr2_ctl( carry = getOption("fansi.carry", FALSE), terminate = getOption("fansi.terminate", TRUE) ) + +substr_ctl( + x, + start, + stop, + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + ctl = "all", + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) <- value + +substr2_ctl( + x, + start, + stop, + type = "chars", + round = "start", + tabs.as.spaces = getOption("fansi.tabs.as.spaces"), + tab.stops = getOption("fansi.tab.stops"), + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + ctl = "all", + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) <- value } \arguments{ \item{x}{a character vector or object that can be coerced to such.} @@ -118,6 +148,8 @@ when converting tabs to spaces. If there are more tabs in a line than defined tab stops the last tab stop is re-used. 
For the purposes of applying tab stops, each input line is considered a line and the character count begins from the beginning of the input line.} + +\item{value}{a character vector or object that can be coerced to such.} } \value{ a character vector of the same length and with the same attributes as @@ -155,6 +187,11 @@ to the naive length calculations, and then use the mapping in conjunction with \code{\link[base:substr]{base::substr()}} to extract the string. This concept is borrowed directly from Gábor Csárdi's \code{crayon} package, although the implementation of the calculation is different. + +Replacement functions are implemented as two substring operations to select +the beginning and end of the final string, and a \code{paste} operation to stick +all the pieces back together. The \code{carry} parameter is applied separately to +the \code{value} and to the \code{x} parameter. } \note{ Non-ASCII strings are converted to and returned in UTF-8 encoding. diff --git a/man/substr_sgr.Rd b/man/substr_sgr.Rd new file mode 100644 index 00000000..52e01bf9 --- /dev/null +++ b/man/substr_sgr.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/substr2.R +\name{substr_sgr} +\alias{substr_sgr} +\alias{substr2_sgr} +\title{SGR Control Sequence Aware Version of substr} +\usage{ +substr_sgr( + x, + start, + stop, + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) + +substr2_sgr( + x, + start, + stop, + type = "chars", + round = "start", + tabs.as.spaces = getOption("fansi.tabs.as.spaces"), + tab.stops = getOption("fansi.tab.stops"), + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + normalize = getOption("fansi.normalize", FALSE), + carry = getOption("fansi.carry", FALSE), + terminate = getOption("fansi.terminate", TRUE) +) +} 
+\arguments{ +\item{x}{a character vector or object that can be coerced to such.} + +\item{start}{integer. The first element to be replaced.} + +\item{stop}{integer. The last element to be replaced.} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are encountered. These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} + +\item{term.cap}{character a vector of the capabilities of the terminal, can +be any combination of "bright" (SGR codes 90-97, 100-107), "256" (SGR codes +starting with "38;5" or "48;5"), "truecolor" (SGR codes starting with +"38;2" or "48;2"), and "all". Changing this parameter changes how \code{fansi} +interprets escape sequences, so you should ensure that it matches your +terminal capabilities. See \code{\link{term_cap_test}} for details. "all" behaves as +it does for the \code{ctl} parameter: "all" combined with any other value means +all terminal capabilities except that one.} + +\item{normalize}{TRUE or FALSE (default) whether SGR sequence should be +normalized out such that there is one distinct sequence for each SGR code. +normalized strings will occupy more space (e.g. "\033[31;42m" becomes +"\033[31m\033[42m"), but will work better with code that assumes each SGR +code will be in its own escape as \code{crayon} does.} + +\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR +present at the end of an input vector element is carried into the next +vector element. If FALSE each vector element is interpreted as if there +were no active state when they begin. 
If character, then the active +state at the end of the \code{carry} string is carried into the first element of +\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for +details.} + +\item{terminate}{TRUE (default) or FALSE whether substrings should have +active state closed to avoid it bleeding into other strings they may be +prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} + +\item{type}{character(1L) partial matching \code{c("chars", "width")}, although +\code{type="width"} only works correctly with R >= 3.2.2. See +\code{\link[base:nchar]{?nchar}}. With "width", the results might be affected by +locale changes, Unicode database updates, and logic changes for processing +of complex graphemes. Generally you should not rely on a specific output +e.g. by embedding it in unit tests. For the most part \code{fansi} (currently) +uses the internals of \code{base::nchar(type='width')}, but there are exceptions +and this may change in the future.} + +\item{round}{character(1L) partial matching +\code{c("start", "stop", "both", "neither")}, controls how to resolve +ambiguities when a \code{start} or \code{stop} value in "width" \code{type} mode falls +within a wide display character. See details.} + +\item{tabs.as.spaces}{FALSE (default) or TRUE, whether to convert tabs to +spaces. This can only be set to TRUE if \code{strip.spaces} is FALSE.} + +\item{tab.stops}{integer(1:n) indicating position of tab stops to use +when converting tabs to spaces. If there are more tabs in a line than +defined tab stops the last tab stop is re-used. For the purposes of +applying tab stops, each input line is considered a line and the character +count begins from the beginning of the input line.} +} +\value{ +a character vector of the same length and with the same attributes as +x (after possible coercion and re-encoding to UTF-8). 
+} +\description{ +These functions are deprecated in favor of the \link[=substr_ctl]{\verb{_ctl} flavors}. +} +\keyword{internal} diff --git a/man/to_html.Rd b/man/to_html.Rd new file mode 100644 index 00000000..c2651f6d --- /dev/null +++ b/man/to_html.Rd @@ -0,0 +1,182 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tohtml.R +\name{to_html} +\alias{to_html} +\title{Convert Control Sequences to HTML Equivalents} +\usage{ +to_html( + x, + warn = getOption("fansi.warn"), + term.cap = getOption("fansi.term.cap"), + classes = FALSE, + carry = getOption("fansi.carry", TRUE) +) +} +\arguments{ +\item{x}{a character vector or object that can be coerced to such.} + +\item{warn}{TRUE (default) or FALSE, whether to warn when potentially +problematic \emph{Control Sequences} are encountered. These could cause the +assumptions \code{fansi} makes about how strings are rendered on your display +to be incorrect, for example by moving the cursor (see \code{\link[=fansi]{?fansi}}). +If the problematic sequence is a tab, you can use the \code{tabs.as.spaces} +parameter on functions that have it, or the \code{tabs_as_spaces} function, to +turn the tabs to spaces and resolve the warning that way.} + +\item{term.cap}{character a vector of the capabilities of the terminal, can +be any combination of "bright" (SGR codes 90-97, 100-107), "256" (SGR codes +starting with "38;5" or "48;5"), "truecolor" (SGR codes starting with +"38;2" or "48;2"), and "all". Changing this parameter changes how \code{fansi} +interprets escape sequences, so you should ensure that it matches your +terminal capabilities. See \code{\link{term_cap_test}} for details. "all" behaves as +it does for the \code{ctl} parameter: "all" combined with any other value means +all terminal capabilities except that one.} + +\item{classes}{FALSE (default), TRUE, or character vector of either 16, +32, or 512 class names. 
Character strings may only contain ASCII +characters corresponding to letters, numbers, the hyphen, or the +underscore. It is the user's responsibility to provide values that are +legal class names. +\itemize{ +\item FALSE: All colors rendered as inline CSS styles. +\item TRUE: Each of the 256 basic colors is mapped to a class in form +"fansi-color-###" (or "fansi-bgcol-###" for background colors) +where "###" is a zero padded three digit number in 0:255. Basic colors +specified with SGR codes 30-37 (or 40-47) map to 000:007, and bright ones +specified with 90-97 (or 100-107) map to 008:015. 8 bit colors specified +with SGR codes 38;5;### or 48;5;### map directly based on the value of +"###". Implicitly, this maps the 8 bit colors in 0:7 to the basic +colors, and those in 8:15 to the bright ones even though these are not +exactly the same when using inline styles. "truecolor"s specified with +38;2;#;#;# or 48;2;#;#;# do not map to classes and are rendered as inline +styles. +\item character(16): The eight basic colors are mapped to the string values in +the vector, all others are rendered as inline CSS styles. Basic colors +are mapped irrespective of whether they are encoded as the basic colors +or as 8-bit colors. Sixteen elements are needed because there must be +eight classes for foreground colors, and eight classes for background +colors. Classes should be ordered in ascending order of color number, +with foreground and background classes alternating starting with +foreground (see examples). +\item character(32): Like character(16), except the basic and bright colors are +mapped. +\item character(512): Like character(16), except the basic, bright, and all +other 8-bit colors are mapped. +}} + +\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR +present at the end of an input vector element is carried into the next +vector element. If FALSE each vector element is interpreted as if there +were no active state when they begin. 
If character, then the active +state at the end of the \code{carry} string is carried into the first element of +\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for +details.} +} +\value{ +A character vector of the same length as \code{x} with all escape +sequences removed and any basic ANSI CSI SGR escape sequences applied via +SPAN HTML tags. +} +\description{ +Interprets CSI SGR sequences and OSC-anchored URLs to produce strings with +the state reproduced with SPAN elements, inline CSS styles, and A anchors. +Optionally for colors, the SPAN elements may be assigned classes instead of +inline styles, in which case it is the user's responsibility to provide a +style sheet. Input that contains special HTML characters ("<", ">", "&", +"'", and "\""), particularly the first two, should be escaped with +\code{\link{html_esc}}. +} +\details{ +Only "observable" formats are translated. These include colors, +background-colors, and basic styles (CSI SGR codes 1-6, 8, 9). Style 7, the +"inverse" style, is implemented by explicitly switching foreground and +background colors, if there are any. Styles 5-6 (blink) are rendered as +"text-decoration" but likely will do nothing in the browser. Style 8 +(conceal) sets the color to transparent. + +Parameters in OSC sequences are not copied over as they might have different +semantics in the OSC sequences than they would in HTML (e.g. the "id" +parameter is intended to be non-unique in OSC). + +Each element of the input vector is translated into a stand-alone valid HTML +string. In particular, any open tags generated by \code{fansi} are closed at the +end of an element and re-opened on the subsequent element with the same +style. This allows safe combination of HTML translated strings, for example +by \code{\link{paste}}ing them together. The trade-off is that there may be redundant +HTML produced. 
To reduce redundancy you can first collapse the input vector +into one string, being mindful that very large strings may exceed maximum +string size when converted to HTML. + +\code{fansi}-opened tags are closed and new ones open anytime the "observable" +state changes. \code{to_html} never produces nested tags, even if at times +that might produce more compact output. While it would be possible to +match a CSI/OSC encoded state with nested tags, it would increase the +complexity of the code substantially for little gain. +} +\note{ +Non-ASCII strings are converted to and returned in UTF-8 encoding. + +\code{to_html} always terminates as not doing so produces +invalid HTML. If you wish for the last active SPAN to bleed into +subsequent text you may do so with e.g. \code{sub("</span>(?:</a>)?$", "", x)} +or similar. +} +\examples{ +to_html("hello\033[31;42;1mworld\033[m") +to_html("hello\033[31;42;1mworld\033[m", classes=TRUE) + +## Input contains HTML special chars +x <- " Date: Mon, 18 Oct 2021 07:07:59 -0400 Subject: [PATCH 08/27] clarify VAL_IN_ENV creates new vars --- R/internal.R | 14 +++++++------- R/nchar.R | 6 ++++-- R/normalize.R | 3 ++- R/sgr.R | 13 +++++++++---- R/strsplit.R | 5 +++-- R/strtrim.R | 6 ++++-- R/strwrap.R | 14 ++++++++------ R/substr2.R | 28 +++++++++++++++------------- R/tohtml.R | 3 ++- R/unhandled.R | 3 ++- 10 files changed, 56 insertions(+), 39 deletions(-) diff --git a/R/internal.R b/R/internal.R index 5c65aa79..a78a76f5 100644 --- a/R/internal.R +++ b/R/internal.R @@ -61,7 +61,7 @@ ctl_as_int <- function(x) .Call(FANSI_ctl_as_int, as.integer(x)) ## Converts common arguments to standardized forms if needed. ## ## DANGER: will modify values in calling environment! Also may add variables -## ending in `.int` like `ctl.int`, `term.cap.int`, and others to them. +## such as CTL.INT, X.LEN, etc. (these should all be in caps). VAL_IN_ENV <- function(...) { call <- sys.call(-1) @@ -117,7 +117,7 @@ VAL_IN_ENV <- function(...)
{ "Argument `term.cap` may only contain values in ", deparse(VALID.TERM.CAP) ) - args[['term.cap.int']] <- term.cap.int + args[['TERM.CAP.INT']] <- term.cap.int } if('ctl' %in% argnm) { ctl <- args[['ctl']] @@ -131,7 +131,7 @@ VAL_IN_ENV <- function(...) { "Argument `ctl` may contain only values in `", deparse(VALID.CTL), "`" ) } - args[['ctl.int']] <- ctl.int + args[['CTL.INT']] <- ctl.int } if('carry' %in% argnm) { carry <- args[['carry']] @@ -185,8 +185,8 @@ VAL_IN_ENV <- function(...) { is.na(round.int <- pmatch(round, valid.round)) ) stop2("Argument `round` must partial match one of ", deparse(valid.round)) - args[['round']] <- valid.round['round.int'] - args[['round.int']] <- round.int + args[['round']] <- valid.round[round.int] + args[['ROUND.INT']] <- round.int } if('type' %in% argnm) { valid.types <- c('chars', 'width') @@ -198,7 +198,7 @@ VAL_IN_ENV <- function(...) { stop2("Argument `type` must partial match one of ", deparse(valid.types)) args[['type']] <- valid.types[type.int] - args[['type.int']] <- type.int - 1L + args[['TYPE.INT']] <- type.int - 1L } if('start' %in% argnm || 'stop' %in% argnm) { x.len <- length(args[['x']]) @@ -208,7 +208,7 @@ VAL_IN_ENV <- function(...) { start[start < 1L] <- 1L args[['start']] <- start args[['stop']] <- stop - args[['x.lane']] <- x.len + args[['X.LEN']] <- x.len } # we might not have validated all, so we should be careful list2env(args, par.env) diff --git a/R/nchar.R b/R/nchar.R index 5f5c8c43..94c19b02 100644 --- a/R/nchar.R +++ b/R/nchar.R @@ -80,8 +80,9 @@ nchar_ctl <- function( "Argument `type` must partial match one of 'chars', 'width', or 'bytes'." 
) + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, ctl=ctl, warn=warn) - type <- valid.types[type.int] + type <- valid.types[TYPE.INT] stripped <- strip_ctl(x, ctl=ctl, warn=warn) R.ver.gte.3.2.2 <- R.ver.gte.3.2.2 # "import" symbol from namespace @@ -92,13 +93,14 @@ nchar_ctl <- function( #' @rdname nchar_ctl nzchar_ctl <- function(x, keepNA=NA, ctl='all', warn=getOption('fansi.warn')) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, ctl=ctl, warn=warn) if(!is.logical(keepNA)) keepNA <- as.logical(keepNA) if(length(keepNA) != 1L) stop("Argument `keepNA` must be a scalar logical.") term.cap.int <- 1L - .Call(FANSI_nzchar_esc, x, keepNA, warn, term.cap.int, ctl.int) + .Call(FANSI_nzchar_esc, x, keepNA, warn, term.cap.int, CTL.INT) } #' Control Sequence Aware Version of nchar #' diff --git a/R/normalize.R b/R/normalize.R index 255cc059..df6cc20e 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -85,8 +85,9 @@ normalize_state <- function( x, warn=getOption('fansi.warn'), term.cap=getOption('fansi.term.cap'), carry=getOption('fansi.carry', FALSE) ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, warn=warn, term.cap=term.cap, carry=carry) - .Call(FANSI_normalize_state, x, warn, term.cap.int, carry) + .Call(FANSI_normalize_state, x, warn, TERM.CAP.INT, carry) } # To reduce overhead of applying this in `strwrap_ctl` diff --git a/R/sgr.R b/R/sgr.R index 45b86188..b0e8e35b 100644 --- a/R/sgr.R +++ b/R/sgr.R @@ -61,9 +61,10 @@ strip_ctl <- function(x, ctl='all', warn=getOption('fansi.warn'), strip) { message("Parameter `strip` has been deprecated; use `ctl` instead.") ctl <- strip } + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, ctl=ctl, warn=warn) - if(length(ctl)) .Call(FANSI_strip_csi, enc2utf8(x), ctl.int, warn) + if(length(ctl)) .Call(FANSI_strip_csi, enc2utf8(x), CTL.INT, warn) else x } #' Strip Control Sequences @@ -80,6 +81,7 @@ strip_ctl <- function(x, ctl='all', warn=getOption('fansi.warn'), strip) { #' 
strip_sgr(string) strip_sgr <- function(x, warn=getOption('fansi.warn')) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, warn=warn) ctl.int <- match(c("sgr", "url"), VALID.CTL) .Call(FANSI_strip_csi, x, ctl.int, warn) @@ -109,9 +111,10 @@ has_ctl <- function(x, ctl='all', warn=getOption('fansi.warn'), which) { message("Parameter `which` has been deprecated; use `ctl` instead.") ctl <- which } + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, ctl=ctl, warn=warn) if(length(ctl.int)) { - .Call(FANSI_has_csi, x, ctl.int, warn) + .Call(FANSI_has_csi, x, CTL.INT, warn) } else rep(FALSE, length(x)) } #' Check for Presence of Control Sequences @@ -157,13 +160,14 @@ state_at_end <- function( normalize=getOption('fansi.normalize', FALSE), carry=getOption('fansi.carry', FALSE) ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, ctl='sgr', warn=warn, term.cap=term.cap, carry=carry) .Call( FANSI_state_at_end, x, warn, - term.cap.int, - ctl.int, + TERM.CAP.INT, + CTL.INT, normalize, carry ) @@ -191,6 +195,7 @@ close_state <- function( warn=getOption('fansi.warn'), normalize=getOption('fansi.normalize', FALSE) ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, warn=warn, normalize=normalize) .Call(FANSI_close_state, x, warn, 1L, normalize) } diff --git a/R/strsplit.R b/R/strsplit.R index f6bec906..1ce89ee6 100644 --- a/R/strsplit.R +++ b/R/strsplit.R @@ -54,6 +54,7 @@ strsplit_ctl <- function( carry=getOption('fansi.carry', FALSE), terminate=getOption('fansi.terminate', TRUE) ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV( x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate @@ -126,8 +127,8 @@ strsplit_ctl <- function( start=starts, stop=ends, type.int=0L, round.start=TRUE, round.stop=FALSE, tabs.as.spaces=FALSE, tab.stops=8L, warn=warn, - term.cap.int=term.cap.int, x.len=length(starts), - ctl.int=ctl.int, normalize=normalize, + term.cap.int=TERM.CAP.INT, 
x.len=length(starts), + ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=terminate ) } else { diff --git a/R/strtrim.R b/R/strtrim.R index 83dee530..63f9f498 100644 --- a/R/strtrim.R +++ b/R/strtrim.R @@ -38,6 +38,7 @@ strtrim_ctl <- function( carry=getOption('fansi.carry', FALSE), terminate=getOption('fansi.terminate', TRUE) ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV( x=x, warn=warn, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate @@ -64,7 +65,7 @@ strtrim_ctl <- function( FALSE, 8L, warn, term.cap.int, TRUE, # first only - ctl.int, + CTL.INT, normalize, carry, terminate @@ -82,6 +83,7 @@ strtrim2_ctl <- function( carry=getOption('fansi.carry', FALSE), terminate=getOption('fansi.terminate', TRUE) ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV( x=x, warn=warn, ctl=ctl, tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, @@ -111,7 +113,7 @@ strtrim2_ctl <- function( tabs.as.spaces, tab.stops, warn, term.cap.int, TRUE, # first only - ctl.int, + CTL.INT, normalize, carry, terminate ) if(normalize) normalize_state(res) else res diff --git a/R/strwrap.R b/R/strwrap.R index 63decba9..356c0443 100644 --- a/R/strwrap.R +++ b/R/strwrap.R @@ -106,6 +106,7 @@ strwrap_ctl <- function( carry=getOption('fansi.carry', FALSE), terminate=getOption('fansi.terminate', TRUE) ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV( x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate @@ -118,16 +119,16 @@ strwrap_ctl <- function( FALSE, "", TRUE, FALSE, 8L, - warn, term.cap.int, + warn, TERM.CAP.INT, FALSE, # first_only - ctl.int, normalize, + CTL.INT, normalize, carry, terminate ) if(simplify) { if(normalize) normalize_state(unlist(res), warn, term.cap) else unlist(res) } else { - if(normalize) normalize_state_list(res, warn, term.cap.int) else res + if(normalize) normalize_state_list(res, warn, TERM.CAP.INT) else res } } #' @export @@ -151,6 +152,7 @@ strwrap2_ctl <- function( 
if(!is.logical(tabs.as.spaces)) tabs.as.spaces <- as.logical(tabs.as.spaces) if(wrap.always && width < 2L) stop("Width must be at least 2 in `wrap.always` mode.") + ## modifies / creates NEW VARS in fun env VAL_IN_ENV ( x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate, tab.stops=tab.stops, @@ -171,16 +173,16 @@ strwrap2_ctl <- function( wrap.always, pad.end, strip.spaces, tabs.as.spaces, tab.stops, - warn, term.cap.int, + warn, TERM.CAP.INT, FALSE, # first_only - ctl.int, normalize, + CTL.INT, normalize, carry, terminate ) if(simplify) { if(normalize) normalize_state(unlist(res), warn, term.cap) else unlist(res) } else { - if(normalize) normalize_state_list(res, warn, term.cap.int) else res + if(normalize) normalize_state_list(res, warn, TERM.CAP.INT) else res } } #' Control Sequence Aware Version of strwrap diff --git a/R/substr2.R b/R/substr2.R index c155dfd9..d3d61152 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -189,7 +189,8 @@ substr2_ctl <- function( carry=getOption('fansi.carry', FALSE), terminate=getOption('fansi.terminate', TRUE) ) { - VAL_IN_ENV( ## modifies / creates NEW VARS in fun env + ## modifies / creates NEW VARS in fun env + VAL_IN_ENV( x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate, tab.stops=tab.stops, tabs.as.spaces=tabs.as.spaces, type=type, round=round, @@ -200,13 +201,13 @@ substr2_ctl <- function( res[no.na] <- substr_ctl_internal( x[no.na], start=start[no.na], stop=stop[no.na], - type.int=type.int, + type.int=TYPE.INT, tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, - term.cap.int=term.cap.int, + term.cap.int=TERM.CAP.INT, round.start=round == 'start' || round == 'both', round.stop=round == 'stop' || round == 'both', - x.len=x.len, - ctl.int=ctl.int, normalize=normalize, + x.len=X.LEN, + ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=terminate ) res[!no.na] <- NA_character_ @@ -242,7 +243,8 @@ substr2_ctl <- 
function( carry=getOption('fansi.carry', FALSE), terminate=getOption('fansi.terminate', TRUE) ) { - VAL_IN_ENV( ## modifies / creates NEW VARS in fun env + ## modifies / creates NEW VARS in fun env + VAL_IN_ENV( x=x, warn=warn, term.cap=term.cap, ctl=ctl, normalize=normalize, carry=carry, terminate=terminate, tab.stops=tab.stops, tabs.as.spaces=tabs.as.spaces, round=round, start=start, stop=stop, @@ -258,33 +260,33 @@ substr2_ctl <- function( nc <- nchar_ctl(x, type=type, ctl=ctl, warn=warn) stop <- pmin(stop, nc) - value <- enc2utf8(as.character(value)) + value <- rep_len(enc2utf8(as.character(value)), X.LEN) # Actual replacement operation as substr/paste x[] <- paste0( substr_ctl_internal( - x, 1L, start - 1L, type.int=type.int, + x, 1L, start - 1L, type.int=TYPE.INT, round.start=round.a == 'start' || round.a == 'both', round.stop=round.a == 'stop' || round.a == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, - term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, + term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=terminate ), substr_ctl_internal( rep(value, length.out=length(x)), 1L, stop - start + 1L, - type.int=type.int, + type.int=TYPE.INT, round.start=round == 'start' || round == 'both', round.stop=round == 'stop' || round == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, - term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, + term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=terminate ), substr_ctl_internal( - x, stop + 1L, .Machine[['integer.max']], type.int=type.int, + x, stop + 1L, .Machine[['integer.max']], type.int=TYPE.INT, round.start=round.b == 'start' || round.b == 'both', round.stop=round.b == 'stop' || round.b == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, - term.cap.int=term.cap.int, ctl.int=ctl.int, normalize=normalize, + term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, 
normalize=normalize, carry=carry, terminate=terminate ) ) diff --git a/R/tohtml.R b/R/tohtml.R index 8c0cbdc5..6fd01d04 100644 --- a/R/tohtml.R +++ b/R/tohtml.R @@ -151,6 +151,7 @@ to_html <- function( classes=FALSE, carry=getOption('fansi.carry', TRUE) # different from other functions ) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, warn=warn, term.cap=term.cap, carry=carry) classes <- if(isTRUE(classes)) { FANSI.CLASSES @@ -161,7 +162,7 @@ to_html <- function( } else stop("Argument `classes` must be TRUE, FALSE, or a character vector.") - .Call(FANSI_esc_to_html, x, warn, term.cap.int, classes, carry) + .Call(FANSI_esc_to_html, x, warn, TERM.CAP.INT, classes, carry) } #' Convert Control Sequences to HTML Equivalents #' diff --git a/R/unhandled.R b/R/unhandled.R index 379ef4bc..e47b90ac 100644 --- a/R/unhandled.R +++ b/R/unhandled.R @@ -82,8 +82,9 @@ #' unhandled_ctl(string) unhandled_ctl <- function(x, term.cap=getOption('fansi.term.cap')) { + ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, term.cap=term.cap) - res <- .Call(FANSI_unhandled_esc, x, term.cap.int) + res <- .Call(FANSI_unhandled_esc, x, TERM.CAP.INT) names(res) <- c("index", "start", "stop", "error", "translated", "esc") errors <- c( 'unknown', 'special', 'exceed-term-cap', 'non-SGR/URL', 'malformed-CSI/OSC', From 404f592ba539e4615764ecf9cc8187b563f1121e Mon Sep 17 00:00:00 2001 From: brodieG Date: Mon, 18 Oct 2021 07:12:59 -0400 Subject: [PATCH 09/27] VAL_IN_ENV regression --- R/nchar.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/nchar.R b/R/nchar.R index 94c19b02..e91e4750 100644 --- a/R/nchar.R +++ b/R/nchar.R @@ -82,7 +82,7 @@ nchar_ctl <- function( ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, ctl=ctl, warn=warn) - type <- valid.types[TYPE.INT] + type <- valid.types[type.int] stripped <- strip_ctl(x, ctl=ctl, warn=warn) R.ver.gte.3.2.2 <- R.ver.gte.3.2.2 # "import" symbol from namespace From 49a28d608754cc3b1fb5cbe2969a51df2a415b2d Mon 
Sep 17 00:00:00 2001 Date: Mon, 18 Oct 2021 07:13:15 -0400 Subject: [PATCH 10/27] correct input length recycle --- R/substr2.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/substr2.R b/R/substr2.R index d3d61152..75f47f91 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -265,7 +265,7 @@ substr2_ctl <- function( # Actual replacement operation as substr/paste x[] <- paste0( substr_ctl_internal( - x, 1L, start - 1L, type.int=TYPE.INT, + x, rep(1L, X.LEN), start - 1L, type.int=TYPE.INT, round.start=round.a == 'start' || round.a == 'both', round.stop=round.a == 'stop' || round.a == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, @@ -273,7 +273,7 @@ substr2_ctl <- function( carry=carry, terminate=terminate ), substr_ctl_internal( - rep(value, length.out=length(x)), 1L, stop - start + 1L, + value, rep(1L, X.LEN), stop - start + 1L, type.int=TYPE.INT, round.start=round == 'start' || round == 'both', round.stop=round == 'stop' || round == 'both', @@ -282,7 +282,7 @@ substr2_ctl <- function( carry=carry, terminate=terminate ), substr_ctl_internal( - x, stop + 1L, .Machine[['integer.max']], type.int=TYPE.INT, + x, stop + 1L, rep(.Machine[['integer.max']], X.LEN), type.int=TYPE.INT, round.start=round.b == 'start' || round.b == 'both', round.stop=round.b == 'stop' || round.b == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, From a487f7205fee638f49636f6f03ef7a2f4b8a953d Mon Sep 17 00:00:00 2001 From: brodieG Date: Mon, 18 Oct 2021 07:45:26 -0400 Subject: [PATCH 11/27] fix case where 'value' shorter than replacement --- R/substr2.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/substr2.R b/R/substr2.R index 75f47f91..5329c640 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -256,11 +256,12 @@ substr2_ctl <- function( ) round.b <- round - # Adjust `stop` to be no longer than end of string + # Adjust `stop` to be no longer than end of string, also need to make sure the +
# overall string length is unchanged. nc <- nchar_ctl(x, type=type, ctl=ctl, warn=warn) stop <- pmin(stop, nc) - value <- rep_len(enc2utf8(as.character(value)), X.LEN) + ncv <- nchar_ctl(value, type=type, ctl=ctl, warn=warn) # Actual replacement operation as substr/paste x[] <- paste0( @@ -282,7 +283,8 @@ substr2_ctl <- function( carry=carry, terminate=terminate ), substr_ctl_internal( - x, stop + 1L, rep(.Machine[['integer.max']], X.LEN), type.int=TYPE.INT, + x, pmin(stop + 1L, start + ncv), + rep(.Machine[['integer.max']], X.LEN), type.int=TYPE.INT, round.start=round.b == 'start' || round.b == 'both', round.stop=round.b == 'stop' || round.b == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, From 612266104c1fc1c3add1ca5a69d8545c1677ca4d Mon Sep 17 00:00:00 2001 From: brodieG Date: Wed, 20 Oct 2021 08:07:56 -0400 Subject: [PATCH 12/27] remove single-use 'carry_internal' --- R/sgr.R | 13 ------------- R/substr2.R | 12 ++++++++---- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/R/sgr.R b/R/sgr.R index b0e8e35b..cce23d25 100644 --- a/R/sgr.R +++ b/R/sgr.R @@ -172,19 +172,6 @@ state_at_end <- function( carry ) } -## R-level carry for functions that do not implement it internally in C -## -## Arguments should already have been processed by VAL_IN_ENV - -carry_internal <- function(x, warn, term.cap.int, ctl.int, normalize, carry) { - if(!is.na(carry)) { - ends <- .Call( - FANSI_state_at_end, x, warn, term.cap.int, ctl.int, normalize, carry - ) - x <- paste0(c(carry, ends[-length(ends)]), x) - } - x -} # Given an SGR, compute the sequence that closes it #' @export diff --git a/R/substr2.R b/R/substr2.R index 5329c640..16a7cc84 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -361,10 +361,14 @@ substr_ctl_internal <- function( # in C given the current structure using ordered indices into each string. # Do before `unique` as this to equal strings may become different. 
- ends <- carry_internal( - x, warn=warn, term.cap.int=term.cap.int, ctl.int=ctl.int, - normalize=normalize, carry=carry - ) + x.carry <- character(X.LEN) + if(!is.na(carry)) { + ends <- .Call( + FANSI_state_at_end, x, warn, term.cap.int, ctl.int, normalize, carry + ) + x.carry <- c(carry, ends[-length(ends)]) + x <- paste0(x.carry, x) + } # We compute style at each start and stop position by getting all those # positions into a vector and then ordering them by position, keeping track of # original order and whether they are starting or ending positions (affects From 51a0b0ee78a849d2779997cd3a2d7ec43ce693f4 Mon Sep 17 00:00:00 2001 From: brodieG Date: Wed, 20 Oct 2021 08:08:18 -0400 Subject: [PATCH 13/27] clarify docs --- R/fansi-package.R | 104 +++++++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 37 deletions(-) diff --git a/R/fansi-package.R b/R/fansi-package.R index 704d8dac..e03ea524 100644 --- a/R/fansi-package.R +++ b/R/fansi-package.R @@ -21,9 +21,9 @@ #' #' @section Control Characters and Sequences: #' -#' Control characters and sequences are non-printing inline characters that can -#' be used to modify terminal display and behavior, for example by changing text -#' color or cursor position. +#' Control characters and sequences are non-printing inline characters or +#' sequences initiated by them that can be used to modify terminal display and +#' behavior, for example by changing text color or cursor position. #' #' We will refer to X3.64/ECMA-48/ISO-6429 control characters and sequences as #' "_Control Sequences_" hereafter. @@ -54,17 +54,17 @@ #' two characters long. There are many more unimplemented ECMA-48 #' specifications. #' -#' In theory it is possible to encode CSI sequenes with a single byte +#' In theory it is possible to encode CSI sequences with a single byte #' introducing character in the 0x40-0x5F range instead of the traditional #' "ESC[". 
Since this is rare and it conflicts with UTF-8 encoding, `fansi` #' does not support it. #' #' The special treatment of _Control Sequences_ is to compute their #' display/character width as zero. For the SGR subset of the CSI sequences and -#' OSC-anchored URLs,, `fansi` will also parse, interpret, and reapply the text -#' the sequences as needed. Whether a particular type of _Control Sequence_ is -#' treated specially can be specified via the `ctl` parameter to the `fansi` -#' functions that have it. +#' OSC-anchored URLs, `fansi` will also parse, interpret, and reapply the +#' sequences to the text as needed. Whether a particular type of _Control +#' Sequence_ is treated specially can be specified via the `ctl` parameter to +#' the `fansi` functions that have it. #' #' @section CSI SGR Control Sequences: #' @@ -131,9 +131,9 @@ #' While we try to minimize changes across `fansi` versions in how SGR sequences #' are output, we focus on minimizing the changes to rendered output, not #' necessarily the specific SGR sequences used to produce it. To maximize the -#' odds of getting stable SGR output use [`normalize_state`] and set `term.cap` to -#' a specific set of capabilities. In general it is likely best not to rely on -#' the exact SGR encoding of `fansi` output. +#' odds of getting stable SGR output use [`normalize_state`] and set `term.cap` +#' to a specific set of capabilities. In general it is likely best not to rely +#' on the exact SGR encoding of `fansi` output. #' #' Note that `width` calculations may also change across R versions, locales, #' etc. (see "Encodings / UTF-8" below). @@ -151,37 +151,67 @@ #' @section State Interactions: #' #' The cumulative nature of state as specified by SGR or OSC-anchored URLs means -#' that SGR in strings that are spliced will interact with each other. +#' that unterminated strings that are spliced will interact with each other. 
#' Additionally, a substring does not inherently contain all the information -#' required to recreate its state as it appeared in the source string. -#' -#' One form of interaction to consider is how a character vector provided to -#' `fansi` functions affect itself. By default, `fansi` assumes that each -#' element in an input character vector is independent, but this is incorrect if -#' the input is a single document with each element a line in it. In that -#' situation state from each line should bleed into subsequent ones. Setting -#' `carry = TRUE` enables the "single document" interpretation. -#' -#' Another form of interaction is when substrings produced by `fansi` are -#' spliced with or into other substrings. By default `fansi` automatically -#' terminates substrings it produces if they contain active formats or URLs. -#' This prevents the state bleeding into external strings, which is useful e.g. -#' when arranging text in columns. We can allow the state to bleed into -#' appended strings by setting `terminate = FALSE`. `carry` is unaffected by -#' `terminate` as `fansi` records the ending SGR state prior to termination -#' internally. +#' required to recreate its state as it appeared in the source string. The +#' default `fansi` configuration terminates extracted substrings and prepends +#' original state to them so they present on a stand alone basis as they did as part +#' of the original string. +#' +#' To allow state in substrings to affect subsequent strings they may be spliced +#' onto, set `terminate = FALSE`. Generally you should use `terminate = TRUE` +#' unless you are willing to deal with the resulting mess (see "Terminal +#' Quirks") for the sake of fine control of state bleeding. +#' +#' Additionally, by default, `fansi` assumes that each element in an input +#' character vector is independent, but this is incorrect if the input is a +#' single document with each element a line in it.
In that situation state from +#' each line should bleed into subsequent ones. Setting `carry = TRUE` enables +#' the "single document" interpretation. +#' +#' For `terminate = FALSE` and `carry = TRUE`, `fansi` will re-open active +#' state on each new element even if a terminal would naturally carry them +#' over. This is to allow the user to manually terminate elements without +#' losing them on the next element. #' #' Finally, `fansi` strings will be affected by any active state in strings they -#' are appended to. There are no parameters to control what happens -#' automatically in this case, but `fansi` provides several functions that can -#' help the user get their desired outcome. `state_at_end` computes the active -#' state the end of a string, this can then be prepended onto the _input_ of -#' `fansi` functions so that they are aware of the active style at the beginning -#' of the string. Alternatively, one could use `close_state(state_at_end(...))` -#' and pre-pend that to the _output_ of `fansi` functions so they are unaffected -#' by preceding SGR. One could also just prepend "ESC[0m", but in some cases as +#' are appended to. There are no parameters to control what happens in this +#' case, but `fansi` provides functions that can help the user get the desired +#' behavior. `state_at_end` computes the active state the end of a string, +#' which can then be prepended onto the _input_ of `fansi` functions so that +#' they are aware of the active style at the beginning of the string. +#' Alternatively, one could use `close_state(state_at_end(...))` and pre-pend +#' that to the _output_ of `fansi` functions so they are unaffected by preceding +#' SGR. One could also just prepend "ESC[0m", but in some cases as #' described in [`?normalize_state`][normalize_state] that is sub-optimal. #' +#' @section Terminal Quirks: +#' +#' Some terminals (e.g. 
OS X terminal, ITerm2) will pre-paint the entirety of a +#' new line with the currently active background before writing the contents of +#' the line. If there is a non-default active background color, any unwritten +#' columns in the new line will keep the prior background color even if the new +#' line changes the background color. To avoid this be sure to use `terminate = +#' TRUE` or to manually terminate each line with e.g. "ESC[0m". The +#' problem manifests as: +#' +#' ``` +#' " " = default background +#' "#" = new background +#' ">" = start new background +#' "!" = restore default background +#' +#' +-----------+ +#' | abc\n | +#' |>###\n | +#' |!abc\n#####| <- trailing "#" after newline are from pre-paint +#' | abc | +#' +-----------+ +#' ``` +#' +#' The simplest way to avoid this problem is to split input strings by any +#' newlines they contain, and use `terminate = TRUE` (the default). +#' #' @section Encodings / UTF-8: #' #' `fansi` will convert any non-ASCII strings to UTF-8 before processing them, From a5904f13cd105e45faf9ed781d70317480278e7e Mon Sep 17 00:00:00 2001 From: brodieG Date: Wed, 20 Oct 2021 08:09:16 -0400 Subject: [PATCH 14/27] clarify docs --- src/normalize.c | 6 ++---- src/write.c | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/normalize.c b/src/normalize.c index 443c3f7f..da75c34d 100644 --- a/src/normalize.c +++ b/src/normalize.c @@ -23,9 +23,9 @@ * * Or computes the size required * - * @param state the start of the string. - * @param sgr (by ref) final state * @param buff if NULL, computes the size required, if not writes it. + * @param *state state by reference so that we can recover the changed state + * info from reading for use in the `carry` case. 
*/ static int normalize( @@ -192,5 +192,3 @@ SEXP FANSI_normalize_state_list_ext( UNPROTECT(1); return res; } - - diff --git a/src/write.c b/src/write.c index ae3db0f0..b63c5793 100644 --- a/src/write.c +++ b/src/write.c @@ -31,8 +31,8 @@ * 2. Allocate the buffer with FANSI_size * 3. Re-run in write mode to write the buffer. * - * The functions accept a pointer to a FANSI_struct object. If the `.buff` - * member points to NULL, the functions ru in measure mode Otherwise, it runs in + * The functions accept a pointer to a FANSI_buff object. If the `.buff` member + * points to NULL, the functions run in measure mode. Otherwise, they run in * write mode. * * Here is an example implementation that uses a loop to iterate between measure @@ -53,6 +53,9 @@ * * FANSI_release_buff(&buff, 1); * + * NOTE: avoid using `R_alloc` in functions that use FANSI buffers, or in + * functions used by such functions (see "Buffer Allocation" below). + * * Buffers must be reset prior to the measure pass. Use FANSI_size_buff0 if you * know the size ahead of time and don't need the two pass measure/write * approach. @@ -60,14 +63,15 @@ * The key workhorses are the macros FANSI_W_COPY and FANSI_W_MCOPY which * roughly mimic the semantics of `strcpy` and `memcpy` respectively. Functions * that only use these functions to write to the buffer and accept the buffer by - * reference Ban then be used as `FANSI_W_fun1/2` are used above. + * reference can then be used as `FANSI_W_fun1/2` are used above. 
* * vvvvvvvv * !> DANGER len = 0; buff->buff = NULL; - buff->reset = 1; + buff->reset = 1; // Internal, only for _(reset|size)_buff } /* @@ -424,7 +428,7 @@ int FANSI_W_mcopy( error("Internal Error: exceeded target buffer size in _mcopy."); memcpy(buff->buff, tmp, (size_t) tmp_len); buff->buff += tmp_len; - *(buff->buff) = 0; // not necessary, but helps to debug + *(buff->buff) = 0; // as documented } else { FANSI_check_append(buff->len, tmp_len, err_msg, i); buff->len += tmp_len; From 81dca77ab7966e9094ea97896bd97db9edfbc2bc Mon Sep 17 00:00:00 2001 From: brodieG Date: Wed, 20 Oct 2021 08:09:47 -0400 Subject: [PATCH 15/27] initial 'bridge' implementation --- R/internal.R | 6 ++++ src/carry.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/fansi.h | 1 + src/init.c | 1 + 4 files changed, 95 insertions(+) diff --git a/R/internal.R b/R/internal.R index a78a76f5..1560ac28 100644 --- a/R/internal.R +++ b/R/internal.R @@ -56,6 +56,12 @@ check_enc <- function(x, i) .Call(FANSI_check_enc, x, as.integer(i)[1]) ctl_as_int <- function(x) .Call(FANSI_ctl_as_int, as.integer(x)) +## testing interface for bridging + +bridge <- function(end, restart, term.cap=getOption("fansi.term.cap")) { + VAL_IN_ENV(term.cap=term.cap) + .Call(FANSI_bridge_state, end, restart, TERM.CAP.INT) +} ## Common argument validation and conversion. Missing args okay. ## ## Converts common arguments to standardized forms if needed. diff --git a/src/carry.c b/src/carry.c index e42266fb..43829fa7 100644 --- a/src/carry.c +++ b/src/carry.c @@ -105,3 +105,90 @@ struct FANSI_state FANSI_carry_init( } +/* + * Compute Sequences to Transition from `end` to `restart` + * + * Very similar logic to that used in `normalize`, intended to handle the + * `substr_ctl(..., carry=TRUE, terminate=FALSE)` case.
+ */ + +static int bridge( + struct FANSI_buff * buff, + struct FANSI_state end, + struct FANSI_state restart, + R_xlen_t i +) { + struct FANSI_sgr to_close = FANSI_sgr_setdiff(end.sgr, restart.sgr); + + // Any prior open styles not overriden by new one need to be closed + FANSI_W_sgr_close(buff, to_close, 1, i); + + // Any newly open styles will need to be opened + struct FANSI_sgr to_open = FANSI_sgr_setdiff(restart.sgr, end.sgr); + FANSI_W_sgr(buff, to_open, 1, i); + + // Any changed URLs will need to be written (empty URL acts as a closer + // so simpler than with SGR). + if(FANSI_url_comp(end.url, restart.url)) + FANSI_W_url(buff, restart.url, 1, i); + + return buff->len; +} + +SEXP FANSI_bridge_state_ext(SEXP end, SEXP restart, SEXP term_cap) { + if(TYPEOF(end) != STRSXP) + error("Internal Error: `end` must be character vector"); // nocov + if(TYPEOF(restart) != STRSXP) + error("Internal Error: `restart` must be character vector"); // nocov + if(XLENGTH(end) != XLENGTH(restart)) + error("Internal Error: `end` and `restart` unequal lengths"); // nocov + + struct FANSI_buff buff; + FANSI_INIT_BUFF(&buff); + + R_xlen_t x_len = XLENGTH(end); + SEXP res = PROTECT(allocVector(STRSXP, x_len)); // WRE docs this is init'ed + + // We'll already have warned about these at some point + SEXP warn = PROTECT(ScalarLogical(0)); + struct FANSI_state st_end, st_rst; + + for(R_xlen_t i = 0; i < x_len; ++i) { + FANSI_interrupt(i); + if(STRING_ELT(end, i) == NA_STRING || STRING_ELT(restart, i) == NA_STRING) + continue; + if( + getCharCE(STRING_ELT(end, i)) != CE_NATIVE || + getCharCE(STRING_ELT(restart, i)) != CE_NATIVE + ) { + // nocov start + error( + "Internal Error: non-native encoding at index[%jd].", + FANSI_ind(i) + ); + // nocov end + } + // state_init is inefficient + st_end = state_at_end(FANSI_state_init(end, warn, term_cap, i), i); + st_rst = state_at_end(FANSI_state_init(restart, warn, term_cap, i), i); + + FANSI_reset_buff(&buff); + + // Measure + int len = 
bridge(&buff, st_end, st_rst, i); + if(len < 0) continue; + + // Write + FANSI_size_buff(&buff); + bridge(&buff, st_end, st_rst, i); + + SEXP reschr = PROTECT(FANSI_mkChar(buff, CE_NATIVE, i)); + SET_STRING_ELT(res, i, reschr); + UNPROTECT(1); + } + FANSI_release_buff(&buff, 1); + UNPROTECT(2); + return res; +} + + diff --git a/src/fansi.h b/src/fansi.h index c7201ecc..a34451eb 100644 --- a/src/fansi.h +++ b/src/fansi.h @@ -443,6 +443,7 @@ Go to for a copy of the license. SEXP x, SEXP warn, SEXP term_cap, SEXP ctl, SEXP norm, SEXP carry ); SEXP FANSI_utf8_to_cp_ext(SEXP x); + SEXP FANSI_bridge_state_ext(SEXP end, SEXP restart, SEXP term_cap); // - Internal funs ----------------------------------------------------------- diff --git a/src/init.c b/src/init.c index 4a279466..1e8fe7e3 100644 --- a/src/init.c +++ b/src/init.c @@ -52,6 +52,7 @@ R_CallMethodDef callMethods[] = { {"size_buff_prot_test", (DL_FUNC) &FANSI_size_buff_prot_test, 0}, {"state_at_end", (DL_FUNC) &FANSI_state_at_end_ext, 6}, {"utf8_to_cp", (DL_FUNC) &FANSI_utf8_to_cp_ext, 1}, + {"bridge_state", (DL_FUNC) &FANSI_bridge_state_ext, 3}, {NULL, NULL, 0} }; From 588de86882a22096514f4a61172ee58ac2d468a1 Mon Sep 17 00:00:00 2001 From: brodieG Date: Wed, 20 Oct 2021 08:21:33 -0400 Subject: [PATCH 16/27] change bridge to re-open unterminated state Decided this would be more convenient to allow for manual termination, documents already reflect this modified behavior.
--- src/carry.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/carry.c b/src/carry.c index 43829fa7..f93b967b 100644 --- a/src/carry.c +++ b/src/carry.c @@ -123,9 +123,8 @@ static int bridge( // Any prior open styles not overriden by new one need to be closed FANSI_W_sgr_close(buff, to_close, 1, i); - // Any newly open styles will need to be opened - struct FANSI_sgr to_open = FANSI_sgr_setdiff(restart.sgr, end.sgr); - FANSI_W_sgr(buff, to_open, 1, i); + // Open all new styles (an alternative would be to open only newly open ones) + FANSI_W_sgr(buff, restart.sgr, 1, i); // Any changed URLs will need to be written (empty URL acts as a closer // so simpler than with SGR). From eef67afdf5ee6450d4415e871c43bcbb0261a2ed Mon Sep 17 00:00:00 2001 From: brodieG Date: Wed, 20 Oct 2021 20:13:07 -0400 Subject: [PATCH 17/27] integrate normalize into 'bridge' --- R/fansi-package.R | 39 +++++++++++++++++++++++---------------- R/internal.R | 7 +++++-- R/substr2.R | 19 ++++++++++++++++--- src/carry.c | 19 +++++++++++++------ src/fansi.h | 2 +- src/state.c | 3 +++ 6 files changed, 61 insertions(+), 28 deletions(-) diff --git a/R/fansi-package.R b/R/fansi-package.R index e03ea524..f723a1e4 100644 --- a/R/fansi-package.R +++ b/R/fansi-package.R @@ -155,28 +155,28 @@ #' Additionally, a substring does not inherently contain all the information #' required to recreate its state as it appeared in the source string. The #' default `fansi` configuration terminates extracted substrings and prepends -#' original state to them so they present on a stand alone basis as they as part -#' of the original string. +#' original state to them so they present on a stand alone basis as they did as +#' part of the original string. #' -#' To allow state in substrings to affect subsequent strings they may be spliced -#' onto set `terminate = FALSE`. 
Generally you should use `terminate = TRUE` -#' unless you are willing to deal with the resulting mess (see "Terminal -#' Quirks") for the sake of fine control of state bleeding. +#' To allow state in substrings to affect subsequent strings that may be spliced +#' onto them set `terminate = FALSE`. Generally you should use `terminate = +#' TRUE` unless you are willing to deal with the resulting mess (see "Terminal +#' Quirks") in exchange for fine control of state bleeding. #' -#' Additionally, by default, `fansi` assumes that each element in an input -#' character vector is independent, but this is incorrect if the input is a -#' single document with each element a line in it. In that situation state from -#' each line should bleed into subsequent ones. Setting `carry = TRUE` enables -#' the "single document" interpretation. +#' By default, `fansi` assumes that each element in an input character vector is +#' independent, but this is incorrect if the input is a single document with +#' each element a line in it. In that situation state from each line should +#' bleed into subsequent ones. Setting `carry = TRUE` enables the "single +#' document" interpretation. #' #' For `terminate = FALSE` and `carry = TRUE`, `fansi` will re-open active #' state on each new element even if a terminal would naturally carry them #' over. This is to allow the user to manually terminate elements without -#' losing them on the next element. +#' losing carried state on the next element. #' -#' Finally, `fansi` strings will be affected by any active state in strings they -#' are appended to. There are no parameters to control what happens in this -#' case, but `fansi` provides functions that can help the user get the desired +#' `fansi` strings will be affected by any active state in strings they are +#' appended to. There are no parameters to control what happens in this case, +#' but `fansi` provides functions that can help the user get the desired #' behavior. 
`state_at_end` computes the active state the end of a string, #' which can then be prepended onto the _input_ of `fansi` functions so that #' they are aware of the active style at the beginning of the string. @@ -185,6 +185,10 @@ #' SGR. One could also just prepend "ESC[0m", but in some cases as #' described in [`?normalize_state`][normalize_state] that is sub-optimal. #' +#' If you intend to combine stateful `fansi` manipulated strings with your own, +#' it may be best to set `normalize = TRUE` for best compatibility (see +#' [`?normalize_state`][normalize_state].) +#' #' @section Terminal Quirks: #' #' Some terminals (e.g. OS X terminal, ITerm2) will pre-paint the entirety of a @@ -210,7 +214,10 @@ #' ``` #' #' The simplest way to avoid this problem is to split input strings by any -#' newlines they contain, and use `terminate = TRUE` (the default). +#' newlines they contain, and use `terminate = TRUE` (the default). A more +#' complex solution is to pad with spaces to the terminal window width before +#' emitting the newline to ensure the pre-paint is overpainted with the current +#' line's prevailing background color. #' #' @section Encodings / UTF-8: #' diff --git a/R/internal.R b/R/internal.R index 1560ac28..97edbdc6 100644 --- a/R/internal.R +++ b/R/internal.R @@ -58,9 +58,12 @@ ctl_as_int <- function(x) .Call(FANSI_ctl_as_int, as.integer(x)) ## testing interface for bridging -bridge <- function(end, restart, term.cap=getOption("fansi.term.cap")) { +bridge <- function( + end, restart, term.cap=getOption("fansi.term.cap"), + normalize=getOption('fansi.normalize', FALSE) +) { VAL_IN_ENV(term.cap=term.cap) - .Call(FANSI_bridge_state, end, restart, TERM.CAP.INT) + .Call(FANSI_bridge_state, end, restart, TERM.CAP.INT, normalize) } ## Common argument validation and conversion. Missing args okay. 
## diff --git a/R/substr2.R b/R/substr2.R index 16a7cc84..d73b9ba7 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -361,7 +361,7 @@ substr_ctl_internal <- function( # in C given the current structure using ordered indices into each string. # Do before `unique` as this to equal strings may become different. - x.carry <- character(X.LEN) + x.carry <- character(length(x)) if(!is.na(carry)) { ends <- .Call( FANSI_state_at_end, x, warn, term.cap.int, ctl.int, normalize, carry @@ -377,6 +377,8 @@ substr_ctl_internal <- function( # We do this for each unique string in `x` as the indices must be incrementing # for each of them. + # x.scalar is likely needed for strsplit (but not sure, this is after the fact + # documentation) x.scalar <- length(x) == 1 x.u <- if(x.scalar) x else unique_chr(x) ids <- if(x.scalar) seq_along(s.s.valid) else seq_along(x) @@ -388,7 +390,10 @@ substr_ctl_internal <- function( e.start <- start[elems] - 1L e.stop <- stop[elems] e.ids <- ids[elems] - x.elems <- if(x.scalar) rep(x, length.out=elems.len) else x[elems] + x.elems <- if(x.scalar) + rep(x, length.out=elems.len) else x[elems] + x.carries <- if(x.scalar) + rep(x.carry, length.out=elems.len) else x.carry[elems] # note, for expediency we're currently assuming that there is no overlap # between starts and stops @@ -434,8 +439,16 @@ substr_ctl_internal <- function( else "" substring <- substr(x.elems[full], start.ansi[full], stop.ansi[full]) - tmp <- paste0(start.tag[full], substring) term.cap <- VALID.TERM.CAP[term.cap.int] + tmp <- paste0( + if(!terminate && !is.na(carry)) { + bridge( + x.carries[full], start.tag[full], term.cap=term.cap, + normalize=normalize + ) + } else start.tag[full], + substring + ) res[elems[full]] <- paste0( if(normalize) normalize_state(tmp, warn=FALSE, term.cap=term.cap) else tmp, diff --git a/src/carry.c b/src/carry.c index f93b967b..c4367e2f 100644 --- a/src/carry.c +++ b/src/carry.c @@ -116,32 +116,39 @@ static int bridge( struct FANSI_buff * buff, struct 
FANSI_state end, struct FANSI_state restart, + int normalize, R_xlen_t i ) { struct FANSI_sgr to_close = FANSI_sgr_setdiff(end.sgr, restart.sgr); // Any prior open styles not overriden by new one need to be closed - FANSI_W_sgr_close(buff, to_close, 1, i); + // One option is to always normalize the close, but ended up preferring to be + // consistent with the use of `normalize` as we can't actually know how the + // closed style was closed. + FANSI_W_sgr_close(buff, to_close, normalize, i); // Open all new styles (an alternative would be to open only newly open ones) - FANSI_W_sgr(buff, restart.sgr, 1, i); + FANSI_W_sgr(buff, restart.sgr, normalize, i); // Any changed URLs will need to be written (empty URL acts as a closer // so simpler than with SGR). if(FANSI_url_comp(end.url, restart.url)) - FANSI_W_url(buff, restart.url, 1, i); + FANSI_W_url(buff, restart.url, normalize, i); return buff->len; } -SEXP FANSI_bridge_state_ext(SEXP end, SEXP restart, SEXP term_cap) { +SEXP FANSI_bridge_state_ext(SEXP end, SEXP restart, SEXP term_cap, SEXP norm) { if(TYPEOF(end) != STRSXP) error("Internal Error: `end` must be character vector"); // nocov if(TYPEOF(restart) != STRSXP) error("Internal Error: `restart` must be character vector"); // nocov if(XLENGTH(end) != XLENGTH(restart)) error("Internal Error: `end` and `restart` unequal lengths"); // nocov + if(TYPEOF(norm) != LGLSXP || XLENGTH(norm) != 1) + error("Argument `normalize` should be TRUE or FALSE."); // nocov + int normalize = asInteger(norm); struct FANSI_buff buff; FANSI_INIT_BUFF(&buff); @@ -174,12 +181,12 @@ SEXP FANSI_bridge_state_ext(SEXP end, SEXP restart, SEXP term_cap) { FANSI_reset_buff(&buff); // Measure - int len = bridge(&buff, st_end, st_rst, i); + int len = bridge(&buff, st_end, st_rst, normalize, i); if(len < 0) continue; // Write FANSI_size_buff(&buff); - bridge(&buff, st_end, st_rst, i); + bridge(&buff, st_end, st_rst, normalize, i); SEXP reschr = PROTECT(FANSI_mkChar(buff, CE_NATIVE, i)); 
SET_STRING_ELT(res, i, reschr); diff --git a/src/fansi.h b/src/fansi.h index a34451eb..c71ebd75 100644 --- a/src/fansi.h +++ b/src/fansi.h @@ -443,7 +443,7 @@ Go to for a copy of the license. SEXP x, SEXP warn, SEXP term_cap, SEXP ctl, SEXP norm, SEXP carry ); SEXP FANSI_utf8_to_cp_ext(SEXP x); - SEXP FANSI_bridge_state_ext(SEXP end, SEXP restart, SEXP term_cap); + SEXP FANSI_bridge_state_ext(SEXP end, SEXP restart, SEXP term_cap, SEXP norm); // - Internal funs ----------------------------------------------------------- diff --git a/src/state.c b/src/state.c index 0c20834a..45ce56ef 100644 --- a/src/state.c +++ b/src/state.c @@ -349,6 +349,9 @@ int FANSI_sgr_comp(struct FANSI_sgr target, struct FANSI_sgr current) { * * This is so that we can then generate the closing SGRs required to transition * from one state to the other (used for diff). + * + * A color change is not considered a missing style as the new color would just + * overwrite the old. */ struct FANSI_sgr FANSI_sgr_setdiff(struct FANSI_sgr old, struct FANSI_sgr new) { struct FANSI_sgr res = { From ea7d45f7c2a3f734ccc975e21551a35841fbd833 Mon Sep 17 00:00:00 2001 From: brodieG Date: Wed, 20 Oct 2021 22:07:23 -0400 Subject: [PATCH 18/27] don't terminate end of 'substr_ctl<-' --- R/substr2.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/substr2.R b/R/substr2.R index d73b9ba7..4bd118b2 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -282,6 +282,8 @@ substr2_ctl <- function( term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=terminate ), + # This last one should not terminate ever as it preserves whatever the + # original string did.
substr_ctl_internal( x, pmin(stop + 1L, start + ncv), rep(.Machine[['integer.max']], X.LEN), type.int=TYPE.INT, @@ -289,7 +291,7 @@ substr2_ctl <- function( round.stop=round.b == 'stop' || round.b == 'both', tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, normalize=normalize, - carry=carry, terminate=terminate + carry=carry, terminate=FALSE ) ) x From fdfc7aed4126faeac1353dd392a1feda547201c2 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 11:09:48 -0400 Subject: [PATCH 19/27] add 'fwl' utility fun --- R/misc.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/misc.R b/R/misc.R index 63c421f5..968d939b 100644 --- a/R/misc.R +++ b/R/misc.R @@ -495,4 +495,17 @@ size_buff_prot_test <- function() { res[['self']] <- match(res[['self']], addresses) res } +#' Display Strings to Terminal +#' +#' Shortcut for [`writeLines`] with an additional terminating "ESC[0m". +#' +#' @keywords internal +#' @export +#' @param ... character vectors to display. +#' @param end character what to output after the primary inputs. 
+#' @return whatever writeLines returns + +fwl <- function(..., end='\033[0m') { + writeLines(c(..., end)) +} From ef01a6aa41d406da970c66cc9b5b575e70a058c0 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 11:16:03 -0400 Subject: [PATCH 20/27] always bridge state if carry=TRUE --- R/substr2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/substr2.R b/R/substr2.R index 4bd118b2..b9bfc849 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -443,7 +443,7 @@ substr_ctl_internal <- function( substring <- substr(x.elems[full], start.ansi[full], stop.ansi[full]) term.cap <- VALID.TERM.CAP[term.cap.int] tmp <- paste0( - if(!terminate && !is.na(carry)) { + if(!is.na(carry)) { bridge( x.carries[full], start.tag[full], term.cap=term.cap, normalize=normalize From 3225559adac241771197e4116ed4b011e221088e Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 12:53:17 -0400 Subject: [PATCH 21/27] remove duplicate warnings --- R/substr2.R | 3 +++ src/state.c | 7 +++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/substr2.R b/R/substr2.R index b9bfc849..4106d331 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -355,6 +355,7 @@ substr_ctl_internal <- function( if(tabs.as.spaces) x <- .Call(FANSI_tabs_as_spaces, x, tab.stops, warn, term.cap.int, ctl.int) + warn <- warn && !tabs.as.spaces res <- character(length(x)) s.s.valid <- stop >= start & stop @@ -371,6 +372,8 @@ substr_ctl_internal <- function( x.carry <- c(carry, ends[-length(ends)]) x <- paste0(x.carry, x) } + warn <- warn && is.na(carry) + # We compute style at each start and stop position by getting all those # positions into a vector and then ordering them by position, keeping track of # original order and whether they are starting or ending positions (affects diff --git a/src/state.c b/src/state.c index 45ce56ef..159befc5 100644 --- a/src/state.c +++ b/src/state.c @@ -223,7 +223,6 @@ static struct FANSI_state_pair state_at_pos2( state_res = state_restart = state; int 
pos_new, pos_restart; int pos_ini = pos; - int warn_max = 0; int os = overshoot; pos_new = pos_restart = type ? state.pos_width : state.pos_raw; @@ -234,7 +233,6 @@ static struct FANSI_state_pair state_at_pos2( ) { pos_restart = pos_new; state = FANSI_read_next(state, i, 1); - warn_max = warn_max < state.warn ? state.warn : warn_max; pos_new = type ? state.pos_width : state.pos_raw; // Last spot that's safe to restart from either as start or stop @@ -253,7 +251,7 @@ static struct FANSI_state_pair state_at_pos2( } } // Avoid potential double warning next time we read - state_res.warn = state_restart.warn = warn_max; + state_restart.warn = state.warn = state.warn; return (struct FANSI_state_pair){.cur=state_res, .restart=state_restart}; } @@ -596,7 +594,8 @@ SEXP FANSI_state_at_pos_ext( error("Internal Error: `pos` must be sorted %d %d.", pos_i[i], pos_prev); // nocov end - // index could be int or double + // index could be int or double (should we just coerce to double, assuming + // 64 bit IEEE754 double?) R_xlen_t id_i; id_i = (R_xlen_t)(TYPEOF(ids) == INTSXP ? 
id_i_p.i[i] : id_i_p.d[i]) - 1; From 0e5343eaa9ac193d1cf4fc7a7bd4a9d8e21063c8 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 20:42:34 -0400 Subject: [PATCH 22/27] more warning fixups --- R/substr2.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/substr2.R b/R/substr2.R index 4106d331..f7b69b94 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -263,13 +263,13 @@ substr2_ctl <- function( value <- rep_len(enc2utf8(as.character(value)), X.LEN) ncv <- nchar_ctl(value, type=type, ctl=ctl, warn=warn) - # Actual replacement operation as substr/paste + # All warnings should have been emitted by `nchar_ctl` above x[] <- paste0( substr_ctl_internal( x, rep(1L, X.LEN), start - 1L, type.int=TYPE.INT, round.start=round.a == 'start' || round.a == 'both', round.stop=round.a == 'stop' || round.a == 'both', - tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, + tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=FALSE, term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=terminate ), @@ -278,7 +278,7 @@ substr2_ctl <- function( type.int=TYPE.INT, round.start=round == 'start' || round == 'both', round.stop=round == 'stop' || round == 'both', - tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, + tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=FALSE, term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=terminate ), @@ -289,7 +289,7 @@ substr2_ctl <- function( rep(.Machine[['integer.max']], X.LEN), type.int=TYPE.INT, round.start=round.b == 'start' || round.b == 'both', round.stop=round.b == 'stop' || round.b == 'both', - tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=warn, + tabs.as.spaces=tabs.as.spaces, tab.stops=tab.stops, warn=FALSE, term.cap.int=TERM.CAP.INT, ctl.int=CTL.INT, normalize=normalize, carry=carry, terminate=FALSE ) From 949bbc6d6976f5417760e1c6aa984eeaabed98a4 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 
Oct 2021 21:39:36 -0400 Subject: [PATCH 23/27] fixup switch to UC for VAL_IN_ENV --- R/sgr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sgr.R b/R/sgr.R index cce23d25..71282bd1 100644 --- a/R/sgr.R +++ b/R/sgr.R @@ -113,7 +113,7 @@ has_ctl <- function(x, ctl='all', warn=getOption('fansi.warn'), which) { } ## modifies / creates NEW VARS in fun env VAL_IN_ENV(x=x, ctl=ctl, warn=warn) - if(length(ctl.int)) { + if(length(CTL.INT)) { .Call(FANSI_has_csi, x, CTL.INT, warn) } else rep(FALSE, length(x)) } From 22c059f096059da1288207d38040f2a0d14aa76b Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 21:40:18 -0400 Subject: [PATCH 24/27] doc --- NAMESPACE | 1 + R/fansi-package.R | 20 ++++--- R/substr2.R | 76 +++++++++++++++++++------- man/fansi.Rd | 119 +++++++++++++++++++++++++++-------------- man/fwl.Rd | 20 +++++++ man/normalize_state.Rd | 17 +++--- man/sgr_to_html.Rd | 17 +++--- man/state_at_end.Rd | 17 +++--- man/strsplit_ctl.Rd | 21 ++++---- man/strsplit_sgr.Rd | 21 ++++---- man/strtrim_ctl.Rd | 21 ++++---- man/strtrim_sgr.Rd | 21 ++++---- man/strwrap_ctl.Rd | 21 ++++---- man/strwrap_sgr.Rd | 21 ++++---- man/substr_ctl.Rd | 72 ++++++++++++++++++------- man/substr_sgr.Rd | 21 ++++---- man/to_html.Rd | 17 +++--- 17 files changed, 349 insertions(+), 174 deletions(-) create mode 100644 man/fwl.Rd diff --git a/NAMESPACE b/NAMESPACE index f75d8b9c..8ae17b8d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export("substr2_ctl<-") export("substr_ctl<-") export(close_state) export(fansi_lines) +export(fwl) export(has_ctl) export(has_sgr) export(html_code_block) diff --git a/R/fansi-package.R b/R/fansi-package.R index f723a1e4..24e7d87b 100644 --- a/R/fansi-package.R +++ b/R/fansi-package.R @@ -132,8 +132,10 @@ #' are output, we focus on minimizing the changes to rendered output, not #' necessarily the specific SGR sequences used to produce it. 
To maximize the #' odds of getting stable SGR output use [`normalize_state`] and set `term.cap` -#' to a specific set of capabilities. In general it is likely best not to rely -#' on the exact SGR encoding of `fansi` output. +#' to a specific set of capabilities. +#' +#' **In general it is likely best not to rely on the exact SGR encoding of +#' `fansi` output, particularly in tests.** #' #' Note that `width` calculations may also change across R versions, locales, #' etc. (see "Encodings / UTF-8" below). @@ -169,10 +171,16 @@ #' bleed into subsequent ones. Setting `carry = TRUE` enables the "single #' document" interpretation. #' -#' For `terminate = FALSE` and `carry = TRUE`, `fansi` will re-open active -#' state on each new element even if a terminal would naturally carry them -#' over. This is to allow the user to manually terminate elements without -#' losing carried state on the next element. +#' To most closely approximate what `writeLines(x)` produces on your terminal, +#' where `x` is a stateful string, use `writeLines(fansi_fun(x, carry=TRUE, +#' terminate=FALSE))`. `fansi_fun` is a stand-in for any of the `fansi` string +#' manipulation functions. Note that even with a "null-op" such as +#' `substr_ctl(x, 1, nchar_ctl(x), carry=TRUE, terminate=FALSE)` the output +#' control sequences may not match the input ones, but the output _should_ look +#' the same if displayed to the terminal. With these settings `fansi` will +#' re-open active state on each new element even if a terminal would naturally +#' carry them over. This is to allow the user to manually terminate elements +#' without losing carried state on the next element. #' #' `fansi` strings will be affected by any active state in strings they are #' appended to. 
There are no parameters to control what happens in this case, diff --git a/R/substr2.R b/R/substr2.R index f7b69b94..ac098309 100644 --- a/R/substr2.R +++ b/R/substr2.R @@ -47,12 +47,30 @@ #' directly from Gábor Csárdi's `crayon` package, although the implementation of #' the calculation is different. #' -#' Replacement functions are implemented as two substring operations to select -#' the beginning and end of the final string, and a `paste` operation to stick -#' all the pieces back together. The `carry` parameter is applied separately to -#' the `value` and to the `x` parameter. Styles in `value` will only carry to -#' substrings in the result that were originally part of `value`, and vice -#' versa. +#' Replacement functions are implemented as three substring operations, so: +#' ``` +#' x <- "ABC" +#' y <- "_." +#' substr_ctl(x, 2, 2, ...) <- y +#' ``` +#' Is treated roughly as: +#' ``` +#' x <- paste0( +#' substr(x, 1, 1, ...), +#' substr(y, 1, 1, ...), +#' substr(x, 3, 3, terminate=FALSE, ...) +#' ) +#' ``` +#' Except for the `terminate` parameter for the trailing substring, all other +#' parameters are passed from `substr_ctl<-` to the internal substring calls. +#' If you wish for the whole return value to be terminated you must manually add +#' terminating sequences. `substr_ctl` refrains from doing so to maintain the +#' illusion of a string modified in place. +#' +#' Another implication of the three substring approach is that the `carry` +#' parameter causes state to carry within the original string and the +#' replacement values independently, as if they were columns of text cut from +#' different pages and pasted together. #' #' @note Non-ASCII strings are converted to and returned in UTF-8 encoding. #' Width calculations will not work properly in R < 3.2.2. @@ -122,17 +140,20 @@ #' normalized strings will occupy more space (e.g. 
"\033[31;42m" becomes #' "\033[31m\033[42m"), but will work better with code that assumes each SGR #' code will be in its own escape as `crayon` does. -#' @param carry TRUE, FALSE, or a scalar string, controls whether active SGR -#' present at the end of an input vector element is carried into the next -#' vector element. If FALSE each vector element is interpreted as if there -#' were no active state when it begins. If character, then the active -#' state at the end of the `carry` string is carried into the first element of -#' `x`. See the "State Interactions" section of [`?fansi`][fansi] for -#' details. +#' @param carry TRUE, FALSE (default), or a scalar string, controls whether to +#' interpret the character vector as a "single document" (TRUE or string) or +#' as independent elements (FALSE). In "single document" mode, active state +#' at the end of an input element is considered active at the beginning of the +#' next vector element, simulating what happens with a document with active +#' state at the end of a line. If FALSE each vector element is interpreted as +#' if there were no active state when it begins. If character, then the +#' active state at the end of the `carry` string is carried into the first +#' element of `x`. See the "State Interactions" section of [`?fansi`][fansi] +#' for details. #' @param terminate TRUE (default) or FALSE whether substrings should have #' active state closed to avoid it bleeding into other strings they may be -#' prepended onto. See the "State Interactions" section of [`?fansi`][fansi] -#' for details. +#' prepended onto. This does not stop state from carrying if `carry = TRUE`. +#' See the "State Interactions" section of [`?fansi`][fansi] for details. #' @param value a character vector or object that can be coerced to such. #' @return a character vector of the same length and with the same attributes as #' x (after possible coercion and re-encoding to UTF-8). 
@@ -150,8 +171,8 @@ #' substr2_ctl(cn.string, 2, 3, type='width', round='stop') #' #' ## We can specify which escapes are considered special: -#' substr_ctl("\033[31mhello\tworld", 1, 6, ctl='sgr') -#' substr_ctl("\033[31mhello\tworld", 1, 6, ctl=c('all', 'c0')) +#' substr_ctl("\033[31mhello\tworld", 1, 6, ctl='sgr', warn=FALSE) +#' substr_ctl("\033[31mhello\tworld", 1, 6, ctl=c('all', 'c0'), warn=FALSE) #' #' ## `carry` allows SGR to carry from one element to the next #' substr_ctl(c("\033[33mhello", "world"), 1, 3) @@ -159,9 +180,24 @@ #' substr_ctl(c("\033[33mhello", "world"), 1, 3, carry="\033[44m") #' #' ## We can omit the termination -#' bleed <- substr_ctl(c("\033[41hello", "world"), 1, 3, terminate=FALSE) -#' \dontrun{writeLines(bleed)} # Style will bleed out of string -#' writeLines("\033[m") # Stop bleeding if needed +#' bleed <- substr_ctl(c("\033[41mhello", "world"), 1, 3, terminate=FALSE) +#' writeLines(bleed) # Style will bleed out of string +#' end <- "\033[0m\n" +#' writeLines(end) # Stanch bleeding +#' +#' ## Replacement functions +#' x0<- x1 <- x2 <- x3 <- c("\033[42mABC", "\033[34mDEF") +#' substr_ctl(x1, 2, 2) <- "_" +#' substr_ctl(x2, 2, 2) <- "\033[m_" +#' substr_ctl(x3, 2, 2) <- "\033[45m_" +#' writeLines(c(x0, end, x1, end, x2, end, x3, end)) +#' +#' ## With `carry = TRUE` strings look like original +#' x0<- x1 <- x2 <- x3 <- c("\033[42mABC", "\033[34mDEF") +#' substr_ctl(x0, 2, 2, carry=TRUE) <- "_" +#' substr_ctl(x1, 2, 2, carry=TRUE) <- "\033[m_" +#' substr_ctl(x2, 2, 2, carry=TRUE) <- "\033[45m_" +#' writeLines(c(x0, end, x1, end, x2, end, x3, end)) substr_ctl <- function( x, start, stop, diff --git a/man/fansi.Rd b/man/fansi.Rd index d0019e86..b9e4c894 100644 --- a/man/fansi.Rd +++ b/man/fansi.Rd @@ -11,9 +11,9 @@ the effects of some ANSI X3.64 (a.k.a. ECMA-48, ISO-6429) control sequences. 
\section{Control Characters and Sequences}{ -Control characters and sequences are non-printing inline characters that can -be used to modify terminal display and behavior, for example by changing text -color or cursor position. +Control characters and sequences are non-printing inline characters or +sequences initiated by them that can be used to modify terminal display and +behavior, for example by changing text color or cursor position. We will refer to X3.64/ECMA-48/ISO-6429 control characters and sequences as "\emph{Control Sequences}" hereafter. @@ -44,17 +44,17 @@ characters, but \code{fansi} will (incorrectly) treat them as if they were two characters long. There are many more unimplemented ECMA-48 specifications. -In theory it is possible to encode CSI sequenes with a single byte +In theory it is possible to encode CSI sequences with a single byte introducing character in the 0x40-0x5F range instead of the traditional "ESC[". Since this is rare and it conflicts with UTF-8 encoding, \code{fansi} does not support it. The special treatment of \emph{Control Sequences} is to compute their display/character width as zero. For the SGR subset of the CSI sequences and -OSC-anchored URLs,, \code{fansi} will also parse, interpret, and reapply the text -the sequences as needed. Whether a particular type of \emph{Control Sequence} is -treated specially can be specified via the \code{ctl} parameter to the \code{fansi} -functions that have it. +OSC-anchored URLs, \code{fansi} will also parse, interpret, and reapply the +sequences to the text as needed. Whether a particular type of \emph{Control +Sequence} is treated specially can be specified via the \code{ctl} parameter to +the \code{fansi} functions that have it. } \section{CSI SGR Control Sequences}{ @@ -124,9 +124,11 @@ same as replacement (e.g. if you have a color active and pick another one). 
While we try to minimize changes across \code{fansi} versions in how SGR sequences are output, we focus on minimizing the changes to rendered output, not necessarily the specific SGR sequences used to produce it. To maximize the -odds of getting stable SGR output use \code{\link{normalize_state}} and set \code{term.cap} to -a specific set of capabilities. In general it is likely best not to rely on -the exact SGR encoding of \code{fansi} output. +odds of getting stable SGR output use \code{\link{normalize_state}} and set \code{term.cap} +to a specific set of capabilities. + +\strong{In general it is likely best not to rely on the exact SGR encoding of +\code{fansi} output, particularly in tests.} Note that \code{width} calculations may also change across R versions, locales, etc. (see "Encodings / UTF-8" below). @@ -148,36 +150,75 @@ or terminates them as needed. The cumulative nature of state as specified by SGR or OSC-anchored URLs means -that SGR in strings that are spliced will interact with each other. +that unterminated strings that are spliced will interact with each other. Additionally, a substring does not inherently contain all the information -required to recreate its state as it appeared in the source string. - -One form of interaction to consider is how a character vector provided to -\code{fansi} functions affect itself. By default, \code{fansi} assumes that each -element in an input character vector is independent, but this is incorrect if -the input is a single document with each element a line in it. In that -situation state from each line should bleed into subsequent ones. Setting -\code{carry = TRUE} enables the "single document" interpretation. - -Another form of interaction is when substrings produced by \code{fansi} are -spliced with or into other substrings. By default \code{fansi} automatically -terminates substrings it produces if they contain active formats or URLs. 
-This prevents the state bleeding into external strings, which is useful e.g. -when arranging text in columns. We can allow the state to bleed into -appended strings by setting \code{terminate = FALSE}. \code{carry} is unaffected by -\code{terminate} as \code{fansi} records the ending SGR state prior to termination -internally. - -Finally, \code{fansi} strings will be affected by any active state in strings they -are appended to. There are no parameters to control what happens -automatically in this case, but \code{fansi} provides several functions that can -help the user get their desired outcome. \code{state_at_end} computes the active -state the end of a string, this can then be prepended onto the \emph{input} of -\code{fansi} functions so that they are aware of the active style at the beginning -of the string. Alternatively, one could use \code{close_state(state_at_end(...))} -and pre-pend that to the \emph{output} of \code{fansi} functions so they are unaffected -by preceding SGR. One could also just prepend "ESC[0m", but in some cases as +required to recreate its state as it appeared in the source string. The +default \code{fansi} configuration terminates extracted substrings and prepends +original state to them so they present on a stand-alone basis as they did as +part of the original string. + +To allow state in substrings to affect subsequent strings that may be spliced +onto them, set \code{terminate = FALSE}. Generally you should use \code{terminate = TRUE} unless you are willing to deal with the resulting mess (see "Terminal +Quirks") in exchange for fine control of state bleeding. + +By default, \code{fansi} assumes that each element in an input character vector is +independent, but this is incorrect if the input is a single document with +each element a line in it. In that situation state from each line should +bleed into subsequent ones. Setting \code{carry = TRUE} enables the "single +document" interpretation.
+ +To most closely approximate what \code{writeLines(x)} produces on your terminal, +where \code{x} is a stateful string, use \code{writeLines(fansi_fun(x, carry=TRUE, terminate=FALSE))}. \code{fansi_fun} is a stand-in for any of the \code{fansi} string +manipulation functions. Note that even with a "null-op" such as +\code{substr_ctl(x, 1, nchar_ctl(x), carry=TRUE, terminate=FALSE)} the output +control sequences may not match the input ones, but the output \emph{should} look +the same if displayed to the terminal. With these settings \code{fansi} will +re-open active state on each new element even if a terminal would naturally +carry it over. This is to allow the user to manually terminate elements +without losing carried state on the next element. + +\code{fansi} strings will be affected by any active state in strings they are +appended to. There are no parameters to control what happens in this case, +but \code{fansi} provides functions that can help the user get the desired +behavior. \code{state_at_end} computes the active state at the end of a string, +which can then be prepended onto the \emph{input} of \code{fansi} functions so that +they are aware of the active style at the beginning of the string. +Alternatively, one could use \code{close_state(state_at_end(...))} and prepend +that to the \emph{output} of \code{fansi} functions so they are unaffected by preceding +SGR. One could also just prepend "ESC[0m", but in some cases as described in \code{\link[=normalize_state]{?normalize_state}} that is sub-optimal. + +If you intend to combine stateful \code{fansi} manipulated strings with your own, +it may be best to set \code{normalize = TRUE} for best compatibility (see +\code{\link[=normalize_state]{?normalize_state}}). +} + +\section{Terminal Quirks}{ + + +Some terminals (e.g. OS X terminal, iTerm2) will pre-paint the entirety of a +new line with the currently active background before writing the contents of +the line.
If there is a non-default active background color, any unwritten +columns in the new line will keep the prior background color even if the new +line changes the background color. To avoid this be sure to use \code{terminate = TRUE} or to manually terminate each line with e.g. "ESC[0m". The +problem manifests as:\preformatted{" " = default background +"#" = new background +">" = start new background +"!" = restore default background + ++-----------+ +| abc\\n | +|>###\\n | +|!abc\\n#####| <- trailing "#" after newline are from pre-paint +| abc | ++-----------+ +} + +The simplest way to avoid this problem is to split input strings by any +newlines they contain, and use \code{terminate = TRUE} (the default). A more +complex solution is to pad with spaces to the terminal window width before +emitting the newline to ensure the pre-paint is overpainted with the current +line's prevailing background color. } \section{Encodings / UTF-8}{ diff --git a/man/fwl.Rd b/man/fwl.Rd new file mode 100644 index 00000000..0839a76b --- /dev/null +++ b/man/fwl.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{fwl} +\alias{fwl} +\title{Display Strings to Terminal} +\usage{ +fwl(..., end = "\\033[0m") +} +\arguments{ +\item{...}{character vectors to display.} + +\item{end}{character what to output after the primary inputs.} +} +\value{ +whatever writeLines returns +} +\description{ +Shortcut for \code{\link{writeLines}} with an additional terminating "ESC[0m". +} +\keyword{internal} diff --git a/man/normalize_state.Rd b/man/normalize_state.Rd index b0118e68..67b50202 100644 --- a/man/normalize_state.Rd +++ b/man/normalize_state.Rd @@ -31,13 +31,16 @@ terminal capabilities. See \code{\link{term_cap_test}} for details. 
"all" behav it does for the \code{ctl} parameter: "all" combined with any other value means all terminal capabilities except that one.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} } \value{ \code{x}, with all SGRs normalized. diff --git a/man/sgr_to_html.Rd b/man/sgr_to_html.Rd index e8218f90..3ac2186a 100644 --- a/man/sgr_to_html.Rd +++ b/man/sgr_to_html.Rd @@ -64,13 +64,16 @@ mapped. other 8-bit colors are mapped. }} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. 
See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} } \value{ A character vector of the same length as \code{x} with all escape diff --git a/man/state_at_end.Rd b/man/state_at_end.Rd index 461b22f3..e7931127 100644 --- a/man/state_at_end.Rd +++ b/man/state_at_end.Rd @@ -45,13 +45,16 @@ normalized strings will occupy more space (e.g. "\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). 
In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} } \value{ character vector same length as \code{x}. diff --git a/man/strsplit_ctl.Rd b/man/strsplit_ctl.Rd index 58e5ee89..621ceb53 100644 --- a/man/strsplit_ctl.Rd +++ b/man/strsplit_ctl.Rd @@ -87,18 +87,21 @@ normalized strings will occupy more space (e.g. "\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. 
If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} } \value{ A list of the same length as \code{x}, the \code{i}-th element of which diff --git a/man/strsplit_sgr.Rd b/man/strsplit_sgr.Rd index 69f32555..25172869 100644 --- a/man/strsplit_sgr.Rd +++ b/man/strsplit_sgr.Rd @@ -66,18 +66,21 @@ normalized strings will occupy more space (e.g. "\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. 
If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} } \value{ A list of the same length as \code{x}, the \code{i}-th element of which diff --git a/man/strtrim_ctl.Rd b/man/strtrim_ctl.Rd index d2369935..4b91f63b 100644 --- a/man/strtrim_ctl.Rd +++ b/man/strtrim_ctl.Rd @@ -67,18 +67,21 @@ normalized strings will occupy more space (e.g. "\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). 
In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} \item{tabs.as.spaces}{FALSE (default) or TRUE, whether to convert tabs to spaces. This can only be set to TRUE if \code{strip.spaces} is FALSE.} diff --git a/man/strtrim_sgr.Rd b/man/strtrim_sgr.Rd index 44256466..da8a1979 100644 --- a/man/strtrim_sgr.Rd +++ b/man/strtrim_sgr.Rd @@ -45,18 +45,21 @@ normalized strings will occupy more space (e.g. "\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. 
See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} \item{tabs.as.spaces}{FALSE (default) or TRUE, whether to convert tabs to spaces. This can only be set to TRUE if \code{strip.spaces} is FALSE.} diff --git a/man/strwrap_ctl.Rd b/man/strwrap_ctl.Rd index f452c7d6..a6ca20c5 100644 --- a/man/strwrap_ctl.Rd +++ b/man/strwrap_ctl.Rd @@ -111,18 +111,21 @@ normalized strings will occupy more space (e.g. "\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. 
If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} \item{wrap.always}{TRUE or FALSE (default), whether to hard wrap at requested width if no word breaks are detected within a line. If set to TRUE then diff --git a/man/strwrap_sgr.Rd b/man/strwrap_sgr.Rd index ea9cdc59..a1a2678a 100644 --- a/man/strwrap_sgr.Rd +++ b/man/strwrap_sgr.Rd @@ -89,18 +89,21 @@ normalized strings will occupy more space (e.g. "\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. 
If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} \item{wrap.always}{TRUE or FALSE (default), whether to hard wrap at requested width if no word breaks are detected within a line. If set to TRUE then diff --git a/man/substr_ctl.Rd b/man/substr_ctl.Rd index dc4b61e9..72aa9574 100644 --- a/man/substr_ctl.Rd +++ b/man/substr_ctl.Rd @@ -113,18 +113,21 @@ normalized strings will occupy more space (e.g. 
"\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} \item{type}{character(1L) partial matching \code{c("chars", "width")}, although \code{type="width"} only works correctly with R >= 3.2.2. See @@ -188,10 +191,28 @@ with \code{\link[base:substr]{base::substr()}} to extract the string. 
This conc directly from Gábor Csárdi's \code{crayon} package, although the implementation of the calculation is different. -Replacement functions are implemented as two substring operations to select -the beginning and end of the final string, and a \code{paste} operation to stick -all the pieces back together. The \code{carry} parameter is applied separately to -the \code{value} and to the \code{x} parameter. +Replacement functions are implemented as three substring operations, so:\preformatted{x <- "ABC" +y <- "_." +substr_ctl(x, 2, 2, ...) <- y +} + +Is treated roughly as:\preformatted{x <- paste0( + substr(x, 1, 1, ...), + substr(y, 1, 1, ...), + substr(x, 3, 3, terminate=FALSE, ...) +) +} + +Except for the \code{terminate} parameter for the trailing substring, all other +parameters are passed from \verb{substr_ctl<-} to the internal substring calls. +If you wish for the whole return value to be terminated you must manually add +terminating sequences. \code{substr_ctl} refrains from doing so to maintain the +illusion of a string modified in place. + +Another implication of the three substring approach is that the \code{carry} +parameter causes state to carry within the original string and the +replacement values independently, as if they were columns of text cut from +different pages and pasted together. } \note{ Non-ASCII strings are converted to and returned in UTF-8 encoding. 
@@ -213,8 +234,8 @@ substr2_ctl(cn.string, 2, 3, type='width', round='start') substr2_ctl(cn.string, 2, 3, type='width', round='stop') ## We can specify which escapes are considered special: -substr_ctl("\033[31mhello\tworld", 1, 6, ctl='sgr') -substr_ctl("\033[31mhello\tworld", 1, 6, ctl=c('all', 'c0')) +substr_ctl("\033[31mhello\tworld", 1, 6, ctl='sgr', warn=FALSE) +substr_ctl("\033[31mhello\tworld", 1, 6, ctl=c('all', 'c0'), warn=FALSE) ## `carry` allows SGR to carry from one element to the next substr_ctl(c("\033[33mhello", "world"), 1, 3) @@ -222,9 +243,24 @@ substr_ctl(c("\033[33mhello", "world"), 1, 3, carry=TRUE) substr_ctl(c("\033[33mhello", "world"), 1, 3, carry="\033[44m") ## We can omit the termination -bleed <- substr_ctl(c("\033[41hello", "world"), 1, 3, terminate=FALSE) -\dontrun{writeLines(bleed)} # Style will bleed out of string -writeLines("\033[m") # Stop bleeding if needed +bleed <- substr_ctl(c("\033[41mhello", "world"), 1, 3, terminate=FALSE) +writeLines(bleed) # Style will bleed out of string +end <- "\033[0m\n" +writeLines(end) # Stanch bleeding + +## Replacement functions +x0<- x1 <- x2 <- x3 <- c("\033[42mABC", "\033[34mDEF") +substr_ctl(x1, 2, 2) <- "_" +substr_ctl(x2, 2, 2) <- "\033[m_" +substr_ctl(x3, 2, 2) <- "\033[45m_" +writeLines(c(x0, end, x1, end, x2, end, x3, end)) + +## With `carry = TRUE` strings look like original +x0<- x1 <- x2 <- x3 <- c("\033[42mABC", "\033[34mDEF") +substr_ctl(x0, 2, 2, carry=TRUE) <- "_" +substr_ctl(x1, 2, 2, carry=TRUE) <- "\033[m_" +substr_ctl(x2, 2, 2, carry=TRUE) <- "\033[45m_" +writeLines(c(x0, end, x1, end, x2, end, x3, end)) } \seealso{ \code{\link[=fansi]{?fansi}} for details on how \emph{Control Sequences} are diff --git a/man/substr_sgr.Rd b/man/substr_sgr.Rd index 52e01bf9..dfddb2e6 100644 --- a/man/substr_sgr.Rd +++ b/man/substr_sgr.Rd @@ -61,18 +61,21 @@ normalized strings will occupy more space (e.g. 
"\033[31;42m" becomes "\033[31m\033[42m"), but will work better with code that assumes each SGR code will be in its own escape as \code{crayon} does.} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} \item{terminate}{TRUE (default) or FALSE whether substrings should have active state closed to avoid it bleeding into other strings they may be -prepended onto. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} -for details.} +prepended onto. This does not stop state from carrying if \code{carry = TRUE}. +See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for details.} \item{type}{character(1L) partial matching \code{c("chars", "width")}, although \code{type="width"} only works correctly with R >= 3.2.2. 
See diff --git a/man/to_html.Rd b/man/to_html.Rd index c2651f6d..98caf2fa 100644 --- a/man/to_html.Rd +++ b/man/to_html.Rd @@ -64,13 +64,16 @@ mapped. other 8-bit colors are mapped. }} -\item{carry}{TRUE, FALSE, or a scalar string, controls whether active SGR -present at the end of an input vector element is carried into the next -vector element. If FALSE each vector element is interpreted as if there -were no active state when they begin. If character, then the active -state at the end of the \code{carry} string is carried into the first element of -\code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} for -details.} +\item{carry}{TRUE, FALSE (default), or a scalar string, controls whether to +interpret the character vector as a "single document" (TRUE or string) or +as independent elements (FALSE). In "single document" mode, active state +at the end of an input element is considered active at the beginning of the +next vector element, simulating what happens with a document with active +state at the end of a line. If FALSE each vector element is interpreted as +if there were no active state when it begins. If character, then the +active state at the end of the \code{carry} string is carried into the first +element of \code{x}. See the "State Interactions" section of \code{\link[=fansi]{?fansi}} +for details.} } \value{ A character vector of the same length as \code{x} with all escape From 3f143f1426df3cf89bed50210e2c5775b2977015 Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 21:50:50 -0400 Subject: [PATCH 25/27] NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 7bd87970..679e7cfc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ ### Features +* [#26](https://github.com/brodieG/fansi/issues/26) Replacement forms of + `substr_ctl` (i.e. `substr_ctl<-`). * [#58](https://github.com/brodieG/fansi/issues/58) Add support for OSC-anchored URLs. 
* [#66](https://github.com/brodieG/fansi/issues/66) Improved handling of From 595a6ff4a53ff4d4a4f18f0df7bcdf0a1aaaf80a Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 21:51:00 -0400 Subject: [PATCH 26/27] tests --- tests/run.R | 2 +- tests/unitizer/interactions.R | 26 +++++- tests/unitizer/interactions.unitizer/data.rds | Bin 7493 -> 8712 bytes tests/unitizer/overflow.unitizer/data.rds | Bin 8155 -> 8174 bytes tests/unitizer/substr.R | 81 ++++++++++++++++++ tests/unitizer/substr.unitizer/data.rds | Bin 12122 -> 16132 bytes tests/unitizer/url.R | 1 + 7 files changed, 108 insertions(+), 2 deletions(-) diff --git a/tests/run.R b/tests/run.R index cbbe415b..7ba0ad88 100644 --- a/tests/run.R +++ b/tests/run.R @@ -21,7 +21,7 @@ if(getRversion() < "3.2.2") { ) on.exit(old.opt) pattern <- "^[^.].*\\.[Rr]$" - # pattern <- "strip" + # pattern <- "substr" unitize_dir( 'unitizer', pattern=pattern, diff --git a/tests/unitizer/interactions.R b/tests/unitizer/interactions.R index fd901d20..3544f620 100644 --- a/tests/unitizer/interactions.R +++ b/tests/unitizer/interactions.R @@ -14,6 +14,15 @@ unitizer_sect("substr", { substr_ctl(str.1, 2, 4) substr_ctl(str.1, 2, 4, carry=TRUE) substr_ctl(str.1, 2, 4, carry="\033[33m") + + str.2 <- c("\033[33mA\033[44mBCD", "ABCD", "\033[39mABCD") + substr_ctl(str.2, 2, 2) + substr_ctl(str.2, 2, 2, carry=TRUE) + substr_ctl(str.2, 2, 2, carry=TRUE, terminate=FALSE) + + ## End background should be kept + str.3 <- c("\033[35mA\033[42mB", "\033[49mCD") + substr_ctl(str.3, 2, 2, carry=TRUE, terminate=FALSE) }) wrp.0 <- c( "once upon \033[44ma time in a land far away over ", @@ -58,7 +67,7 @@ unitizer_sect("carry corner cases", { substr_ctl(character(), 2, 4, carry="\033[33m") substr_ctl(NA, 2, 4, carry="\033[33m") substr_ctl(environment(), 2, 4, carry="\033[33m") - substr_ctl("hello", carry=c("\033[33m", "\033[44m")) + substr_ctl("hello", 2, 4, carry=c("\033[33m", "\033[44m")) substr_ctl(str.0, 2, 4, carry=NA_character_) 
substr_ctl(str.0, 2, 4, carry=character()) @@ -97,3 +106,18 @@ unitizer_sect("terminate", { strtrim2_sgr(wrp.0, 20, terminate=FALSE) }) +unitizer_sect("bridge", { + fansi:::bridge("\033[42m", "\033[31m") + fansi:::bridge("\033[42m", "\033[31m", normalize=TRUE) + fansi:::bridge("", "\033[31m") + fansi:::bridge("\033[42m", "") + fansi:::bridge("\033[42m", "\033[42m") + + # this is unterminated URL + base.st <- '%s\033]8;%s;%s\033\\' + url <- "https://x.yz" + u0 <- sprintf(base.st, "", "", url) + + fansi:::bridge(paste0("\033[42m", u0), "\033[31m") + fansi:::bridge("\033[31m", paste0("\033[42m", u0)) +}) diff --git a/tests/unitizer/interactions.unitizer/data.rds b/tests/unitizer/interactions.unitizer/data.rds index 22399787b9b972ad2efe94c6829569971be4d7b3..480e28903552f31da17017f0bc0ff888d0b1930b 100644 GIT binary patch literal 8712 zcmZA6RZtwjwl3fq26y*ikl^kPgIfZ@-92bxC9Rp+}+*X9d7ond+Jo3 z{vQ8T-EUpJRzqTu0smc~6T=73<-h)r)y=~~Rwb8Vfne>L4+{8RgR=xn)0WKN2*d|G zrY3#3n$VJ>2s;rwlgt*|e<_?Xdpk_=9oS_TYcF0uvx;Aj&WO|dpRoJ;voDdDAQGb^ zq8+=B0dM2f837|h)8RBll7xDh`DR`V3``m0y|u4~016?%@_Z9cwFvzp$& z7zg{u)^A4`O^LQC2F5MBkuTw(LQ1CP1ts<8>SE-Q3L$6%SmmE!u)|rp5`u}plV@+J zlSStDS0bXT6=Q6TRF1V{M{(eJ`*Ow}Fx^%qjD**)Vu8Sv7&hGMG)G!|I@;=bb=sz~ zrmL6t`MUXJwA?n$ZulO?JynH1*2z0eoc2@LYq{#^_o3mpJX3%5vW>GONZNJvXWUAk zh>|UcT%mMynE6*9O&YOOquI|{(la9U#db}B4Vn}B76MhDmasmIc=xxgcjn%ur6cvI zo3>e1CdH8xU+b){HQ+{o+yO9puZU2{cS~4 zy=*Eirb2{-w$!FRt4!;rdAF&Yjq|~kaX@n|-hv<{g#2DancIpHaK#JvH~2>nK2j*Y z51?oA-IfIIro|3cke%?uoL zRE*pkn3=w2x41-x-2z7C>iPWoW{m+?)Ai4WPr|^DQCkl@*5PE$ks~{@^wZoUP44mh z$uRH2%x=8=uil$2-xc!#CF*(h4F}<4@D*|8<7QtN)C3;&-e_cO&KqgXtjIkjJg)@; z=szy#G}_B0Z88QN_ZWmqPoi_qWfwRKQC06EUrnKie`ZN&2tr{o=X<6S47F930sp$ay$iCNW$ zHlw#GV`kPpS7OD(aqbgCo1jgfI3Z?qd0Q9D&lAhzcD&iNOEtNA^|3c`n)XxEtn4Ey zBE#!KNugI9Br4I7>R-^fFywL3e@&c!pIZ4EVe9A_#UfJU0YSow{J~BJn={2DneKCq z=EG#9q|uee=(5eSd4|W@osaAj@wzg%K1bh&7?#RQi0Q?N4FuJLEs+qBZmMSrdZSq@K@0p3jJ$t<(0ON5Qd; z*LlX8`X}%4y`wy9mo39rd=*?M+F4LyFQ~w<5-U4Gk?zF_l8anRO_#0Jti4eb3~Sl! 
zG@e%xe&te~OWSTD;bP`UUHKKh^hg%KHIpRNLYg^AP0BN`>M3 zW@+hms*7G)h1I-*<$5|m_PPwG37I00ufzx6pibY0KF8%CYL2rx7ybEBqPOhB12{`- z_h)MFDNt`2Gt$rf>mOGa%UO$05Gfng0tLg&mdY@epr;MUXqM%VFTSc;n}M`|nlG!s zgd2+fh&zOYHU^{`+aHC3qYalRJ1Z?`E88YtE7-8Tj?#I{^CaxCBX9<>n8jvmpvyFb z$U0~`El)baHSATl286SS^!r^Qx@nZBH*FD3a`d0IE8ju+(bo|K$M==(|OD=DIk734;sHK0uBCYYkR(7VIC~{_Xw* zJ-`Y0KgIr#X)n}gYJA~=*cL2BS{q@xJBje8{Q1-j69_)0PA`YO(?HeT2%9-Oqxh(! zYu$hpOTyY;GW%nU+e0=o8a9*`h4m9EAjt`3 zIK{oN3>m(9uBf>mHZ)5*T)ZK(2=HNI{|KAHTD1I3Si*`Bl;3h&aKK!_2v_1t)=F_d7*-VIyu`UCHK~V2XWN#55 zHgjkQpO8B}J>9n;(9?ZH5_NvElV)2WVO>VA+DjZf(90XE^WN4ncqf~;djYrH5Kwc6 zRigr^p-*@*qp--=5?SHIK*JKo{9Rw?r-FFvc}qGUAK8>g_%R0B&H#9ehVN8L$i>=R zCwC@yZpE4{$XERHIm~R&@AF*2i^CvVHSa!}J%$fOe#A!Jj#9DT%EOdsh?$zoykb5+ z5jT#i3AEE%tl@fx}vaPMCLt9^FE#9%^ahh*xh<&8IBJO>qrwCH+2EU zQhPIy9q|$1#$0Ly_G6_%$(X^2h*ifR48QVzQZGuRet+MglE`=#dZ!9CQuigbuC@wP zZ>cmhotWZj9v0vnkK9BHG#Ei?J1OwdhpgGm?CZX5#17NDZ^m6^#hZr$KdVc$ zKcm=XZ7>kX_02lF^#;;4Z#aw(4wvqVXrQY!K5HZ0huaJ`XjM8fYx)|dqMUpByz9I| zab0s`XwUd2%a=233eB=(v+Wp|{aZxx(+{LuHZ^iTQDxZ(+Y)660oGKVnjg1h=&9*ti>?KDGVT3ryR}X;~S+M6z*!d?-RW zo!qVEp_3)MIQnLx#9fTTc3uLXEd8VHQ;c7iWe(-V7o(>00sIIY(oZpA>IG%hAfgvt z1+*KQ-v{(!ER1tD2lqX&lglMn+|blzn7ZEYPDQ1-{O!WeJW(}zO3an3PCuFpMe0D& zpTFMJVf}W>$?4(FGtYqfKSeyofo{HymYmh4JaT!0SNyw@D=FTa)GDdJSN^+;!QuykO{~PN(5qOga{6 z9Fi5!{84wQ$RfuK+anPlOdNUnks50%zTHkeizq~yN#xeawjYF704l_>)kn%183(eS zWe0dxmIM<$FFm=)hYXy<4m315a8{kbM2KUx=UR*F=3y(L7OdNUq8U=9salD9tzzM6 zu~F*O5oS@=VpfRq)Y+ZYB!tR!KT(nH^TA8S5LwtF{?@kWk7!EX*1*g;tD~XV7K48T z1_$iRy*T4uOYuoLR^|=vLQe~?xx#M z4h{P(Gzb91(IbY3M@GiRrldrrqZEUKqb5xRBS$G~Q3k;M3!=umC@G=PpnsLB9KZ{5 z??<%~E&O0in)Yfwvi#j=_>1NmB>vL^6ajF0U^DN`DTAN6g5|B9<_GO191 z8A`IVB_NbN*7X>ao#Y*y?GcDmp4M6r6{^aK+_Ca^LHwl64>09MSzn=*LmRB#7rJ3*lL!zuIVDP6%R8#< z4F;LK4fMUBW10M+dm^e0Za1mQ8T&VqlLm-|P(9Pb@0icat4Sklr*|eE5{F*d_31!A zTP}M_vP)LqNhhjWkMY6LBlyo9jXiCa!o^zMjKIdbcaAhDjvau_cJSx?NJizby$R4H zm#9}JV;=%mGvju$fOw6i611JTsfn2JG{Fd^1=k3}N!RpxaqKP?kajB0!U{_BAX8FN 
zs^+<>Uc&MzsL8KV9F=^)RlSN$xA1A7{|E`Y2Q!1FX@VXD!XDKDH!Bg3wIm8fGPo^*HBeg0G#plX#YJVxvL}3i{SI3deVmy(B5!ABbQeUsp0Iks*nVBa4w&JG9l zBrB-ghTpgCodQo)Rc)=G>njc2S;W3C^Tnu@dlr?D0iG6Bj|c+R0qd$$k8qvFaDX$u zps~*YqlIzuijZKR!}t1XEj8LEWCRE$SaMRlzKG7G&dnXpkCvSWMzFAD-A`zFWxfb` z19x6{Zv+U%FR2N_N`P^|_zS|gI$(SSaUTJ*z};@j*?wr6wekaF1re|;63$!}@k_I^ ze{{TPziy}&G>M!_VHLVzjL-KcME=KwT8xwUpM87(UUptB+wt!Y1s|$I5Gkmxs~FSb z+6MlNlttro!>s3UxbQagv&^Y1qBoHGh0^sVoGJ9bkX@3g6NNs+hUXV0%5vZL;6>Qk zAPas_s8l**XBBVq-}^Hne1@c}Ag*hypswqxFr{-P@2it7e^W!3u$W`vTBn(sfs`MX zN9+wR*Ic?=>S%bi(UMqI&}i5PUU-ilZq8rMB&p96f{sE)$c1jj?J}Y9`+SQK*Axm6 z)w-}F7@_t>WNJ9oa0};A@y>KzO(z)B;=75l7LI9?-Rh2>AL`N}@AAi@q5avrK8T8e zfl(-U1p98-WBPE#6&!0$Okn6>XuYFRr}v?=`P+Fb$FR?bjst;)?&Atq@xM|JG+z{d ziBC71>*N{9aCTZc9AT}5tHIATuQ|OD20A(yN3CdU4aN`No#m^tC0kP*5928&!pQI+ z!3y1JR`3rf!pVWw&{uc;SHm6lun?}`6}BA)(W|4*)KFmOf^ZFyUEfft}Z8iGX4#j_>x=_6Rg2KhwijD|6aN{6}{Jl zs+#Y0acFy|@o`ji;h3xraj3UTg<<6w( zNfr1hkTg-`K5}!di=QqjRazR;)oeERNpEFFHRNa^XH(=zhke?VLijkTThvzdehO}x zQ0n=`fZ?_2_r^30`fz+PLEvK~){E2S6zr+v__0pX%I6;G0kwfM;Q)+j5;>(vNm@(( zJ)Gq^6F)Wt6CONYQ=WU!Sz7>VsQ_szBR$UIX|Ct~K-zeJ5iI@iVh>DRBd2-w2wR1( zXKE`1#jxg^J;BxTp1DI*T;dx})&QXgd!uSGvcALMH*(Qe>WW(>Ts?nkA!a#ksMSsN zu+Q_hrPi=G)G~PRkUiBMQ$wp!bJi_waI=9+k!N;{8*b@{G}hRbWUo^85AMP!ymn-n znSV)>iDlV(5}H;6)HQfF#ujEfZm=?Iu){LKORuWeQDvNx26=sq@2aDO2@z?3Vp^L% zb_c1)g)FdX)XF63o!p^^X+zK|+0U}hlbX@`ITB1?XuC&4(T^I?g>z}U9rf*Sscw~? z6r=n%9H&jl7FrCKm`G+q*kE|T7A9tu3YE1DvqP#Tr-PRSI zT;HWNRF;9Mditw=2rPK|<^|MejxAD>gmI=QMqo(}jL&+b`z8dvq4mFUcfPIczPXY# zP57WqR^ms!{OAeo4ouwY$@};$wrrW)Hw!)Q0w|&c(6axzqGvkx1O~8EcZ|VxcxJB| z_XlAtc%&OM3zb2r5ak`s4}@yFQ^V&|!&}XP!|doKDG!is%h%s5?U6J7CU0<6Z{$^h zjSFuYh;IF!y3db!>u=kqDC=)4n{W3zfHyQLrky}m_P69@VtfEY&?a+`_ea2r7GSTM zwcpP2r9!%$u{iZ%;d$;Zun4M0d3-Gan+hgdF@JY7H9;1OEYs_D7LNPV?VMe`3KTb%|DFz(JL72utE1+qvpc&;{cAf*1IM%JwpJ&T_#JO(&`aP!8 zQ1;At@A6}7k>t2SNOlydinaFs=7xm? 
zo~X=>sH}bV*zZ`P47rp^X7hCYY z2jQobpRp?0amw=HC2us>!7LoK4k}Ap_NkuLflPQ95;I}$UQv3uO`8!YT@)qz&@4T5 zuJ<#jOr>I*^oUEo_V&AOWlp*mGzFT07J^WSu2w-46lY zx)EW|A;2`(ci|ujxAkzk^>4@Rb+b#6A02Md*{c89E)S4UCNurlq_~O3l1eGhGNZ>~ zirQ3tsc8lC-D+zrRj&)DfEK|>DO2%j`Bpb8sEEZs-2eSxA^Hi&23c@i^4sCmkAYO( z1`%|m?a)mZV;K<6l)-u4^#h)eIrD6KOfcdyZzDoq67-~1#?N907AkMrk znnVDsL;`$J0Cm~`McBg*;zzoYjtLosHYGTcPV#^J_&cAug1j65Jd?lig*Hq)s{ls2 zFM%Wfp&G!`0cW$y&2uK79f&tB!U7p5|BoHHtFoZo71=9p#(2;`=uTj>cGWY0_iydL z5o@aKN&`Nmm$#Suy&)$H=^HvmMIxBfF005!e~}OLPJDEAdI2`{vlYXT^CV*DU%#02 zfk&0GG7A8-@nMYo| zq29+|zhv>~lSIEY;9(mtGj~GS=E0?IMzN};L!!8@##vlZqFlvuS~_s2N5EM@k(o&6 zbvq;FcbnkEuO23biq@E`Cy=yfWN+Uu^3x3kbk*^I0h6(y>PPh%a2F- zzRCx=`N@UABtmA~MK9qczR z8Uw!p{t5dEFUd&!%>YUA0Q}93L+Zhf691o+szfLQ0!<1Tl;H$>PzO`e+xA1P!Pe!@ zP+6R+;oQMiCehx=`L|&-*WzQr?6p;93tVbc@5hUG2ac+pZ_?Mu+q&H3WdYJh6P+!9 z(mM=|HTYkrd>Y2X2s2YL|LqIR74q}hfoB~HcI$>+q_1k#<4+c0t4#-hHNcub{8}TR zy#-?pg0~^8Wcqeu1Ug8R+_+LbMW6{98fXm~Y6%*0f@8ZQPxsbk?cV8FvTG7C_ikSKUEODH`h~9s*_*eoTLLczV^34p??bT$6$+jsbD#>B;RG0bR^g@b zA2zdK_FhISZ6e%P@++^;+eioDw5a5t1bDyoNcovcUUDv~QVneottiZHlphMiIcrV0 z{=|3cb|!ePG!56sE*c~G8r^}6b^4y@L;Bf;Y6<0&h!AwQ+@)DAKbv{ zOK0iSjVU3xniTEa2kStTpnL7@oSCZm+Tu2`i)vUEoT}cry3)7W2=sj-@qKXUa3SBV zN@sn>MPF6d?iV9H2b|Wru{IOuaAdE;FGK#K_@xIyf*{TPSi&L*_d-i`4#a_zl$n}x z#_G&^6}zRHZ7@NYa;$eYov<3W7I7=BNns`X#qHb`E2|J&rTwbL_7zjn&Vk;?2`_iYg5|B(hqiJ3{Z3 zir<7QKvw}ju57^t6PK6nO2ey4EXPd{ZD%Nhp2`lPn z{I|P`f(b4uO%V7?-vt7k-C}>A_ZWwZZfjqRja@&D;$dA`eh{Pb@1>4Msv+VSj5xNJ zUHm>2E*RFwUMCc+Zd8#-uXL(6?@G0J6n(7fS&>capP`z^^4snqj{P=2K?<>cEE}Z{ z9rd6`cnK2S3;(5;WEhKU2icFhb>R${S-KCr4BHFg;oC9jm(RAp!$%Ueco0Q{Rc^mu zgR9tWqGlq?D$;bGEa6^ozjA1+P00-iR;tv`jxg7!qj(h2G~8(!h~UhNE@IBJ8nUKl z>&GVYOrr>U38cgh^kEydU{Mid9y5S1Ct-Bbw_`bZK(|ifdoz%wQ#dm2@5kkP)sU5J zKOy-}>Ew&ZF3*}Tf$mEE<3}OG+~bnpk+*J-^D*h!=Q)D%chVK>=4&GH*59D>>qd4I zrg;7p-YHVM43~EceXczSc4g#y+iF7EY0vd_7hKje@(@0LzG7DHr{{-XG2mj+Df zm5jT;^J=?i`fVO4vmUbba$yp!FT`(1`L|*rrdUlrv|gbf>`S=6h|6yY*2XHtRlRc` z`7=pRJ42;~Ln5uWpi^f>$cph-ktBh~yRKOOXcbYB8OS7tc#(7?y9-^_)j~w 
zqs_`buYu8oJmjD7NpE_soR8U)!Swa&uH>}EfAf>4dB&!i(&4xt`l5=xNoAC_X&73Y zppNqwLox<2)vC4KdJA7;eo#y=S#KHsfaJ>T7ht+fNO zwb$dfE~@>d{qEtl1vf7@-NY#^U->bFl`(uinvF}0R-y8;6<*NPq3hjYWgF;r*Ogm0 zT}%DB^UO97%kk(}c@t2iXMz2#bUKPZj63~zF*ZAB-pDbyeEH(jF&FEjD~JRs@_1b= zEehK-7YR->A6opJ(qPW9gJ%c-@X#100I!;3K$qZ zK3i`#+r$TNm}Pfj3n78s)=2L0{6CtRSPg%>FF5TK(4phGFdiFrK^dmz9+&XXe{|V_XeaBa_m6?=>Ng77bJ)@ENC){Gb)JrV_5L-z{~-O%V(luZ~+_kFSL(D z947#if3?dH0O-B2$>M}-ZD*bN*azW=sbcVkmU4ziP&2fE4Qkj~2MPiLIh+Ik?RgSp zD>pU*Hx(z!ebz|Sd<^_z3}aFT~P|x zNtEaTWS9KUzkeI#?+Ef}DQF`mqrm{;|1cQ^x?*Oy9zO_1=Cd(Hh#Kg4U32pz3%_s} z+3az%_{lcklboz`r=8((pESLNfMzBe!|p15W z)Zo9G4}ZQc_mRleWFHxga554YPD*H~N|#iw>U5VYb0Q`neAU1YkXpN5ekIp?Z+FEvzrD6jWNY7jDfi+lpOnrF0Y~zO@qG)+ zLo(#td#2Y{G0DcSE8SDogArC0=E+JFqaDR0ylsk0!~L#Md0uG-TE&pUT{(AwW?QGf zib=F&mGf~^zkD-M=>`dDkcI`8T2kqjlJ1VBbEyyS zz27|l%sKP^&b)JG<~tx#IiS0;Q^R)rt4WEFTuNbpM=34dmQ=Br>ak zFPPGtn}Q=?Q+t8A-PgIP*=^Lb?b=t8FO~Lgob(NtA{syOkr5Lw+83po3yc|^r*!%S zOybjq5ZzdZk&3s0UuQ%PDz7mDFHx7zQD1>?Fr#VY4!GiNT4ID;7PmK}PyUQ~axRl+ zFIOLkK8m}NIESa0bVF!Xs2)W0YmY7vt92^tHvz7Ccj6aPkd zS>4?Ff88BZI`xk5Un-lXYj|N>TV+0q=jc&f(M4Oh?p!$warnDf%8UDMOX|~p#k0%p z-HECC=6|c@klUhq3okmmB&J)Jp)I#HIQWac zk3G>ZF0>7*nyXWb2_OSo)qLU*lvmrdpqFfwC@*mh5$4j1{qJ*GR25=SOrrB@Y@@~BB!2WLm} z76pUB}HdVXdjsX}{XgjFjz2_~eF$4UOs61rg3Zz)_DvNzv)ElL2{pc=jdx{`(Tg`Zn!RT{{n+nZq=zZcYTV&o&42WEGl&wCELH% zaKF*o7KCAs<@V&hWbTmq<)6U>)HRH(rX1Y9$)UYdS1Z)%{Yu69P45dMf_({H2sPdT zn&B5>!e_mT1S9$G;sSIdJg7_?(z8E$G~WR#>ol|7OiZp+{$yEfQ8?b z&P+Gp9hODG(mrEB^7F5;d}{II@oWSSWNSYTH}e~yfhMP3pM*L+5W=siw?47{P5Fo zrfU7UU|FfIn2sD>{9^ehL#?Z<36pQ5_P>dJ`@Y%NFQ%c7hI_ax_;Q84EOa>pYZ@ zLBz~4SA;NDBNyZ}Gjru)(<3|gOvO&r>|~H~wF-QfKON>HrzUjRa7d(pY4zTbl}G05 z(6oS!?^{NO<;m-{2$rMDoCRY?fM9o|I=F`y-NERhxDj0SN4;sIhN#NkU5o^Iv1q56 zZB3ibm$DVLxdb#C$5%*&v#f~J!v7H3WlQhmNGCSIy?Vn80?vc|Ou)KP*t>GwiVteQ zxXp9?0EeqUhjVGbY^^sagReAY+mX78@@UcfF&)j`5)$~^xV_v0LDdy%ftG79vtm_& z$s|D>7*H2b=M!6Jg@Ix}x^3a;6=>EnQw%1&LfnTo_I^}+S5+V_giXi2U>f5hw3mX? 
zs?`qg1Nz-!`1KN?AxFM`X`N)gGef`rFNDY)ohae)tFEf{Lr7rT^uN-7d}7*2Fi`v$ zPv&Xw+E9;i{$u{kPcxD7I8t+sfqubHlSOV`u8rX3+#}OrSIBLSVuu7u=V1!IkJNShzCj_)Y%pQ1f7UX9JSU^QtP()qOq3n?0U>pp>w+UtR zUwqCZ@xwh;0S0v7sRJI=BItJXfc=bU@!jUEg3QO@puS*~_s)D3%E)}D+wnsU?w1Aj zP#Ii8=p*6r@h^F4$o7oouS~0KM0M6;ZCyyv3{5Ce2JU#!8^buVz=8~D8vwK@1@hZ4 z4~D@vInc*xzx(++waZ)(!P_z#w#v{5C|sB0z6X))cMy3Mr38_z0-pFhTZJ>P*I zXrd4z-NLxO@aLEf$@oiDE$dYl%fvh`3IIx58T0GWA$N!TJg@gntD7>5+8Z% zkyOiR4`^U?=)5b+poecT%Lir?Ckjax5rmovBXhZ7{YPC1)$&z~;Cr%Zd_gbk==9A+ zp#y-}0qcG_;owYt`mkgER%M20!u5@UDsg^i!fsFMt|sy3@JuKlI&PmE=TC&t#H^S2 zItSm1YTJeA6_$S2)l`Jno(a}Usb7ztVy9j&Z{tgN-On^9cWK@v?;`}wh6RQ2_yx^} z0zhjf%i_XmIZH+e)GR@%FQ|)2Xig&Tnxv&(!zQr30Fk}IgT))S78^P!970pjLBjqe zOr?22)9}rJu-ZDht>P*lWlJtUEO)#QFf1QxW?37LpDte;l0-xH8OdAct%bB@q@@=T ztX58@D%T~}o{P+bonJF!lZAJI_Utl^T78QWQzFie@e6tc#wH{`z?8g+MlrG$0dK^( zLp*3O)qG%D>3>|EeHXmCND57j)#f3kehKJ5K_*-3zf0V!OHmP78%q2`#;aO4Xm_OH zt-r_dd2#%kxp8_O>fMV7nN>t0T-|THACi#pX!Hgf(j70Y#*9O$5SIkD}in>>7_%QW32${C?#gJuML&0+{ z!Tm7_^Qh}OJMRZA%f2+SXkC|T$zL#LxabRG!Iz~Hpv|Md;sOyzF$xnfhxV^H9wC7{ z7G>Nj`e$Y7qvs-)pj{^Pi+~L=Yo@@ZZ_>{}n&|h%*y;f<`}EPzF_NtHer-!X4|o~I zfm;gD1s0=dqND!ArbE@snKh#>4Jxc8!&yQ%1k`|PxcgAz&_OuTWVopu!dNvq!5lj_ zbt~jDWC0vc;TpgP;J*S?792VZ!<^P{(pIo zx;sk&T9T7u8)%kSSVJ67bO>j6wcKz#pTdYr9KngI#fWmlreMu*fv0_2de#H{pYVep z=0Bcl=L$8vDcwC4&Lwha{^v*E2-biIYSKiYD?Dwpg&+y_e@MPEvu%gt4#=fL|7 zGb4JqW-pBDR&c*F!=zJ*p5*bhcp)Ypc@!)G;+qTYOHsAv$YWxJc+xEwqHLcG7pV2S z9UW`7(L#A>`pETgH_$ig8qNweKEzFf$8Hjy{gr3@_@yARc+50Y7E&j)dghqg(9wdv z#R<3vJLkR^a{lZZx6PT44)RGGpCZm~zyyS4aW`$&_D9U?v}TxfsssvHX)S>3T)U&_g`eu}%*w9->D~A}q6%&OLSL$T=-WSb zQjbU!^Rty-TdY7qtt;qF6q(@Dv+l$1*OuFvoVTiHbcyVsZf(;ZMd zf#BEo`it;=O8cyNOdqLU9OQvvCw>%Hoo@onsBpitty*4tZliVAf_~z{CZ%F`mg&c& zwb;I-sZ-AuTxr0O-x!ERJiRyzg~Ru)vStq0>1m66-A^`q%oE4)XxZg-aMjfdzGo_y zR`~<)KR}NA_H?B6Pr{h`?8*U#!^)$MmAc9ju$#zxMZIF2nRhv5k1ltFohNNAs7Qky zQN3w5sKQ+z`6mnmUjX!aBB1*uEU9Ph6?(PT2qttIO1s9$beEY--2f?*^}M7OkFE9- z3l19VC7}ib%7A6-KQGYrsRO3|h4$pI%F3uyKI&nyjAp%J&hfWy2k}#pYD!WOO#K!K 
z_zjJw#&$Ts84cnLx)m88{46cC9!MNFI`IcuU9!rF=I^5c{^PA8DuiBaWBr0 zSq&bz0m!0|(3w5kiT1U^Z0ny#OWmG6A+4pC4{%vX#SBB-i7 z{nJOn;k-Bvi$tdRiWIIK5CZv@je7Hbl4^Jg2AnNSL7L3;9z z6s^P(><++{Q}CLSM@{E>kzRvYL%O6q``UB(UC_>qlVTl%#|$He_aru_CcuR|X=a*% znPs@jhMo}M$Ndq?PI}X~wEM21zpZIlBzPpHj~q$1G+3m=L=2J~q09{$bc@5Msz|ob zBD4k#h^Ex>X^WD&?ab@pCAQNon4H!Yd~;=7dkX zXmD z9PtAX1q)5vAFJS_45*t?+XytgiN$35DodX39w`&AaO&Mx;ETEq;8L}%)BbL+<;R|4 zE@9g-MO@ArXoAv|6sIy8XcD1N05d7w=r5FuRzyYzGb9*XQ%gmT2fa?vXW%QzJFJ`! z;lzET8xN&nO4e=rn3IMy;PbN|hAieEmokiNiqg7at(9{XTX%^uA%{5Fb`VC`w1gHd29y>zJ*hA#cn-CZh` zjY^c+u7+pzol4CFg)=Qi+?BGWagiwaGX4_YzUgsZ7BsdBDKa zi|ke8>1uFUf_y-?zgTZYK(}79 z1Iv-Df;j|;4`RNlSX)trU)QeI{CxtzQhVnXB@?;5cR?#UJ>t%<8A-_}%6!fi58j8| zUcs7{9XzF!bEuPCruIUwv%av42lyaCl+QGIO#uCbv5Gf7NEC)Uhz15x=F#rs2nh4p zgGw089AqlrU99M~|Cm8lO{m9HZ#^A^7xG0TnxY|-%15x{sL1j-n0t#rn@5eHYLE8}p(9u=>U45MC_^{LMtV}`` zec9M_^gSs-G7pHRO)vwo?t|j%(chrSkN4Mgk;a z0wE7k3R)tkrqRCf*292Gc8Io3-eAt}ffDGVtJz-RZ=Dk3%Ru{}PrBkduV~tH$zz&f zIp}l5uuTAX5dBZe(h(U8@(upMRKp-uU7Q#^`Op9l`1Lg^i(Y#wy zNr?*;&7cKGoGEwKMe&#g@R*tV+O>EGN}hV5PHz^`IX4Bdw|H0N<%UQdy{ThJ!Hddb28{)@9CT%oLKH7en%I7^02F1P7u56kvPn3BnIxlC;u%Eyuv ziUtJz2i}wD2-GWPz~R$y zLcMB=A`yt_xO|TwpUGP3@%jxZodFA&_L>JcQU)$|gd&cwdq0CSopcTV`?`pJ2^IDv ziS&zQ4Z?HaUq~S_s9&FG<#b{HPA$pOJ(J-+fy3TUmKO7;vGiMbte554CS^T&DCu&s z;gVk`KJ=~D(s!z<9K4z+$FkW4LAyh^2uPeSgkrSIh8&l~u#NaAK>J|ivO=}Otc9c~ z=kGSP=~}z89Zdm~P8nCk-7ZgwqOr}ImXa=aop;*CN7ODMN9NvQV#f+4Jgu28=33@s zU(Ck_JMvD#+(+{UszM^5JMU2=QU2qn1eSq&hrv0s`PkFxm|H5nSNlRlbx)n6^&ev} zR$w$fMRXf3KUHiWJ{T4`yR(cE{a;U6uzLsi{CUp5tkDFXc=Ooshx6OpuX5clxU%~! 
zwch@5z>ddL_6em}WaY&ir6AOQD;zr&6%m?(tIq$-B06rm=JQulz-0Ug&SW*lqzig6 zBVGJqNq!{O%rX!Hcp@8(wnxFe=k@&GmP)cCHi$cf-~gk8_8%?AK(S|0VrHm^VG18G z7RI_;){g?k0-of_qj-wD9?q;5GEN(g*K6HTX4nWywAhR@j{f&UrjyyRV~9ap>;@d&`U1U4JbZ8gq}$i_zZ@)NfoBf7TFh!u){$xn^$5*Y2I zZns=&W?D`q^;z*6#S%2(RRZM#3EwrX zI{ulTm;ai*v49nod^;1`2yx_I9+2Oexn3#M2(BHPOt2AHJYSyQeA^0wdalPcF z==R`eGo2!iul7XPr6wBgDy^9rg_*15{o0Kw*}PM?VKltuekJj)L(n*d zYsNOU-c}YD7a!8;AFQQ#Yp1S!$x*_M_(~%*F8JuH&Z4%TJz$u3-8@3`0>B6Tf`THJRsqP zKRROxQ$+dqGLeYCT9&MF3Aw6V5tT^KroSYkiEHfYopz`Ff9(nS+k-E)arsu)?HaL! zs#8e1>9HOj5d?vU^j0Ne2Fr`DISQf-kG@Qk<^)j0RuGFm9|rh8A`a zV1==)v|vPT)igDhU%rC2KvaDm{7cAS;!dP=a`e0kQnfdC>|JD#RFN1Roy2KMr}QGP z1-IO^nU7r);-PEkT|5srgxJ}!*E|}m>D&%gC&b{P$72K20hjrrsGI`20YwShseP6Q z*TGU1%D!cS`Tip*?KVDCP)SE@;z_uIM&Ig$_NL;1%J_Y1%FVL)0jbcAMYPN}=1!`G z#<1Sl6JD=HDO(lm=+hdy@8hu{M^X+dH;QvaDGwGu)D6*<&#kDWz}_=gyPJ^3l2r^g zkEkEs{xIr`2v>Y5S?dSoDW0zhR^rj)Rzoc!>9{sr4T^?R)Q7Yg{u8-?+0g|BRnuiy7k z(cDy1Re^dnNI`%cX#goIHtA#T&qYUbuM#G1*^yrunIgfyxdisO98n?(G?TOx-}*uX zZ{`JRTX1d0(l(!StK?GqlgJpT_TM^}xW+`fH*}V0udd~3A=%)u*W5L0QUh0H1m7$k zlTflgr7=d`f3#-(-446Zp^qbL!H**YapJ>a0U{VE=71+pD?=jYBS5zp@Q4eO)d-K0 zv*O6a?PqQT01_x|8UPQl#Q>n9n8!T;D9q=86EUVjII3coi!dvG`#B=Vy|1XV0Nj7L5l?+R z^>GgiE{u#M!ti77EmjKFJZC;`AF|i|9DQ*tVD^yAJEi*Vdlv z>#uXumglR1x37ANdv+yW=DqCk*BPVoTQiO25`nj@F`S;U1==Zol0>QRJ3LFuX_vra zuz&sBL$;Bpxyk(L?_4Su7l&-Gwj_H(SW2AkstcH*TPr@OUQYbGxe;2WkG@-HWyCRw z%e7uhKYr>vWn1)fuuvf>atDiEp^%<@6_3WDQGiIiX&ie2ZZ8Kcl7;&)3E5uPG<|se zW8EfgfPe4-!PgjMmlE?19v8mmDkkBH5w(TvAX=O zBhakEL~Ha5{fk7<*n(+Dij;C__5;c3LpG*q!xg?NzMc@r$4uPdt)yOo!OC?i=QHjI zO^JBBkTu&MXx+oH`^VgVM&6vPvmd4|TJ-t))P$E{zEa6{?6C&RTM~#7O%hq3RDny( YxFHOEMQhTs^+%x4Xb4{xE(+lP0Jbc4oB#j- diff --git a/tests/unitizer/overflow.unitizer/data.rds b/tests/unitizer/overflow.unitizer/data.rds index a51898f9402f92e2544ba42358e84a2ba7564b3c..666b8714fa3a29ad1a1973a04c346fb6ce1b8099 100644 GIT binary patch literal 8174 zcmZ9QRZtvC6Q~z=4ek=$LJ02e4vV|HyDvck2Pe3@yIXLA6Wnca54Jd$bN*ZR;m-Fk 
z)!j8#J<`+U(TITm3e35|vm5b9fKiX+SfcfK;yF+3k}*{Xk`Js1sK8nlk43dbS7%YC z0W9H}MqT$Ai}Yf<6GoOsso--!(dBKz?W1_<SD!CVJ3DhP51+d`yD?}n#X_yU<9jeDzkRvIz7EWT`u5N8@!>L8eRiV7 zY;BU-wKY(0rU51Dii6cfU)oK7qnZy9En95UNWyZSVr|gb+lN}f<00pQXZM+<+x+TM z+-o$}dQ{OAGZp7?Ize--#lH9|0n|69y4sPCuTL8#q&cWub&CEcvVGE4va9~(MmWI7 zJ*CHRXRGt>#Fe;!@0qrq#mLHxbAOqE>uptPlp@0fH`%1^8>GiVV|*~Zv(@*#uhUEG zVq&Gpl-G^8<2d=CtDHlNQ{M5%;uotwKCLF&{Hp=Qu_g`G#J6t!@|C$p;Ip=al(Fw` zJfz4ANU@qslG&4`dks@ASc+bpp+`+lDLNErU$eAZt41s~0-2S6g>+tSv~%RKhAIx| zT9NV=GP=}hXngZ>2+H`xPyv!-W#G?{#Rt}{OWV??Jr*{TWqA~4EZJ^W@HN}vR+@KI zRUT=~cFmB8k=YURUkBf)5|$W{C36PK9bL2-*}3m(-1eIVB+(bTQ_XwX<14}WE{*wy zwQ4x*qvWfIa-&K!B}O^&7qDu+a3Jkwa7XMird_$s++`L`AdwGp6CgfksL?1rVGkl~ zYbUrEeOGMohH_-I-_<}%wG~#fEYoJZaYTKJDDPI0%85$jGa%~X2-SZ~qHkiM#=)$R zxUDfxm*i7QVVIaK)AV4{)Iw!t88-~zbIS>8LmoqJ^+a{}D!AhsEGQCu=DyUVN zn{jBzGBJox&GDDAa+bs2U#&-F4b1Gn>Euxs25&Z)&`(R;9fuRruVp4P!E+8>FN@Q8 zirL;|qwD$4VB__2c{@DePeN>Ur$ptsQ38k9NFTmqBx!6P6j&G2N7mnsg^uR-QI`b* zHt86Zb0(lwHiNY~4@>kydf?aN4tfS_ZEho45;fzS&l1TzET|l}Kk6;$wE19Yigik3 zW;?3alxK6`GJ#70-xIP0_jK?U5-o6+#-3`9p1tgvbt`pHfb9wj+%|k|EfyvdL0@TRcvP0l!p)vJ@X?JwrJ04f;k=bZ};m8a}P@I~Vf=s$(+8l(s^TN2Do zfHB)DyO*MW7PwasFKS<>BRQ>n$GJN4YBdyF_lSzT6(@%Xih%_uOqg_*wps3xj`%H2 zQ5TM~g>D6o!&)zWiY02(7uxsW#P)Jrc`lQhg-^+-Xmk!6%(@oCJ?5~48l^uYER%H- z(lSoJ1t?ixvBXJk9b|9jc1RPPC90Td+3gT@&;|sFKLr{*^{Cm^9ZX5-J@r=FDxX&| zT@(6bH0uLrx>49quS^wm2haZQ$Sn~OciQ}keO+l~i()Kxak8K4Df*^Q??5|jIV}e+ zDyJ#snE^93E_29@kB_6fES2f?CC;q~w5qv%vkFg5RPXe2uti=gQW!fO9KJ}o*v)1! 
zoh>@YUdd+)FTsj1Hl*KVAcQxhyXC1y?o3JBXu@HPWa(_u`RW1VHgBal#U#_If+Eeu z7Au=#QvYX9AX2nV^vVK#kT2t-x%%ey zU5fJK8;`-LeP`cD`N5|kqGN#Wxz1IG= z@XzK#Gy`vwy2JzxD!-9-`lIJa6Xmzqc*3abBQ>suJE10;Ww!t$eAD_PPwDOG%^~dv z;Iz|8)-PI%;du*%J@1va4Aju{sY&C@bT82LsEijZBAfLqnlXUiBe3zxBpxC{)8c+w zmM)BgjT(skl?dPjfVt=pz#L$1a1%fBDOdpj+@>61&*?Mn_Z)O0`)4XW;9;toVgsc~ z-hQ?gSD*Ua%L(2JpQgu0q|B}t{TZe-=T5+U6#2GD9<|q_D%fs!8D>cmU7r`O-O^c- z&32G^w~{Ap-!ASucmhy;sPvcz6MT}6nf2mqo=b7+1E=$f`MH;=w~~1Y&0EmtY{z4m zCEi_N&`^z%3*Kyt&z3+>n*mQpBKUs&`IZf)0V#WHmyhBRnIdo>W6?nWYqWW4w&kBI z@n@t(?z=?U#!kIuhSP9Sw}(g4Y9r@oVlqc^kXSF?w`1aE(9{~pDz=*?EjA8Ig~K>0 zW|p-Py77!H*8W$(JZc|dZRIQe@YQI`tT$kpHUK90T%b3H?;gNUmvC-L!B0Hi>O{_& z#)9H|4C$#WUi0mvX>qGcZzVR_j3^-bf~EKbPg`(9yL(&DjioMo5K?VESA0N8ZiW}Q zUzQH{p0X8au_XtF(#^a@%WJ-w@jX};x9h-qM&I^ekM(>%Q613A+YssO;|g{_jNeIH z3x6!}q%4)N-UV%Y#b%@%i(b7I?|ihk{kMI~jmq;`eZ*>UiuHp(>H6^YIlnZTQ%DL7 z3xb{tiNlb{Kh@r>eLxBv(Tun5*NetC*Kr2Omd}>jopVrYw6m}PeY4*`t$N2(gvBVM zyoW!tlH{iRn@roDF)xHr*ARF%t#|_O%fJszBn!LF0&Gx^ot(WKA2bT}bKrzpP19|! 
zID-ko`?F1v{y2ZhP7+!}{u!$5W6BiY?&R%XFSMGV?${$l@s$gl{~F?O5&-HU2juWj z>i}26GPMg>T&RvH1`9FUxY3gMGeJMb7++eX0~fXY-DIib#Kal)hPUJsY-y zPQPUOJrRpAjoufYXWDDtyH}%niI4q7N8r)VXr3RhM+&sE zfh#36izd~R_w^c~nX6%Y#0e{DxdY;z4Km?9yXn(|#N7>{wv41=sIvD~efU77>t^C< zsBo&v$gZpTdB3e}1SU)m0xOS4DyOWtFYl;YrFwtHL2GTgj>7}a5@5A)VCvXlv>bV^Ep&sOxCY+hVGkwu^*_jnXK zfvkPgQJsx|5=F-kWZsl7wNp7PymX%@PpWrJH5 z0M{jBMh(2C{wKGImtn$z~Zx)OWrIJi!=ub#}=At)ch#j}YA6 zEMg+;#0z}h;rEL5;Mvp589E9N?|u9Cz~xS@0IvQiYogWfZVM z8W{M5!^mN+%hV^RjbnO=DC?w*V@${Sw3FZ5kQNcca?hL4W>p|k$wEO4QzHH|$J)X&y_1_ z)bt(lM5~zp%MHKQXOV(udR6mlkT3EUV>>!+r*9u;4j>u<4Agp_j;$QhtNJ`O*75gVLA)Py_CW$6+@;$&OPfV>=vn?=Uw!A_ut)mokPP7 zskWKruH(#Z<(xCNo1t9Jh^}vuLePV0fwxCn&M369+pYQiVhXAjr3VRg`$!(hpyX}s z0q5*#rx2^W`Vi~5)+^@5wXagMP`GS_)lzL@wG?gDqu?2!(aDKP-ey%tOk(Crm>Sy( z_*qAb9Moh>h0Ldk3Md3ZQBq8fa3i)Wd1x-H49lR#{^gE%ET14bQ&xmG(*CnFmoC(j zTc2qnS^T;X>K%ezGPhg{{G?%n0Pz{+p4%CgA(>gTd&qOea>Pp!*a}7 zB7L&3Zrl8Uy~-AYU-{JJ=njZjGtRHnRV z-lu;BWaD8&nL=>71EDl*i+zY`sc;{+V)u3da~na|0*YZG92?w+RlZ?c?ZfJ=gV#m! zp|<46&x33zm*K;D!km@hFHbVp5zixea_r-I&Z>s1Z|6{3@wu=I4Ew~+|H$&gXpK~? zQ!=uM?vb1lfIUHijAr$5xm31{c3s3Ip0|b6S#3QXl1!#{yL~&q2vE)mgz9ubJr9TX zgz@NiG+$;h<#NelJ0(Ns@#Cf%qBfQ=&4%>h8CEoIO;B!CzI=v0B+Y{4xNo{C!W~-l zNg2L@n3sQ#PmhCQx?Uu8>Jn0eWby3x_Sr`NRg5Dbpx~}E_B9+ z*fIoSScPkubjH$p#^U(IMu-EvmLy12ugaipW@i-_Wzlol%bqXqWbA-7YNQr=czX^c zV-zx{hLJVoVK(^RN(go_EKDbRmKPJ-_7{ne9*He{{3AV9FpZTfikJECtuj(XODw)g zUEkN!9`h?5*@qXb)-M5|JO;c-bKO0s_LqWNsh{bWl6)uxe$4D#Cz(WcNCH8b~g z@maS}YTaK%2fv`d^TZLJc4dBJwk24_$tn~yr4SVwO(SjZj%lxtS!NP*)}dQAQ=MU? 
z_ai7!Tmf-&f24VF!K|!-UAJ5hXy&fU`$UUHIRx#{(V!77mZ@4pV6KV$O{yRk9HMiF zQpRnl8zu&eA}6(2X|3n9?3Qd?9khLy8DlMPN6H39`e%*bfmW>lOWDz9>jQ0ZT$oyb z;h!HOI;JMymul17X+D?|KRq#4#|U923U&&IEPPhS0#x;!V2z@!*nJhR-NmLaJN{%@ev z1+h$kRUG+L0YTPeXlfXd$Xn0Ot10$I<}`Z__hphGA(&}$opF9JjCT@t^8v%?`pM&0 z=F+7>oc}$FgwoRz(e|THS6b=2vjvohWt_+Xl?d$#p80`UElb%g42z?c1}f99EtSAp ziM9E~GJM``Fna$XDf0ma%|WxYg{VShdgis3qM7bkl!G*8<&X~yCG$3t!8Vc$WtBXV z091Zt$k7(&;+s?Xqjwy9<~~LRX#wdjBk3*+8H9M)aM?(*k50aGDhOpI@35a%zGcO& zkd|ge6vyzVm)H}I>omgvy?tLW1FF;z?b>dq_&;bi+qB(s4{d(W-(b`FTF0ks3E73Y z9qgpv91=+}G*24M7mBjfqMrCCsA8@3_I)G7N+iy;O6$ZCJqCx6Nj>Y>gAH>Q9a{9FwQ@6FaY}%3qJKV2dyCPy$ zz?0FeUr#TbY=;=TwkRYpO^p*@TBux> zPu14+{`R?;YS#RceRlq*x_BA8D3H$Pas+L7W#u^nvZWU`cOSei2wEQxUXO@g?=QEw znjo=4>I!>bLu{`MUjH9Q9wHYt)DSz8_I+{L-`!}x|3^#>z?B@&_3&qz3>U5CEb{p# z)D`@HNNGWJb;4}&66V8PIfnUMLVLsfN1+{H(*VwiX4`;VwH)D+4zT$@&d?{G96%{e+xd$D+_$vo1&ri z+GDm9bPa<3Z)yP(@qQHW?OO^Wj^C-G8@O9_3GJZRKEme(IU&cSpH2{AfqpU)(IG*& z#)17rb%4KM;zN|_)gAm7u;&*rF#b)mv_t8x4(J@?eljre@=_S~c#RyGguNs1n(F+< z&k3g|`M#One*qjdAwG6k1`>AOfASP=wc7zH9>ETBxm1KlkMk`+Snr$ILrGCGzvI-K zq-PG6&>YeFeFFPBM&WL6G&;2F+1Cm!`ORCvPOh+$UC%Mm^-sw((xw%&-Sqr5D_yBS zou>YapL$=eHX!>eJj)2qZlW;b-E<449*b{`OLF(J93h-^seYamo9p%3A)63u`4w$jme3^e(^F&c5!bPk0+)&24Gf;ltXZ251g25z3N!C?}r`25z6{nO{$yazYZn90Jg=xG{ zZA|;OXwKe}XN+RPg~fBb+UmA>N8ma=)uy-K{#z)pSnYe{VFf!#&x z=j!O@D|$xm>@HIWnV~wm8wvI21t>@f8jk@tvV{zS`+jw$Z9*3 zWz=x}8lR-7@RVW*F3*vcM~42zzLbdy?T!*IFGHY)k+_rX-3!*~ycjY0=+94v3tHS; z#x*^;5%iVynRGLaDqnuxKb&V@6d1Ml8>UpI#c!KRZ04f2LW!_(BTeCO#c@zXI_y-d z7GUGU=~Ies?D&g0F?fl*bJ2elvO94B?Kf9WqQByf46~$LPC@QED6@= zLxRVZYuw^G&|-h7n5J`P+FPUAl2pqucAR4xPE_#tm?bYdYy|E~!RMr>H8^SxNoCby_9HV>X0B9Gq*Jm|ms4KE zj4G-KKi4-Q!(Bm%X@nY;04pQwPxBF%imRPMT-F)R}XOyv!UQMn$k_C&zn62yi%Dyv0S#dE?VnN$#iLI%_ z`XCj0^#6XQTh<%e$Uu0d2m|-LwNu*Z9yYdga*tZtdt-WoU#e2HJl@nOeABc7DMAJh!EabvQrpwE62xgdK;=SGut zW0Ub#lK8N&&xH+AINRUQW}|7;anE&Q7Gyza}&>Nc)N&iQ-Y|1@TiV`Tk_#m>&Dm27d}{&6uirI(LQ=bd&5 
zvnBlJj)oJ?6q-Toj2?aCDOli_#T`RHsMc*U{l#~2By*MioavSV#0j!#4*WVG3WMFL(?)1yX1LF#iIrz^<2?*7~tvfYcv$u zy1f;#$iZ<}5(#rqJ3Amv%b2J^gHvLmCRK~S%%0cVL9c3{Wu2qbYclg@IF`=5`|*Wy z#eUznAP<(ZRegq&d`iW+7xU?8p@{yGm24B;(pvns6vLenRtjMd&!FC@c@Fq%)jKsk z&(f}e*8@W;N{ukH!xi>&!ux?V__MS;x><14tqACr2TnZ3UmuN?W*=I(S~BQe;qmo` zi0w%ISe=io9bQx^H2uEfruwc6!0pK2}r;huU^>My|s z_K*TIfgdL$z$P%N(};0D(=6f}5t5OgjTVj-1D|zCkg56C4XCy@$L?!L zOKg^LR^2CKywrPS4~FXFLvV$++rvr*$S%S3J4UlN zIbtMSEleaFW;9rdxfd(~K>e3(m*5d7G6ZjukP#QIQLr)o`G-6R4OI^?{ZIp7ppE0- z00g~YKwKf%7Ag+>+lM4cmJFdYkXQhn0`H?sOq44g00AZ-^cJBP@NY+?S}5Km(oAfgy?O{VUTIEwFy`G4MM#CJO8zbR?WD()jI0F`w%z9g&eh>1?OR;c4IV!t>OTq%g#tFm_B>3r4F7%w zjI;u4>3^MuzwRG}Mw6}j?zoJu`)})VtsLTPzZz#sEiOc0mC zFWHvvQaOfV4TF}^CN*Tgy<&0D>~!SqSC`hR0vR%+@{SafW6^$I%2p+}xnVYIUKWnY zg|9Av=3Bemq*5^y7&Kd`=3reBmOMO_anbqNczvX4gD+X-?GTniUD~Pj?uBQJ&F)TD zt0`LC@${Qyt`UOv!Yg(uC7+oPtjSv=YWKI1+nX)S4uhx?wqJ6M`bAqovx4Z{z~$LV zMLrz%xX0xg`OQg8!%P|~@&qE&Z(KN{CN~3o_G&FH)HFjsV(^*QYJlq{phDdHtFM}J ob_bX1+kzXp1xhNZDono0x{CfhWg1d;9xuX@Iuk?v)X;$c18P6mng9R* literal 8155 zcmXAtWl$YJ4uElYdAPe<@!}4}3dP|;ad&;VTXA=X;!xb(wZ-+}?(WCk?e}A6lT2oI zCz&Km7KH%uzXg4!|Khy1;M(OKl|(a}LbK{f3}nN$OjiM)Cn(4@P$#BX=vFa4KtTb5=|xd`l>b@p`j4C))Y z%bh!seI-6e=6i5;ICt-IhS2k755XXmF6_VcfyvL0$BTP_M$K3KA}2!U7Jin*oamdK z!V_!WD8J=B2n@gct5 zMDunRz!9zx`FgOFVXNEp{Yk6W@Q&VReGZuwTPl}vn~ujN%wk~8sETq!`~4}m>#KkQ zknQSX-OIHVT{YV$aVe$dPeexj1&-kRirFZ6h7oR3zk}zy&v;o%1lPy;O6SYPntmsj zePZO^g#B@eTE@dS^De8DH7r}BjRLo3ORe@5q2gbv^@T*Y&SR?@_P|7&?){Xp;CDU} zq1;_6#u>0iJQ@I5=1(8?A-HdOMMXN&CZdko!5x?|+8lwLD9r zQ)TkRthl(7>wIbZ1Arf5G^t4>lSSA}Y3Ih=I9l%>Rq$1IdN*E^pdY1fRX>xzrJ~|y z%hw}MQtZYzwK9=8_^L}E#f6#v%EfAHKWirB&WbefD6+@gqt8aTBM}T%3)pC^H`o&T z0eaG-SD!)cpN%XkSdFnz!2S3Z*s{N@q6e+!izwiin8d#q^hgVB1XL>@kOKxjLJ%`? 
z9!0TJMd5*~`)HNs9e&^Mm`Fz!bCM0!c4s~#?~SNos+Fn*p_`>K`4NV6nMOm2Cx_4C zGXIf{O*BPv=d$~;E_n76q&6Da517(!_u*B{__UOh zFFr~Zmx{4coBKh+X>G&!rwJ;?i`HAjZ4^F!iqBn?KENvOG-vZdq0S5L#v0#+A5Td( zW-N=iC>P=rUpw&sux#HyZpkka3bt#{#s)n#vqjPt?`$k(`^X)&sI2HkPj^w1&?#2& z4+f9|+k=Hl8#+2pG2F95ac!IhG7k)_a#AKPb)1eJ>?bA+Gn3>zgO--o7xX*Y6~DDv zmP)!EMdz=EOR(4AT%@2t+herl@`UIsXz^BsApIO?wN3uE-a~w+orsL|mvy0!DiiO8 zdb)OQgb#fOgLTmh<-MlNsbi}GaW&N#rFK2;-6|Wh;?k!I085ISYClkR-H5+q5RqP0 z!C@*0Ra~4H^!BqYB zSaJ?%`WI2=O2aJ})(Jz_6%(}l(L}~8A5Qxn7iHOthGGHW(h2&<=^lROHJ5A^;iP_Q zJ$g_L+bMrkxwDevKiE=fl&&lM#9e2Hzrf0{z3NU@+1>dZu)qd>&4HgfBM~$NCDuVU zxeG^=DfGAnQ)4k+E;FL`hnK-QyRKd*VnR)EfIB0XUo4U zdfqMV?l3y)A!&U>N+mv>A9Jhw2D{qACIW>Ro@XTLE&5tr_jG04w_hi|jT}8xx?iij zuf}=0S>6~xfgF;BLD9jtP7!kZ7O2LT5G4z1?~UK1ml=)fPVx1VbUxf&p2A5P;Kbb;1Kr1lF`l=z0kx~2sXve4@BIzpX&?{!+B0XR-3_r+ z{HsC;d50SxDWWLg3yLV5=t3icCnw|xj3F$h6Y&lK8cGwDG=AC<2Fn%=AJ^Bh^^BE&OZls3FBq0Cn)DbtXxK5n$W_xtPi4@q* zTN&Wef^8=lYn6XtV)YmM1(hmlOE|4DG~TQyp5yaUdw4qaLyuYg(f5bASV!bf=XSl_ zd(FV*=D(i)3!iotXI~p~`0r;j>;}4^J@g|B*Sm!*9+4V|EO;N`E>R&liJdoVXXe;; zlTZ}(uwJL~S`kxVE`Ws9E~I%)qe~E0g6NIPWT3LnzgVcivY=J`4j<6WCBTAz#_1Aw zcvcf-@G;UH%Ntf4v*>7Z^+NbWxjD<}Bm05n2l|71JHkU@|MzY>kM88HEY;9BJtr{+ z{;H%28KTg3^sV#$T0j3CpU@q1c#>h@b{QaS#}jd6^EYJ9f4RfV$;bZ+;(?r&COTlp zImb~5mxgv$dbVLwM-$m1(hZn^?%%n&TaL+cy1Ao>uOmP|L$^`8z`osq2p@pFGHJCV z-{2SRc*E>_B)$j?^WuomgY$z<9?|c-pu;leiZ9vwS*r8fW16_%GAK>=oM--9TEWEB zy`s}S3wi^(ke>h3=j)|ic<1>FL+)cnl8b-99kadgbsWMTlbHbkq^u_6b;!AhCS9fE zT&ra2!}`kwf5+1C3g-)&c5X4j{M1B&p^%A?iS5#zxe7fdSGyO`sux<1gvgccOwrzz zMA6==rn?W;wkEx?8`QLQt`}DVu>DR;2XDL=Bqvb>HnD7E`9J-&c?pQ}JPy+DZi1#N zftDQ&FY_&x!#*Lp4=@S|zeRv7!>zG=5-xFeMZ=@el<{wXo=7DB$aG(aBBl-2Gh* z41nB$8$wM(Punyb`V}j<6Vu7M&!mXYn~1K1zoM3*cRwxSb?Ez6uy{;!!Mt1ijW+6S zlStPE`7OKMqyGl3*K?sd;prdOWDe6R;D>du{PcX>vV!!IaK*e%M5c z=f%M2acl3Ts4{xEFXy>(Yk1P)e<>{RX8=CAnaO*!ZdOcPDzO7|ubL^OtNiN~_$B4WK(StCXIF0>`TJ=*0lRjA zZL2c$AJ3Bq0oxYIn2o}oX;!XjY|QZg3To!>nQ{6%)E{)DBkme_VBycGT9Y|gmmyq( z7l5MlqgViW`^H4|QG{THD7$S%8{}yn6Q8vE313~jAB9HtEPf#% 
zdd!%5$A$>%I?==J?PIB+aT1ND|I+BW(_>FB zf~Rgqmv{@}o)V-11(~sF=a8IY?mC}x7?2q^x&*$I^ZCCvJRC6@J(`sSZNA=@sGRsY zZHCgWe9%~x_Dv0Uzyv>voz1yPSzNSeNS%_Oeyge#TP%r>7o#X1X^zGyone#+&+w|! zcp;k;sdhXBa_ln@DgQ`2LI92R=4-J37*+>>{FX1*LG9^R&E0ig2k(SKWXEd!+U$58 z^T@0tfylkOo!|oI7W~iSzG<)br3PCIH8Lx#{as+HY47-dwYb`s@}FXlAAkOj>BZB!etLemahUyk2NuJae*{nTs&=vK({8TNIE6kO@))wsoK3T%_3c_3n5p*16AbnouSHY{4 zcCBhsZYnI1)RRKJVIloTJ@XR%ST`jyPvmvY*r$azX~)ZdH*zDQ!EFC*WpX0~ zsVqDD9Dfq~091V&Zwi!yG)L<2TM@*-KRsgoLa>c=oiBZ_paBRQcq|)LlqEB)9suRW zZg?|pG!Q8+o3zrEr^+HOvz06CPwV)`3+{7QgY2dgfxR*PwlAt3)>WhoQz zfhK5LzV9HmHS7o*MzKzk`X8m=_+DxeZ~25b+h6UTCF6)X?H;W;#OOD182kn?y#u`Tx255XqL!`xhZU!ixz-)mJ#RsAD**QY4LBTkbLGHV=$(haT z#1^bbKbsb?eHf_vet>S953&Vt8Er}Iwwm>5iJ0F~^hd^`x?R79S|tj^at<*vsg;WS z{OxDSks_brcGdeM45dnJ&Xm@ItV6`(Ln&B0Du~pKTRY8|;a=MkAgMihhF4Gtc!uEv zy*dB}!8W7?%ySUe=qML>iH95FF;^Y1xu>J}Kudr|J}t$BCGF*k-(vz3(>uH7hJJ+i zJ+h?I%OW8-1hylo^uxgvQq(L~bRSEU_84HYZ(PGtb_>m7XRd+5)D2Ygt&x72Td2h2 z-`zp$J0M}+i={qjR5le;s?bim=2djo8H=`+!K#pI55NIj2D4rUx8N-7!Q}X>42a#@ zBb|Vo=e#?V!zRbzCu3$H9g|}&NtK^TjM+}73&4rCx?9~LHtbGYi8Pz1A*%lpPT7%a z#sn#RkXp@?`suxU;~0pg2WbVj+a;nEk9rhAzb<#qtNvja!U0(YD@=oZ?3Nd*UstKy zNz3Q(M$0pu-!;{#s|`4bZPGp5 zb=~XQ6^R)eZGWI1>mn>nQm$WV8JUz(2nI@;&ag|agB49%?IhPVu{w@|PjLkR{>m_+ zo7e%5k`-WY^6qy}9c10ImYDS#O`CCj_lTVmno_$(oLcFZKJClh&h1fbpT7>`jNsLa zbOBSprN&xClmfxy`3>fJkRDzstcIkIQv{$iKkEp{WKbI^*VZy@Kb5JeZ?drt zoQvf4s1b`KALvSxR$VQ4Q}b<@PF=*@bEgu~jd*;KZ?>y?z*)KpwmkF^9$G4jK&kNX zMB1q)ef}+c%s8Xcsvs&Ms4P(fg(}2Im{HjH^_!_y*mK;+92(O3e|LDVC zo}+mjm;|HswBzn~!h=!&$1ghw4|m*-0;6jGH8$g^G6T|vLh__k*FPgV_RTV z8vLdRVgG*`puj#2=1haO=!$WI4CdD?qIUnM2?K%r;S$?#C!WHcm7tZ%oZ}?^1^a)x zbK}r8367CBFDNG}ob;s5*10iBWREBLonnn^$EZs0_Kz*h1uz2PIYSx=OKKJr_(XJ@ zDYADlvnP7BH(FQhIW_wjZoc2Em;1B0av>w5Hz@(ZC;OJg)a01qO=&wkG4a7F7G#e3SA zlU4$a0Qc#pJ-a`R&w3ZCshD5PD?CP&YqF5wsk9Xhi`lO0u6#ki4Oy#cbDHVx5x|%u*HJ2dfmk zG$Y|hr6y-$8_n_W?>C_a^%6pjYZ<*u(CZDlb^gQyZu-UWh-|r+9tGi9MuY9zs)h!& za{F^=;Ze-Kx(j;yN09;1bLbEn?)p=yH=Q7l>+k`CqxWm5U*)-orS6YEWsc6_7FLgm 
z%0F*;z2Dl(`D!BZK>+D}5=Mh*pabfgDN9MuAzZdRJ=DY2943kB{;1XSjZQ`5HRsvk z;8KnmhMMXq-+IFyE~Z;CATv+;h|Irx8{VIwi{01kq;DhL@JyH3*UMY}cD-aWk+~zdnbrUnIF|}U#r{toUN4WY-g^fM~iJ*zKnz@*2-Eozf)In`O`)|YKaRCV- z^x1zIC6kOC=vK0NrG6a%F5;+5 zBNbdT|8?K_8IaGMfGt7)8Bs%Ow8|bqPG;d!=9B~j1?=$<2~hyq zaN>C=XhMGRKB$RmWHWd~WLYf;JmG4+$GCsWcFwtNsP`h8KSQ`PZLCnNh^u57+s-fy zCNuaNjgxq6)_qUS(afZz1Jq7O)ZliL#0_M-MwC#iPNv%Ytmw0!XD0UnfE$HWHF1-{ zMzYPh^*%QN)q8m>x2ah0Q5IUyfh`$FkP^3?^R8J4wFV($ZOKw{q(RW{Wew{I_=JPE zo`55&WX$ZY46L>ikh^W?wyG7-3C%oJax2Q)U1?b)jLA!t$}ApDAOzZHvK2NVhRsQv zg>r0S=)pfy4VwOp#SFu*8iY%#{sGD6?xLR8W4#+Fc0y39 zlP|l?_bP{?6t$=0OPb5pm2`VMWpiHt;5+Ci`$Rlt+A*1)^)TSneTzq!+BFavR=Xj2 zgPhKDS3XK%fb^?r55A$9HK*HoMq+jaJJowRh&hTab#1eLLQ$Sa3oys#*<*UNjnv%1 z{5_~|GIh_Q(_zAPtG?;pUH7N{Xo`K5C1T}G!h^k&kCC+&L55eMi#@(m@T_>czTm+i zi?#A0QAYurlp`JScf&d))PK9!px*aH@9rXvAG-}0`Bwy2)k9Hmn;N-{%OuYO8cz!ZG&Gx=5r|Gc=I`>XoVm-|rN1cN@PZs~_;G~S z&Qbnx`3Ma+-ePUKLq72jU~VYT-k_u!?T|_IF5_K@cPO{&IsE!*nYh?<|H`{bM8|L7 zgnyt`6oyqq$M9;oyJO{G#DMoG#q#`;I(7k^_mIk!&;9#8@&obu-txC2b#k#&x0&lZ z-lkg&)2&Igco@y#&uYDahuA9tkJawqa<0Fw+j5e3WnQO&b45<-ZUc#aSYr&!DI#@%wy_AH-UY=GpiPY%5qDxiZ>Tuuq`d>J*E+bt6yNCJ~ zeV=iqG$PS5j&hHa#$(h9fplcN3Xt_{%g|%v8)X?U<9VZW!SDI|JO4zkqYKR~h*f+U zd_HhLHY_Ed=sYQh!6P;EZ%9tB(vVOPNHC(arJ`5Uv#=A2x6Ri8aE z$b}Fa@DVel-)#MK-F;KC?XDl*vX_R(KYUn(;h=u@u0s5T9N2EbMh@r0fPnrllEfH} zq63haG8m3oijjOvrUDp_5Ot!W%>G)R;vRnp{2~zuxNtu5?ZXxw@@@QM)1KTB$~*DzP@huuwh&xgYn8}WP^I3A=x255d}jA zz{8MYc)$e6Z0qlCF>YcwK6VFak&^>NnN<|D;4tB#3?V-84~M2i3+ICq-oD)Jvl^i6 z8^fAP`1?bT$a2T}Hm;^FZP9}Wh&`(*>6&=@6fMa$T77L({3rj%hKg@@$?NCDQp4DN zg7y+MD}yUkd)HT((CWI`XyYV`4c;*vmspJ5fvGvA#>@Hp8oD?^GFymr7T6_~(s?)F z=tWn!*BI@_oj4jAS+bY1B8S#@aAL0qU~1=p_T%^QIp-nRJ>J)uvirwOd(nt9iwi!+4EZDd~Gw>GJC1 z;245$kaJL0kd6L2qAdDq3KC88%yuGk^A>uUvJ zbo)kRnO>vqVtpq!?2*(aP`@#yZ6z>q+(c~pX-%!^zN=*|ne^75Sq<7U@);P#va1#E oXAX+;KFOk&lwp7QHW#^Zqk&C^$v8i&L-c`8R*{aqO9ch-ABJq-$^ZZW diff --git a/tests/unitizer/substr.R b/tests/unitizer/substr.R index 59151a36..7400a880 100644 --- a/tests/unitizer/substr.R +++ 
b/tests/unitizer/substr.R @@ -230,3 +230,84 @@ unitizer_sect('`ctl` related issues', { substr_sgr(c("a", "\b", "c"), 1, 1) substr_sgr(c("a", "b", "\ac"), 1, 1) }) +unitizer_sect("Rep Funs - Equivalence", { + txt0 <- "ABCD" + ## Basic equivalence + identical(`substr_ctl<-`(txt0, 2, 2, "#"), `substr<-`(txt0, 2, 2, "#")) + identical(`substr_ctl<-`(txt0, 2, 2, "#?"), `substr<-`(txt0, 2, 2, "#?")) + identical(`substr_ctl<-`(txt0, 2, 3, "#?-"), `substr<-`(txt0, 2, 3, "#?-")) + + identical(`substr_ctl<-`(txt0, 0, 0, "#"), `substr<-`(txt0, 0, 0, "#")) + identical(`substr_ctl<-`(txt0, 2, 1, "#"), `substr<-`(txt0, 2, 1, "#")) + identical(`substr_ctl<-`(txt0, 10, 12, "#"), `substr<-`(txt0, 10, 12, "#")) + identical(`substr_ctl<-`(txt0, 2, 3, "#"), `substr<-`(txt0, 2, 3, "#")) + ## Bug in R means we can't use identical + `substr_ctl<-`(txt0, 0, -1, "#") + + ## Recycling + rep1 <- c("_", "_.") + rep2 <- c("_", "_.", "...") + + identical(`substr_ctl<-`(txt0, 2, 3, rep1), `substr<-`(txt0, 2, 3, rep1)) + identical(`substr_ctl<-`(txt0, 2, 3, rep2), `substr<-`(txt0, 2, 3, rep2)) + + txt1 <- c("AB", "CDE") + identical(`substr_ctl<-`(txt1, 2, 3, '_'), `substr<-`(txt1, 2, 3, '_')) + identical(`substr_ctl<-`(txt1, 2, 3, rep1), `substr<-`(txt1, 2, 3, rep1)) + identical(`substr_ctl<-`(txt1, 2, 3, rep2), `substr<-`(txt1, 2, 3, rep2)) + + txt2 <- c("AB", "CDE", "EFGH") + identical(`substr_ctl<-`(txt2, 2, 3, '_'), `substr<-`(txt2, 2, 3, '_')) + identical(`substr_ctl<-`(txt2, 2, 3, rep1), `substr<-`(txt2, 2, 3, rep1)) + identical(`substr_ctl<-`(txt2, 2, 3, rep2), `substr<-`(txt2, 2, 3, rep2)) +}) + +unitizer_sect("Rep Funs - SGR", { + txt1 <- "\033[33mABCD" + txt2 <- "\033[33mA\033[44mBCD" + txt3 <- "\033[33mA\033[44mBC\033[1mD" + + `substr_ctl<-`(txt1, 2, 2, "#") + `substr_ctl<-`(txt1, 2, 3, "#?-") + `substr_ctl<-`(txt1, 2, 3, "#\033[32m?-") + `substr_ctl<-`(txt1, 2, 3, "#\033[32m?-\033[0m") + `substr_ctl<-`(txt1, 2, 3, "#\033[0m?-") + + `substr_ctl<-`(txt2, 2, 3, "#\033[32m?-") + 
`substr_ctl<-`(txt2, 2, 3, "#\033[32m?-\033[0m") + `substr_ctl<-`(txt2, 2, 3, "#\033[0m?-") + + `substr_ctl<-`(txt3, 2, 3, "#\033[32m?-") + `substr_ctl<-`(txt3, 2, 3, "#\033[32m?-\033[0m") + `substr_ctl<-`(txt3, 2, 3, "#\033[0m?-") + + ## Terminate + `substr_ctl<-`(txt2, 2, 2, terminate=FALSE, "#") + `substr_ctl<-`(txt2, 2, 3, terminate=FALSE, "#\033[32m?-") + `substr_ctl<-`(txt2, 2, 3, terminate=FALSE, "#\033[32m?-\033[0m") + `substr_ctl<-`(txt2, 2, 3, terminate=FALSE, "#\033[0m?-") + `substr_ctl<-`(txt1, 2, 3, terminate=FALSE, "#\033[0m?\033[45m-") + + txt4 <- c(txt2, txt0, "\033[39mABCD") + + ## Different lengths + `substr_ctl<-`(txt4, 2, 3, "#") + `substr_ctl<-`(txt4, 2, 3, c("#", "?")) + `substr_ctl<-`(txt4, 2, 3, c("#", "?", "$")) + + ## Lengths + Carry + `substr_ctl<-`(txt4, 2, 2, carry=TRUE, "#") + `substr_ctl<-`(txt4, 2, 3, carry=TRUE, "#\033[32m?-") + `substr_ctl<-`(txt4, 2, 3, carry=TRUE, "#\033[42m?-\033[0m") + `substr_ctl<-`(txt4, 2, 3, carry=TRUE, "#\033[0m?-") + rep4 <- c("\033[32m_\033[45m", ".-", "\033[39m__") + `substr_ctl<-`(txt4, 2, 3, carry=TRUE, rep4) + + ## Lengths + Terminate + Carry + `substr_ctl<-`(txt4, 2, 2, terminate=FALSE, carry=TRUE, "#") + `substr_ctl<-`(txt4, 2, 3, terminate=FALSE, carry=TRUE, "#\033[32m?-") + `substr_ctl<-`(txt4, 2, 3, terminate=FALSE, carry=TRUE, "#\033[35m?-\033[0m") + `substr_ctl<-`(txt4, 2, 3, terminate=FALSE, carry=TRUE, "#\033[0m?-") + `substr_ctl<-`(txt4, 2, 3, terminate=FALSE, carry=TRUE, rep4) + +}) diff --git a/tests/unitizer/substr.unitizer/data.rds b/tests/unitizer/substr.unitizer/data.rds index ed57c885fb33a477f330abd13423cacc25cd8dea..a56803ec38720627b0f9a4e59da69d418bfa6c96 100644 GIT binary patch literal 16132 zcmb`uV~{0H7d6_pZQGo-yQlk1YudJL+nTm*+qOMz+qU)fJnxO~#{GZKii%YyE6=W- zbs{VF&b^W_0t)EA59mcEwBx#j`<9SArIVet87Hg89Nt9?>!h(u{JfYM8(#SAND)

PaEx_0`?uWB#kxfYJ5pIiJ`PUHXO9 zOBavmsz9@PC3S0>@*{iy`MidDrJBm0Qi}-MrD}$0M(LAZBCJKqMhhh}GgYh$isi=W z*?ME9NpK3&mCsW(lC6wO*GEhnnCDt=qfcD*>MGZSAseE8fgb6R;Zp7juG+FI0QtHz>L0IiYOE6SA8^oH?R5%++S8hAcDniuosn1!mNiVi=G6t|!eH3fXI!d~}U%YUd z1i^-fO&Yae&dYFLAEla4XUc3 z(4oc)@Dba_y%<*d?Kn6d;5(+Wq$rJ9;OcL~^CcHBz=fl)WixPDL<(YYY5d_BNbz{y zAlakIY`@OA#REd0ZxC1W-ZJi)+|2`LKrx0T!y;6nGFV2_#Zbo24tJldCqfb4}lNkGj0)DMaH`|Y3jE12iZ7mIsndd$*n!^b2z|Fsn zzm$ZM4f83M)S7W|>zk8q$x5^Ii_llI@1dN-fwOwcMHk z3}9^C#Ah&V)~8BN@LHB-4T+IbL|>A0KBp}sPWItST5>VDm1$(rtfmSUd36;eTBVA$ z@a2U?VUqeJYL!xE++};a)zh+4{ zNi*_|6(zy`hMFLC!>TGYB?EQszV)P#+GONy*2z#>^R}NXrN1$#mEJp}&X1wcS4z)4 z>!DaD#?JaVtL;|E7G`1>i9%$zxh?Z>9%(c2?15oBiECmkLSf3v?gP)%L6@BIvw@a; z^3}_8sBj_E;0`7WS+6^9}jSCv^(8Eo7Z`cFJ2N8Uk-4uCUy)aoF-@jNm*!z z%{%4FcTu;w&+5#rcgt3=JEK_J>RRfqDfsO_BgZ9^Z8`$Z!Wgu>YCBv%xZ7Nvz$f!9 z?r1p}7}qt^bcB|yt2Eo)y_iF6zleeipVRwZL>oqFth6kz!w0)_tR${F3Wya%OqQhU z^)gb^R+pDYPU4r9w*nK%i%%>RoSlc}3oRt$2bWBnSZMYn{n(W!f5qPE)D|%)NMpV* zw};+C>*}_HE((GAgMo6XjP~*M!%n(;)k{L%$w*vj1&hdBO15A+tBD?>i-oX7HP2I| zo{7DLVo<$9U0rDs(JyPgAhKAOqIqdkWm%OOFH}$`F1?G#N-(Mn9@5eV=UCt|p{X%e zAK}!sPBlh#>s&U@lhO=%!1eDFByDOImeCsa7%0FDp8AJ!ozP6)3K-oIx zK~3S++$&7@C!0EYtW-I=9BxW$Vh%Pgza)9UcTQPNJFMB*uEAD@T3kZg0IKH{oD`=7 zlOSKLF?bk@-S_;@Y=vv4_qbOlAAZrU>Ukz6=JOCTkIRfUdNfFKp3ri#s>}R-rmo7o z{mg)!_Y*dujGgJR4r=Ajs_iJ=gK)}>!X2<1GYrd*8C31C;qeB~keiuu3^p>y^jRzp zr&PD5%`n`uqw%IGBefFikZnexSTK1%IY*riIv1clrt3`Fa`<%gs6wS2yfm$D>;TzW znqC4o&cFoIv4P?~^xs~#itT`<|IMYtcw4gmJ9JF!r<%M?;k3(W(74W${L>%yA@^d- z#UZnxI;bz7Iv4*#z6~hCps3BUrMW!7^cIaLXdSgJj{u)Etsuwk!vqtQa{8*Du2r}0 z#VWGD6C0H(Wvwwf=b%2QwPC=}u z9X|Tv_(jk_w7~lrsM2Hau}vi6M6zk}jLkRdY~nt(`*m3^YgGd?HXPC6$g-C@Kf?~W z#e=6G$nBHDMbI&3h%f;C62^Eg$YJ<*@JtPR{&=x|sIQ6>^Qs2pt!O#L_vkL=F?OZ1 zOStB8P7h(C^3lXH0%lLhjBN9~vLQ zum-hReNDk2G!JYiVfoSw=BzGV<+vxB@#-nL@frYliliXMxnFF_`1x$vV6IhbY;CSG8eWV|3;h#*wz zb!NZ5gZdJxw(Oq`Rn=<7h%4Lm}MzLmy0YrGprKu6ipRI*7&|)f71KZl+T-zu@>dOqbHnSvL12b%iWp^I9Rj z?1#P!A-$xK2tbX@jt;CuUZWi$73Fbd% zN;;3NS96gXlgXk53!bc3X+rR)0&xb4$ 
zeVXZBg03HdHp+ylOM*1=3uf85O)r8_dXvvs(mms`VSvf6JdLg{Ab-t)jHQg;G zYw+F2vd+wAQZTxXeO=eK{Mz31*QFTlpZUHeT0}$Pk z5|ebNC&xc(*H&`tW4blZ)f0x5cX()M9aKy!WsT;kz5a|am3>iX;AO z5ToIjPV_?b68-a5%z!XDM52q#jp5TU$_s8VX5VGBjrTz_>yI~^VZY+--lNuQsyl@w zLFQhTU{L^JT zrV3W)kZfk~JA_-?zqf9l`q55r+ozm%dlj?jJe@bD?wmIY?%R{u0v(8_@qQw9fw{ zYG&yxo&&*%g0A)OD3GbuyI5yGt#N(3t*^;^p+VwV@6FBaTxqFnTR9+@@@IcGIDB%G zNPo5a(GB~!v4RR6doNo5<$jHxXb{hRb=*{u#zacb*~o zu0rZM#s4Zr8StV& z6nL`+k>x*lkyuVYwb$SmB4jr%(>}1&hyKFH{0b=W`$nPsESXk(IRekE)mDKhP8a%tt^<9C!Jdg-`qP>v41NuA9pR8lwN9p5%mFqb z{a0e7xr9)&k#iQ3GA}!Yh{o|6c9!u@?0~I!lvESLx;6ama6*1ki9pKoBQ>y%j(ZM!vv3K)OIoF--^o$v_Z* zz+T$jksfuP%CgeRk^dk0H>p@}(g8zXSsdkavuTgi*q5r33@@M|OEFK7zq?#>sWw^=ObKJZ=lT@Cx|CW*M?b zX;2=Wkerd6n;nVHiOwCjz@R)(g0z^X1VpmoqFASNz$+muT_hpZ5!Gd%IziO4MJs*S zrOw9ds%+v^vNWE)j8C!smVHn|`9eE2xRE@NJld|5klx6`KP+wr`LpVM5bp`^x9IVY z1&#$alEFV@^FHr53^E(fj0a^s7hc6y)eayV8cbjL%T92pxT)Or(Y#R5_}Ig}LB1G& z-@A8Hw%f!z`QgUlnk;Iu;ik=u>yc>71uQM*~t9LXtUX^Dmv*cifC7Mc;Ge_b(P zMJ%fgVeIk})Clq$ly5~sD+>AN;I-4%zIE+yf-6Gb812IT{YXahRwA*&8uaJ6lEt{& z?5)RL-Eku=w+kT+AuS6_IvZSCHnIq_qym<$6Z*U9AFMBUfYY)>rG2!Ii^un~<0KZY zo}$YP=9;c$14|HJUujrZ>&NdP%F2qWl+ug}{ePvGR8YHPmvflucg1d|>9jexy-e-i zYAA0qNE1lR8jm+SECpBW(QVd0=~%0EXR(R2l#-p~e{Q@fNmq}vdk+4Z9e&rjEV2oT zDX>AnJLtd>j_V?SF4P>uE?Vr_c-Z@7MBHPH%&^@56D&1&`ypsFF$ppCi0apOtgcWg zF-r-mvuzPK6(NC8|c@cryg; zt(9Bq=K0k6tDV2VxFS`EguuC1FrEiFl=yj`VeVg5Ticv>V2*n$a1Zz_sEa7*13tPs zBNS_rywKMlLTmAP7bZe$;spt&9t0YB*jIQWH3lg<)ALKTY$idEgjz^pUQn(g>r0 zU|WQ_Uu6F^XLY7>4*nQ(HQdS(x!uJkNN)C<3-LF0tUO2}=1XeLB9L3|0N)vrJM4iA zB{q2~vtk_W$nQ-n8uQfk@t@>Im1VQV!)*A}<{a?_8R@SVo8~`|ZhiNQF&F9<740w; zX(WD@70GCKH~3>z=}-jKIj?J~39D3pN7mX(v}2%w%rS%Q=#EdG3R=kvzobiJDB5{K zel${GsG_sOztg*cT;gegelW1G8`jxzIcW9jtjUt#aE-oUU7=bN=CI~CDOIf~ZO6l} z*Ez=xbDShc$tltok6EY3n9z>KZdp*iCw#oLCZ!*O>ZBopnQ8iwOs^i=&OCc@Y_>L~ zw0C&+{~8x~8Wlhs6EHXpCs6>=$%3?@tI`~$pU`o*bL!#|%s!i3LA-H?V|}iThMr(| zz07EQil5NViW*>fE%P{jY9>KgR+^n~ve|Me^i7Q96_>JqwWqbNdSvdT8oY#TP0PFG z;g8c$*L*BojqpgFKKH=1rfbEpc^Y8kVdO0xFOIka>EGHW$V1n>-9xmVflZZwKgl@G 
zco@A{z;P^Iw0)c@Hj6K?x_)Qi>_A-G@xsU3M5c~JsOD+J_2}F|xOyU+*-32L9zs|{ zeYUH7>reimJGy2{j(aKXkX)f%y1#RMgROk7I>Nj;Nr}QQfT35P4q62fF;-`K>C(3o-^TegMbQ#5_9HbYQ7@;qu!on< zPD|rz1dqNs>KKzn%cklky~R~>z-M3ZC~NAC5xLrxqeH?&qI*`ro%|^>C!Jv+LJ=A1 zz|k}#2j}EiNM`qmNp+-Pb1AwR%WL0gm@vv`JlLZTy}v26?+rS__dOBTtC0(5nH?^e zhK@tGL4JLyqEORY0S|}#w3#`za8E0$=RNBR>SgeXE3O8jbEc!i0K)j5aJscP$cHzK zHoXHWnY2?`C8Uh3$Fx2&#xtcdBHh(w$TSl)Y8>~p7VPjmuk2!7)j7f1gUlcsL8SgH zk9SLL?iNS`K_~-3NSGj}b5XCbzOTsCDOUzD2huwrwy#0G9hT&= zOR|IWJfMqJD=7tCS{|!|#wJ!UvDQ0A2Y1@zo1u3UIzK0M18LS^&#}Xe_68;4f4?4X zFzAtOTcK>RV`pJ8cREXnG(TNYc1WyZq0HKu2T_5oC4rGqeg#Sj27bFM+Crlln&;Yw zlU295#)Jzccr1@5<`H;`rje2tg9)$r;o2P&&`3!<9M;yy$iW;q8IKWa$Q2r6&S?QK zv?cefz6PoRQbhqv;sB=mexg0MMM!j%rY(9_Q;T8}sd{YCI(x^m_r!8CoVB5i7B$KM z;@>jYa?oS*G@mNMz`ud0ph3orfW|@-P;xY(vjsUk@*txnK1Rvp2WL8%P1+JlHx%Y8 z*J1ryZD&PDNGF0o%|RPIWGh#%=$32gp4D}GzpZ14irP}TBlmukdhdaXnxlxCBb9QAfpZuCnUWMd`hF0O+6cuC z%#0cSo5WdPY};IB9YDOGS-{gg7}F|1)89k8_Eh7{K5XF0E{b7v zsIIC^f5a7T1Ny-+eb``=HHlnjF;%CPMgX2-N4b&lDAi`Sq~&l59t{oT)gk6_e2KznseMZM)+<(eJ@` zC|s|-b$*Tl;0GxH8++lmvG9+2N{x~P!SBY&+k*tLk1r|N^>t`1AZg?L>*RdO@r9LtqRerlsOR1u$FF7;R2W+45!LKkSc6`9E@g$5Nyd!`_HJID?|1JPh zJAS#?9H5mmeMb+P^NUGL-_-zBxteZl+Dr|^w1gagc7?OyXzcrlts?gizuQRBEHjl;-6jM5_`|V|^S78K-?ep5`$?;d1AIsNya$JZoV(Hg76)#3 zsRw(;l(sYQFhai20>t8eN|0ywlQ#-6+r)vgEs9e~8)U7Y$(pXj^HxEZEW4FFa|?={ z%Eewmp^x8zD8C=D)$}1uwZms^x~Pv?CnkvP`(!OF%{^M@>{^{n_1$#$Sba5gey}=o zedhVfA(9td7FqLM5+WJC;bu<3)ZkdWU(uHCG1k(_@@5<`F;L1z?a@yOC{^A>Kp znPZw9ja+lcm5<)^VegP;`4~Yy%M66v{251-`qkU}>Ss4I#cCznIK6UIt-QKX??BL? 
z<~3TF7T!HuZd6Cb`^jXHUXqKgntMsRSUv!b&0kErKVz7YFQxsH0hg1QWN+LeJd0xM zGLs&=C=%KI@5~pDNkjLP)mP->OVFdSErl1K^s$sTR~74tim@N#%D6YU3?*kKz?Qo@ zU@q0Y_%5+!HVN}N-N&i;IXz_KN1nho5)2Di$Tcx%pnn$BR7Q0&RA%e6%lX7XZ#1ZY zHRZD%M^Ch5Ct-4gJiC^KJ~ej;3%T0h7dEyZ;}eIAFXI!Sd*N{vBKEbv&|^Ui-O3(U z0bY%x+#Y^m^o@E%-B&_GOy1Vh(MGHnaME!gWc}p#W?!j3I)m5R^eceYr4lqCVVLzf zZE;cx!w71B)C9n~G+JDyII14L-?lRJ2^p199Sw!wdKGmr0!}`CHStg+;)@8$uZiN> zlD^$vxEYcj`bWyDKL~E{;~c?uD|BliS;TG~Rih$x13CD%tzz7WRm;Hi#-aiK1Nfav zCm!~rgy#lgz;y-sPly;BJ?GDDEb?AvcUZjqd7hs^DKO~lT@TGJ;Q<4_&g;zkM&o0<(_e}b94RVw()g-HZ>If7|j zmkCV_oy53K;xd3;j{RU3ky-fa`-z z2pxupu64tL8mk`(KfKqcFMtFtQN1pQJNv#@kX<|05`ZzMx)=fWy1{^mFK31>V5Z#B zoN%F-4!O#dGmVG2RY!*FIZu4yT@TAKNCHvex`b|Ii(5r`$(^EELua$WK^#~WwsDT9t^xV z{k`h9(m(|M7w3?&~#hZSa=)ipqdruDH<2kKZ2*_X_8izYs zH!@gg9b)@o-n+yfgV(0v6N;$pd%$35Yvt0u(wKv@J67f%bHY_p70DM#^1EtMl)~q} zpyFXSS_Mfb7CmJO9Ccl-Q~ShixqTK43Tc&&8VVhh>9Oev4dVxT&WFeW`3+n`hC|Z~Oj8;%rlqi@NmTDYG$=F?K~kqmJF2w6 z?ubzNV}WI+`vwwq1i7Df5eZ|HGUqQNMXYc|?IxZ`veK+x!Y_24#FB=vFKmvba)?Ea z0&@ztVT_@<2vmpAR0O3o7#4O#rCMOP31BgLo7ETe;h3VlNT;BV8Q|Dj-Z(@VH4EiX zTT^U4I&hA9l<% z-?_m}{QoyMsBLO@_^#jX_LY~IQ7y{*9UY^9y8s#|i>gShoX~e@cB?O{^!8`{H>MTN zDMsxDJ)ljq*;ai(|MEXH`0xR}tgN)2D6nHlL#SQR;Ycb;3f^@mi>B4TW)a*`xplq* z(JuX5&@2u}O&OyR<91`lzAy6YPo80(SIDP$c3M`p(e>Q)i19+c0&zrzffTZGJ;@g4ZyZoS2f34@o*7&4|rf}kL23IK(#teU=gStRd zKx_C)jZvt$_M@k$0S5w@^bCol2vLm3L3O^U!UAJ`$ple|0Kx!ykbie`XT5%iev2nt zhBzUIX5fth`{zi4ADNc~3Ib6mD#@=nY?$~TWG{u@q@xP#RtahY{=d><>@!Hn3$%b4 zeUl4z3qf1~eA;#I`0lNa8l6^2w9QI?;!fzeXE4aW$ik@foYn+_F>Xy^N*n2GjFzrH z?aAq>xD3T88<2sqQHV&*Mt%cfmx>Qb0byGjmXkWZ+oQ}7Tlt}d1e=}v*~Jze?{Njg zbs-ZU1NoCB5S0tY%%kqp2o#~m6P-K7POa64Ab%29|HbI6+0K5%)kNU2#g;+U(ngvQ z1MdRTi5p91-s+jbuUU1JVa#UW-g|5|lbcW*vg}*E3vCI!Rke zKu?8!^?&mR!s<>F)vcl7`?;C_Q#-Pd7G9d8{8Sy2Oz#X2?%W8Myh$!n-mXpUqkm0LCA?iWAkmBu1Y43I(b3$V%?DJ;SJ_Y1<416}7aPvWHaMF6 zwl6&gYyRWn-(~Xg+^3)MJGT#y+>ptLnq46~xA0J!T^_^?*<@}=>$HTS7N@pbAq6S- z=$glf7tuS2_&*3!+`gBRj1IR{TVkNy`M-9NJ@^ZF8O$Zjyx5n;U@oP%3Pbk4W88&$ 
z#99XDT(xQ&=f&c90b+3&;D~!DvF8)?LO+HdN}$qtgD&X{Z}yRNP~a@<$t-lxhAkK* z{gClEZjcA3IS#~yG&llSAre}1CZ+0fiuK{0FE{N~?e5OEmz=eDUb2Lr0<1qt9C-YW zB)MniWO~Iex)*QldhCvdhW_H#$(&Wdjypx2`_b3~O^%IF zy?+K2(_*9PKDb3d^GX%gv>hiVu;E7!TEF5+2zF%{N#1;>pO0i`YJ3rluT+GNz? z9oCA#AP3+s4u=H}SqNEpRXA(oI~y=NEcdGKQOFp6*@-eJyT058HV=FX zDiL?|Ds}ZdI;Tpw0`>BaYj|w9)0b!Z=bgjzR-((ro|x?}+6Hl|Th9i5hpsT|Y+vNB z0@jnA3|t#r+k2>^8p`plaVP^Jz<~{EewK7 zlHeq{gt(-dbSh{`C3Zjp56(HVW8@8cu;hP7_rfZQN{P%KnoWoi17(h=Aiy9O?G_c{0n_NFUbvlvPVHnbxXp<7V$ZHhvBN@DSPHRku}m!Fhw!Ul3`n_l1Op@C}9!7)ofgO9f(>CshFi zT*&<0;f#nYIiE)~vVbcM9>NIFgsPpJ+?udEu`iFC=FrF(r)$sdf1~qo%Q5wmca3|(^@Aj<}_S48}+s;wrwr!Ns4xf| zg~2+@irSyLSWI+gaBmFXTp<|@9gJJNhrsb!?Hzyx6E+6pKy9!RhOo<0Y%j>3qwZhw zXqJQXyHc_YIv6iVF#^l}-OCrZ2YwsEb`7J&3*wv4P^|s+#0A#-g7Lpxx`nLRIL}kK zvkvs(ctM7J4FLPgE}-A6WnY2i{08?WyINSbYXOoef_ao&#rV2~JQ(^22_Q>z5Q~b; z#jT*w$XTCT*Jv4Dj%>W|7(VR>>4{CVZJyuOQU+=g^}F!+9%d6muk#Om?NB7kndX&G zSrlYSD`1m+{tR>lP7P7ChE^%sK2K!mFN|df%dAzfw1Z$Zj zIJFq$`{wfj(PBc^!`!svUVK)lR_%I%VJhZofsu0|mv1HL+cX`LBOS-PhAn1m?nr`tak z^Zgd+5b8ihO7h~~l6m*;OLS9aq?5NJ0p)NLHWc8TMW9aSC=K}i~n zH!wIBc5+ghL0=GM7t>;0ii~2~BR^5-N+i1gxWbhfbX_Fq?rZOp#6Pa{CJ-x)<^0Xw z(U{x%%a4o%8=8ov?}Z!+K5$B0s~tt8?1sk*8*t`n>`#graixxYqxB1uV*#zT5zT<9>elf5wxYsTg!XG(i~FFzJz7-uZjTx9dV$GT{2j zKfCG9UMaXkz89L{JVf1fnZh~@6FodF;po*w>W`Hg1k38R$oR${13kYg9{A}P{60+F zQ{GloCk{^39p@D0L<#k0Qha)qx3OFVc<11CWzbT(;dCLGo{%Oe^LJGVQ%DJK2(W&U zD1yDcs-+HP(DZZqj#&zz8clDr$L4;nU%-z1~kvp=@rr&<_y0_!$ z4_toI`JVMW7}|e8{$m3B57A;xM7MPxU6<|~UVS|Q+Fl3m1lZXQA}n=*Y<}M_S4c^) zAxM|y*jk9SY0whjd1&AVU!ZvbED&?Rt%QO*;VGTIO`mrOYd^&BWsP{{C|S|EVX(*0 z-4s4D`=cc*`^ZZM3hLMJSHdwcca(bw|AqAx&V)2eh{-&So5@jtIM2?EOw4rxpoJ{v znh+-Tnv(lZBi+VHddnH~QJ&Q5QPY5q>;SRF658ZoRJR1BJv4Vsb0hg`V3ZaMK^tk@ z6mM=ahztD@6T3rv-p+XP#}nO&PCZu930~j3ksjFog9q{&4Bt$nBc0&OO!EszoWjcQ=ZxfX^*+?!;kt`3zFSktcs{C zrw)Oz`J|?vy%8mj7NjkN&6k({O~p{ewU4oBm+=S;uR?Vt>^6@W-9k3vok+ z5o9%4sb1oNfkFCH#%~Kie6tN}FXNN%wjOI3;yrF_J_bU)Jl;Vs{qv{VR*4%-Exo&l z`0=a(^8BUcEYk!AgmJOoiVm6Kio 
z%&Y9uxB!Q?y6`ada_rXd*%&90+WHrmGowaxK=Q-njglIB8S3LENFj%t=&x8!!y~Kk zcj(FwCcr$*t1j;gpl!?m*mb5Iw#LRFox;2=Hz%yc#}quei`+R9(LE+DR&^|DnCaBX zfBsWf{0Z86yj|{{wl}Fa4~*`o%%BTxW5_09WMOOskrXx8Bw&mwc>Jm7kAKhW^4m>o z@qoqJ_XfILr&hGMRh0dFtwl}4XXUU+g-p(WA~4aD|o=zj!|VEURU7@$cczC2Mh>Nmw>0)S^^s?$W5)mv;{54O$v+zIHrb5riUIUf`$l?LwQnwe5QrBLwS8MvPXe7Vb zMCkF<^Hlnybdi(FiO*w}b_r4J^mmlF#~VYKdZFKa4E^ITP%@zW@Ulf5v2QPAVr`N| z{KoYZ6NnT>Y%6J`HV!iI!z)v~$+%!(_hUo%GvG`XuBF+a@5<^u?f zE#O*1!wO{PQo4GYOe`zYoD7{~5uSfT@Z2V=S+QW#j5_NUnt0YFi`4;|B<)M00`%JmM*jg_P}#tp1Hu+NGWEY$;44kji> zT-|shB`KH-JiM)cr9dcqJ9on=EArI4)-AU^@6%{k3;71C1o570EVs~%J^YgfRx>Ii zSTgk7UKkE&cOt{AmW>2>Ho7JUQirJ)!m1=2@v_XA87*HpTeUHMjF>eG!Mq5I)5n>B zK2#-#us<}7fr}y?^t7Tmn9zC22l;jCIqz8YlUviO z%*Y-)N+1PFGv1YuXZp)E<;2ZR zC`{lej?PVnWhoKqgLl>Y^t2+S=W(yFl+VDq*k>~(N8`%Z-Iu-ipm+h zkn>T?d$>n{Yb#6*%wj(2Bx>`>rg46lbZ32Npy@SQo_r1!STOcH^ve@qHnz4~zz7{T zo6vk9xZ1xT#c;6pjth&4)tgj?F2-JN)cS*v<)o+-tm;>Ys;^PTlR8nb60B9F`t{*T)hF3sn!x77 zMDpaKfI69G`m+WOSv2EOK)bwi8|7#ql!xgj!b!7>{gg|-p?}~N&#iY|MmpI~@${m- z)}SAJ{uC#iOZsn`5i(V>@c=Yur^78GcD0t8l&}RT@M7dkFx=d2t3V!R#(v=fCm1dD zVkEOn!Kx}JZKAnfAd9_(5txjf*weoTsc6yL>*bkee+@EJD*kDZCsGk2w6{5PkiT4J za-GYTbjpxgg_bk|mvy4N3F%8~Id1)<^XIf)(K2l<- z8BMuo`Yf8hQST)~n_BGnoVP%+5>f=s6fD;XxB#-Z+thWBalUCS29vX5QT;>I1yFmv z7-3~Vn|90?f!fCIE`~D0M%S;kE|@mg%rZX1*&W*b^BAjf#{SydlY@X)<*2o5#~i-p zrS=ev3OEn;{Mk&_eVM1yarn_R*Zah$8-Z)s?&#a4CAlYT82n*YvT0AHqc_mUCryq= zO0J1GHbZ=m7UEKbI@H*RKK_*&o}0gb=Hd8p-<%N-^Lr&#d0mXcc>QKIb_+}MOxCAY zONGKW+Ul3MtMBT9W+ir<$ztuBlUjMaup0$FBrJc!s^U!*amNd!G6PDw^rZ|!L!|o1 z)eayrVf|F;u4zthX5Q}e(73LZTD+xQlm5_9=}+)?%F<7Eyt(VP}B6?qYGhD z1td!F`E4v)wB+Ub$;RWOwmcG#P8e;UuG7MlpXZ&&V{=}DM;QdNxX!6x%>3JdI(_kX z!N_7A`1x!m>FMgI!$$)mrUkD1^kehR-O>9C%Edm6$5{}^)o=f${jsaqGJpUH%Q1$P zt}c>xW$UWxu4anwIu|2(iGin@vh`j>xorl+1y6=YbguiV=uTSUVctO0P0;2s*5bTQ zU5-G)df3!q^QdXBgDxBNN;31^(J@MIbZv-KzQ#(8D{A=w3-09st?zT=;?r54p}J3B zYov4$mGHgR!?w6Pzhlk%k}cKIQ-paULQxB%-5u}_#05b+*Hgc!XjPuCMx)RE*{_t~ zck}|wRYJNbhE;{P2o5d`EF9~dizWEkutpDO!i@$|?64du!vpG*iwFZZ@rP^QYMveo 
zvK^ZKz23?=6|_Ed!X9G+(Cq0Vb4qw(PP; z^&nmVG4t^M)!Gl>Z)^K{0AGR+;G;l+LIoT_1_JLQgz_|P&>^|pYppXQp}3rrOiPFi z07nIZMS*i-AldmcNf5!UBSL|S;vv~twyP2C9=C^*;-c8${a^ihC~r?M5F{ubTqrLY z5(E;&^uOK&NN)dn-yXM*?ERM5x7#-#rQ&b`r~avCEqG1r7Y)o0@r;G!`E8sCE{hNe z0*asLdD*#M@Os9jcepoZ+dv0UMWm(^Py(6(Ys=8n>RTf6Z*14^zD$Diz;eS)>EL-$ zT@l;_uwmR;9U4C@Tw<%{4HZgrzN=(AT_Ipm5S%#Mrh;H3eoVehuHJ%JL?C`phHH}` z$P!y(M6yAmM6%UX@SidZuRki`2S<@^d6dTRz2VZR>5JLr3;e*lu>$K;GtIaWW2BHIy)Kutl~C{P zEWEW!Y5DTwd#;t6)z$A&CshST4;5yQqC2##Cs_rHSj`=whsXdo0k8R5_59Gqt%3hu z-vIsp>+2!$D#fiH{^LHqhrY4pDShQ)1?|v$<6z>MO7c;1kz-*lw!*xt#KA@pTuep6oNeep2T@wdkI}On;Uy!51^Y{WaqG?uh)vexcpxS%v(h(uDlv{Uo#V z>qRH7V8|k_;mzOWtG$~Mu%MG%5Z!UHvGtA^cmFIa8TTH6=;HIxY#aaGagF(@`^BlH z-$yO(52HX^$mnCzl93aFP%l z*9D_R+oolT73!tgNHDU*1O5epqgIW@YUa1NDLj^xi2^@&pO(X?YWI}J5 z$j=Iq_dDlW*%e86RH48`KS8%P_R{0(ks|Z*cv~m)&0|bGP%KI~QVVcL6lJS+) z-F+sp;=Dqj5{k9Y?~>q?2%)sJqoU(nb7|-KqoT61hpmEYJdcG>>Jwij$v;08{YeyC zDI^)?-usle#d?ZubniRSi$OV)Ce@TU388b{)hfh>a&}2ImFNii6J~^?(g;$$E$|)j zc6Kkb!Y1;7wxR@hkpHcVT*d+BVzrDLbe6;hm5M9eBD^PoTK>6rCU<$o)X>6TdGlkepauPH5#8~x{hvC$WeI6dj zdXw(a7;}}LnX0*BTSZ4d^S#Y*o%N!^@F#24#UvZT)ZFMdvrDtzE>#0ohp>Y$`4x#b z$`}|ox}*q5z@^niz`%Lgq+LG728f~P2 zY&(Oar~7-M2a46%DcI`K@G=;vX6+$cyX#@web+_4jKB+*ej?6f*GfVQYQ0*)RT;xr z-tPv*$HfuY-Lp*WgZb*}G31XC0nvAX1AjE?+TAnZ6KRkjue{|}uQNQCezjVbEkT__ z#x%_t?rhJ~-~li7$|@1`<}Il8Z%NhFbz9-sH*Mf>aCCQD%37tShB#P(SZ47jMehCQ0vhgfN6YZ}k;%eRrt-;+h; zC*THZn#JMz%H6pi0sI3gFq$hfB%+%VEuky8EjY3z7Eq#P?;9)+?sKhvH|d0hwVafK zYP1F68!>G3=I8tju72~P-DVfuI?to*!8#9bJtfCJyHj`-shrSz$@*cn18cJ$Mj9Nv zvJi`&(=V6=ora1)G(uV;k$H#2ED_0rH1alsF>9 zJu38^*g`kKC4PNM{bY5(DC4h8B+1S}DnrKRI@|Ij_(7@??8BZ}5(}&g#zKyod7pjK zh)LK%bZ;^_m$VR7r3mzkwiclK}HH=O+y%a0l?ygW!%!)VG+gRdeB4%`P1&{xBfXScv zVJ1I#T48lzrR&rg4&&1#qO!HVXe~yLNwO0Jz%!{S#A~F_kdUSQxgs?jVw}#SJxIuY z7VVePq6SyUJ~AJL;8bi`r_thA|6UvMj+@dh{0F6bs3VxUVahr5*q*>9Q(jFS!DKH( z!n#a71CcsBP_2-|Si4LiqPGWsG$zXmhuxak9F25f-8nNQ^|P(R0ay1oWQ!X9mbHdc zbwo-t4Tp73dDf;Ctwwhh4#KaDn6dDttS9 
zBzVWik33^PRh%uH=}b5*?x|HmZHmw!PLGtrv`&GjaIYa{#8U>Cn{2p6Et!X)MV~)1 zY6@W`ai^Q$a0XmSsonbHw}__O?(*~KejTU7!T1|}ex__? zYmIJKcUMz{?Y3UGyYZC+Nc^WT9QCPP$l*eX{Ib>sYT$~_XXHl6({v?;gWSV$$pPCX zkd^hvj~@^}Ggi+<1Lzt*|7!*@?jMgg3WNabkzIWgjB6l0(%bKP!EHDqE z{~qYDhbFJ<1v+M}T}vzsLr+kRNVCo~{=gx6x^~Tn>e1ve$3XY82t; zM)}?0D6Kdb6QgVVefDS9d?)KO2#s!?T|FHA7l8-uuXmT-m+ZbfmC7mpXY012pO-4S z$*d_ya|WCjT#U6IwX)ZPoMeW?XQP=v8!rY{vo^G(J3pC>Agsp29 zy#lm65w?Wn!mfHSBmOXlc1Q+oBM;f4@+mg`)k=xw!N(PniVyD1?w3d4GA{%00re?? z5=7#wesNFPHCGPE#e72$jAeNz8%-pCc+Udom;MIe1>s!?%!bM}yUa>7I4SlSoIU|} zi5MBt8kkjefOm$Vszm6c?mJ8LS6y>V5rz3xL6wio-8rMxBfZy%=P~Ut?;^R9W*(-n zo&gMrR&!8lUW{#xcY7BpMVdWBPLrsQ<~-F2O61_~KhEmPo+wln`{bZ2R9f4ev8)sR ztmmK!F4+@kh5=7L!6du&<_77p7z~5L9@Bh=Aq6usve?1CUWaZOjG}awF-id~ zVbvV-1XI?rn5Wg(6(Us9^tKE>jAl^Gxos!>ys6J^TzzRQ3+^h&)B$|?EEVdk9T2T! zDSj(&lA232S*YO@)#c=*ooC3ym!$wN#@XLNNaL3^-J9^n;TQcI^EVQ!8cqL2(3O=zriJ7l$PEu6}Po7=KG`weTa-GeFggj{&6+=wS5NGMVJ4vwW=g7()`MYr0f`TVVfZJ z0RjX8=E5IOY(`0IohoNV#~nbu-0WNP@j%xllg2BBJaAB2jz*P|`^;*CSs_QUs^sYImQ^*H%z- z=IHFg*}pYuO-Ra^rPVN_d7Q&HM37UzHnW6y`<_AZNI52&Brig%Hjb{@Dkh|U^f3#3 z)9wkdX_=!}jCdzQz1RQ8e8jRE^bxz4)J4E8JR9{92Q&GcWB`s%q-g))A@LX=%kGXL zi-rZedwUVzMtcRGDCv_7U0j4}h5~uKpUUZRx}NLQ`HAw}0B^H7a1JI`J{TX~YJe|; zE;~0al(ineVls`&N+PaB>p?-Frn7`NAKnN z=S#1DxZfs^$S&#KN%j-F`0m4L_L>kP@!X#HqXKK^ZwJ6_Y$?R8FX-+Uo}cf7z$(gn zC@)T0T)R*jww%^HIu3@O!xO0kNFZ2Fv^ume~DM_V!0H#s|;{(zprb>M*9Xbb^;n#h4y~y z-1HVXb8h?tYPLDtL6t!V>LdcX&8Js?SG#F*mjXkwBjp>k;Pg&8Wrg;NkOo*tDdeC& zB)sMm*q~j|$uoPU!omPUMk~l&Ma{ZXkEHDu+ZmY?hbTYDTJIv`s6ZS)AEBWeq7tIg zXHcG(5a`hXj8Y9%z9(z;&8Jg9d2YVW(DwVCi$U|R=-yH^s8J*j=r}gJT0E%l+$6!g zEYlMQ-J)oUsu4xp$EE4ClqEEh>$FbAhfxHiF@vZ&nX%PTv(fotIs%H`~vqhCsKFMdKgn9b~G7tC*3yft1_`E?I^ zob^yVBx}$cw-o>GI-=X>89nLW(kjUVa-s`nZ&JA?!p+g>7R!a#B0i*X7OM0teugJ^ z3Be?flIDZh$3|+9!PFTQV5QHuhwU@i*A zLPi#q2s2E1XCw}(eV}AsG#ZL4$2U$0&&Qw)QM1Da8@hEu9jp|B$0iQpzV8jfx_-oE z7F(zPAOnaB11`_Np6~#k5`Nwqh?mcA2&=+(V9b|Fl0+dNlp!!gA(zO5T|roCM36*~ z5Mdx;`K?2@ILM+n;d@D&#=l&R7U1Vs{=VC$mhSQ`A}+-{ZAM~Dfq4<6B!1m5i3l~L 
zvM>HuzQ{0zj<5d2e#G+w$dvxX4TH$QY-X`T5)x8yi89*7B8>#{>Ex}#J(!TIKE87y8zYzL7`aF7_ z{2u)t=`PU?ym9J3oluuG)hE_VCW<>}Ku^xOVOH%xSHV{QdZ>e{GdC;+xm;^;faUy;g*xm$;N&j% z+=)4vo8CXOf0P=E{~LY9eueG;aYOtMg+Up=lk|RIfmhe>OHMw)KANm=N`m7;TQ;OZ zE<#UDM4s4j{Ia7As^Li35Mf{7x;4xrrA?)&i z#bhSg;a=Ue%4hJ&f2wM?gIClWjR~KLpFK?gDJ=(Ej*LW{ z&cRcnyNIjJp?+O2?|_%t8+h}eg${_e#= zAYhiOtT{g<&`hfR4Lh(LuavZdB6~znkx$jzc*H4DwLwUdy{*AnM`fYD^eClCfO=^D z^D!i5hf;km3L)Ny_Q7$8bX3ZpFMx)PZl0nIqo>s3)nz3N$$iO^G(4o>iex>cCD;PV z%2x|lsSs{Z6&H?IwGnnfg;qdX_OnJ~)97rD#Ts;VTqPmtRj{j~h+KJBFVicCAy3kM z$d=M!wC7B*vEL^>ny)~ESnxEpQw;V99F+(W2%SwRPSu_nB{23gFbE9hIgNZ1JMNcd z(l0a^+t=}V{_#-U)ecG&hpAHOQEF@>Hi*Be`d}t~(-L}`B=Q6-A0aIWfS5r2N2%e- zf63AW9ktyXE@~wu6?N~4wIi5PI8t_Y;D{YZKYzW`A&4fq=M*Fd`P#{rxP9h%2G{QA zz%97QfFFWhq1s4NMY3LSPYOovgcyj4^&Fm@zs`0I=ya8Nx?Sw1=TrWbXs$BV2P3T? zfFR(Xl!Hu-ASHF?1xigVBSrTFI}awG4S&Qod?wZJig1%ktvg-7S&G7Kp^;q@OzC(b zUytc2e&kv#&eT&;*&>g5smEE~rFj--UsIsnP-@p}o;#bZH$uxt`l&U~){ld)Zx_=k zUg~B={@p2TkvL%^?_7*De*CH@jakLr2Xw2cF1k824Qr;pNoE0s(>fIXAeg?X^ED>C zb62HNZ|C~DMON^Ust>6TiP~Np1MS7IiMKQuY^}$gsc@SM`g)Rt=64YecAjm^0QpJ z)$57?QEdYCEQ)S;;OfM}>S5V3U-lQg;bspoUlgLZEKF(j*B(xaQktmjcW=06axX`9 zLlbFHI#eu8S8_lrD_D+;7PK-dl*#>B2)jKC|J0IhNo;*-2Alls=UW;)0*R`Z{yuD4 zXvKjr4`Gi`KFSn*)?8V6y1&k@O~twUbk5RI@4~F+GF7%G88$1F18(f}byZSdj>C?> zVv)F|FScQp2&nSU;NX_kkLc`cq3(^vL*q7bnqC_^Wb;j(W1!mOw8?#Z17s4~U{_36 zB1duj8bK&xbZSVhWXik`bvsnIiv00 z>x|d1xVzFrQNObr45vI;Pe;}*-Ze4l!83D$*%sWhkwAu3lQwPkGJWeWf}Yd^A@BG{ zp>jdTJwq>PzpoH9o(&}tk4zYsF8ow;&LmW-)CqSt)eWq#v#(#OTPN^VvOd*8G>+-@ z6tQ!z7tdXg6ZyQmdtdZuL)}X7W0+8m(Uv7C#Hg5$;(sOYvwg#MwCg(C+}V8m0#kh6@1)IPTK#Tch&#? 
zhjZ0cWQFQhR#bHObdIjh_pJZ5?c8hAQnp&bu|a!kJG)5ZBYc^nRg}N)4uQ`* z;oFt*WbpH{y1F32-g6F&Dx&0)^g`YL$Y>1D7zP{ML`@pZ`C!)0a}u&7#@Vptz$rP3 z!^5Squ}zqGPWKTyKRQ27aQd?Pg0JDdFg{O)g|0i|zd}5fsOQI$qdndRoy7e2 zGh$iZ;PUK0(ucO2WK221Jg1t(v&PP6b$4p{0d;vJ^{=o&RSEv1{;}N?+u{mn*ud=F-0) zw$7EJg{2vC*PQ&~)27%1TQD!KG)05~EI8_~eH^DzlrqD32pYDT!A(wC35{F z7)b|;@@bhF2Q3f9d`}3yE-Mq9^>;I{xwVHNZXn$d+4`o4Su`HDZVd)jA(qjwHG=do zRmc*63X+OKr%6p|@>^aSGkGUEskipG(lmW((%)2`hrw0+6%jyCzA{ifDQZR?2tBO7 z&kl~WXhSy%a}$Kt^&|8(jdg&diwekG*=Y0@siMr`^jP^ZKv=*}?Mt_4|4TKJ*eIt; zNt#=NyoqI-;TyL~@@N2&5Tg(ybwR4Dzuw=aV|Job#Y4-WCe)KlKNV#nYd7ieBk9wE z@P0htB(+UA(1X6QsR8*xNlBqrJT)Cu8$??grCJ9}8(Wn{gmxvdbY+V1VVAu^Fh#=l zj>&EyQ|>4YsjfcNoQz3T!Cmehn)S%9*}vIydR-r|{4e$ECoVbEl;(CYqF;AxEKLni z6a;M5RZ44Noz}m2I7vVl+^pJnDgEI7hW1Ms7s)mslVCIVl+y>GC=a5Dz{g%&);Eo9 zd}3;F!c0zj*_ALkeN^CN<&paEPU)YsFpMcQ^AAOOetjs9zw?RAtQ0j`O}{o;$^4$T zEnRvqXJcs$RYWT@`q!b>7c{{P!cVhtWMJ7i21n3#vPmsY1>3eS-4>(V&4QEfyL)y9 zo}#z%d!2sd5jwYO zsa8i{Gi}5Duy8HJL*sw?-*0RAa~9#L;fT+$bx%jCsk2v-qpKIP}H8G;4q_Btr+N!jkGu52kETe!_og z#4KF&dG%Ve#hHw3<m?F$A|DMV*w7ppn~;arGjM)#v(yNjik)5n z37*zwt&^WNDC*FIL1d@v;8c>gbnpeqwmP|73B1S1Cgw^)E9P7KoHX$#O<Ow0gZ*Y30u#8g6 zR?cT{jHMRZ6^2-%4Y`MHvGu`Pi}0lH(xuZd<5>4=U^3h5(I)#{#j?h+m9!U`#s#4+W6NtfJa= z7$KFZ>!G!Q$nc4)%Zc3(naFui*5B1vtnpRP(OI>5u_CeBK7aaW6Ver22i5$cimp>_ zqi_yU)X=coIvYJSJ~EFbGS7|w>{!<}dw22C&s5k}hFU#F9i0yBI(Ll2`6~tla%P<&uiqe~KW>$q;R;_!!6DyTwM~HP9CS~E%cqzvl@FDc^;i_I?9RUWo!DUw zQmye34fjEfvNj^pXk=+QPYP%`J)5O~=TkH7))t1k#h$%ZEn7h!$2lYo?O-N} z?hWbjaq_x!1Uo1P?}Kct(|0gU5x8l4XvBDV7=KBDSDMhxOPN%i@E%$3kutxP8^_Pk zG=1N{jxENe2NQ^Csg9=~S+M`3I*elqPBgG6K_HYH&Wwi+HXOG@YE^Cng zOn2g7&)mgjkqfOc6q7n%DiDu!b7u!WcHI^1>i2(>gh#6})1 z1jtIZtCT0%yb6h#q1!w^--b<9f+*+($9|CiW-lp+a<#w0(N*6T!xwv10BHTC5OsKR@>g>?A1IA->~8%= zAxaOE`Bli)&|Y@a9*&%_(kuzy-oj>{h+vJX!GTr008vqv0W-7(27`oBXcqjYlbVQ* zMI{L`E{%KS1wmDcL@gPm+p*XPH~u6Cq^y$FurlSL+VEGn8`5I61>wGgL za+LiP|DiHP`E57xb3bt#KKP^Y^AOD_unGuM4tfi7E3N%n1nV^)>V=wzf({@VrgV%_ zaoJ7u-L~_(@3M`7i)>p=AUER1CrSxghWOj)m8KnA^fO^mWO>~MdNw<(v|XtEbVU%M 
zYIJX^Bh7!0WJ8T8+ZYA!_%6meI{VzmG+0Qwq^$vcly*^ACjmaLjmeL*At`Hq$ z_-@(hfL%vnLW+TjHP(tX!iXih7ohHntfIb>!`QrMvdxE-&9gc^ zvUlXA1ml(*Squ*|yog;UG5wOlQyUU)MyP}*`zvrQ-HsbzMq1~RPb_|9Lp<5Tuy;WG zUis1W{qV*7^$z3fQ(}Viq)>xvnp8ULN-E1nI?FEM!0t}(`6}h@Z*hYl8jupdmw|MlC6WANkj@2Sk`Zx3vEyXH0WJ!1!n16uG| zmi}2Z)^$Bxyl8DAhiGJzUwGTKGg_+m+n@AmZ{Zy0$E1wHun?+(d@}6E8~5gqDUnkI zSH(its{HfB*lR4aex);SX-^#GwM$-PD~|7ZYqtZl?j(J=yWWJW<={nzdDcV-&-yr4 zLUelQUa)oWu*m2{RDIi2Ldg+=0Xo=9e%;+S!XN>l zUMjG+qDp@0ntrv?=tO9Ku1hYcw>yuu;17@>NLQWUSr9PLsK{gnNaRGg;9Zpz0pi%c8cNwBFaa(HAKOWeSRTy*N!-8Ox zC|e&OfYMG&XJ*wkMa!SqFww3}#qt&4%H<}+4-JA% zrH_ch#r|!ll_j>??PRPegJxdR4jpNdrZWl<(m}6$8Km)D13I^e8$}u91Qab{*^;=D%i6kp`tu!-#;+s|a1%N_!O|7meNM(J%R z+!|VH%opDhiidUJI9EDW5=#b(i%mxh$L3$#shuKf95}c=Kk0bUVUD-JOceNUS)g!+-^Htrdh}s`1H*Dk73O98F4DHX1=H z`EapXa8&nogLz=nrql^&yc0>>d)1cy=H_KgkyKo!1M97vBdPU-S#lWErP&fi=wHca zQK8*=p@@DcgE8&OJPk_GGo*DJ{;azT-#u%FWke6hyyo(^N4)zx!(^RYc?-!IszI?5 zVy2!0kA>iv<@}lXMplmT+g^)o_u!d3p-EBsmZt*ishR(Fr}>r2WUKU@5#Mj8x@^6K zEJmoemQB!N4)P3R3)T((5D^l-lPHw z2C5W8n&=w~x@(7+eJP%dKi;q@ibJtZo_eve3|$I*_PfqYSzH)feB8`9s$&sHopY_t zqEMI6S&i5nTT>Ow2$lz$s#Lb=gb99x0DWg%T$8{Y6zC zeY3AHvfANgv}{^+&?^qc=+n&Xp%^ip=xa#a?-dW1`vV2vKSzExH*Ea+bg7%8Hvr3< zwfIgJ#e43esfpZP)k>{jCmLUMj2YfkUHHqaiZveh#ahx-?<#L{OIQ8yU4pLAL0RMG z@Kl9WS}eeKBpLJEpB?nXc*XcxCq4D4stqjsfV9C|Gvg|tmu+jnOE+aNYn0Z3R$tLp z&S+5gsnL$Cf2bmjp?(&EuxMYEw@yMyQC-M{dhgPUVtjZCs@^m3G!pFvBFnjoe3( zYss>qY=&0RFn#F45qtFSaX|TtYLBN!^hCF0(J){&xnzT4)7C(%z^hY?q4@WFSBqnY z(a_{v^oZ$Oj3gFEPqEI1$y@JIyJvQ{T|-8aV(Ub(?ETsjOZ}dgORv0yihyV3%5`e5 z{CMexDbgS*cXqY)QOVlYJcWJ6&Dh1IZOhxkr_rjN#oKfQTZI>*F|;j@`|4HfueZPj zXO3V$#6;g0m{%x)voA3mU|lpe%Z7|ah@a~xv>>6yxkb_eiHvA--5%Kx4-JDDFJkYi z;g@m(=8XYWDoF3>PAVS^kT8%pqcC!WblWSAGZPZ7w)&DBHmR`a;q6yPyPIO}2NYkIM)817J{x9rWuZ21+VbPqZ<*PqHtd&jeY^IN zOZPe8!}%s2Wtsih350Ei=1cp^w`bvZWJa85AocIu6MH{GLY37(x0G5`s)4+gX~#VC zPfkJFNOz^iGcE_RCn1qblmJJFcV*QjFqqLDB<;N&!KimLNF`A0PcAQk7ZV0_0gSdb zR4Lj04~{9I?_J$VyKt-&JP#HV-Ttup%PSZklFt=kGpIWPuqy{{h|rcPq592%);sQnEfxAWnato 
zya)Z)#3#aEhj}A{@OXj@Vy|}nxLN_Ez4-iu!ZC#6egHoaki1bUrT1rx%@4?sMqSXr z6>hh}#%biPIVf^SjA1-j&U1n&Q4^8K_p3~;X@4yfGj1faGYg|N!;z_yiC5ivFNH9- zpEZt)_AYv~A5b&2e})>3Mpbbf2S1JB-IiC2G@mbhQO(^EzuL)Ny@j-IfBH26Ebp3R z;cmIn0g(T=;>azCT0Ng?DK@TBb@L>U90MSz_*(e1iEWbZJ??YhD*P+#xb`_(B4efO&a#De(=gBU=+gE0AxfDnxIWJ2)8^Z>c; zVF*U%B1=pS;MCP|15ZFe!N4J*;cxl=O&>B#2mo^<*mb|`)anDC^3EK54>+;km-t=7 zK_6A8tB$+tMU}@aWb8K#I1vRg02n}j{Phk5nf~WJE4If`AclgAFqGB`p?n$A4WEcX zNXM@YQ2YN(S3vasnRfjRgeOP(mS^3%9flz64L2wRR0J$2WFDo(AF2Sr^bbEMPy%Q- zBJUgjIyWBS8l0983PKDxV6f4iPW|Z>#A8DkiHwL*`^It@)`%Fq zHpY*<2&N!QyI_%T2?$I zwiSAPKoY&+A_|CH+9bH&)V3*eVvP@W+4;17T77f|7))?!5R;&0AdUau?OL?fdQ5V7 zsHjn&mC-T3s@-xuza#Med_6k3_y`-0Q_eF4+a?6@&wBhG``x6|cV!CSWG_O^BvF0M zBp}qlUhXHu>!-YUdr9!%jBkty5R&_II}x{pS8Yoz8eyIk9{?CKE}Nd#oT@O@6|PBMYCDz`CpjKl1S?J7kUVKI3{?DU{wC)8dPa Xn}XjO`fzTI{ZmlE8#Vb-8w~J&O&e1O diff --git a/tests/unitizer/url.R b/tests/unitizer/url.R index 1c1ad896..a6082623 100644 --- a/tests/unitizer/url.R +++ b/tests/unitizer/url.R @@ -16,6 +16,7 @@ library(fansi) unitizer_sect("wrap", { + # two ways to terminate OSC, with an ST, or with a BELL (\a) txt <- "This is a link" base.st <- '%s\033]8;%s;%s\033\\%s\033]8;;\033\\%s' base.a <- '%s\033]8;%s;%s\a%s\033]8;;\a%s' From 58bd7039d3a3f6cffe48878d3617a5e1e841deac Mon Sep 17 00:00:00 2001 From: brodieG Date: Sat, 23 Oct 2021 21:51:10 -0400 Subject: [PATCH 27/27] notes --- DEVNOTES.Rmd | 68 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 5 deletions(-) diff --git a/DEVNOTES.Rmd b/DEVNOTES.Rmd index e51d7146..5ce5154f 100644 --- a/DEVNOTES.Rmd +++ b/DEVNOTES.Rmd @@ -4,15 +4,13 @@ These are internal developer notes. ## Todo +* `substr` and carry don't work correctly (see `carry` notes) + * Get rid of all the Rprintf. * Probably for 1.1, do normalize as part of first pass, will require rewriting every encountered SGR. -* This produces double warning: `substr_sgr('a\033[21lB', 2, 3)`. 
- -* This does not produce a warning: `substr_ctl('a\033[21lB', 2, 3, ctl='c0')`. - * Try to assess impact of large size of state object being re-used every character read. @@ -66,6 +64,13 @@ These are internal developer notes. ## Done +* This produces double warning: `substr_sgr('a\033[21lB', 2, 3)`. + +* This does not produce a warning: `substr_ctl('a\033[21lB', 2, 3, ctl='c0')`. + +That's okay? Since we don't recognize CSI it's treated as just a single control +width-wise. + * Confirm invalid UTF-8 assumptions (do we consume the lead byte count, just on byte?). Right now we consume the lead byte bytes. @@ -249,6 +254,59 @@ unsigned one (I think). Currently takes STRSXP. A little awkward though, but we did it that way because we had the index. +## `carry` + +Problem right now with `substr_ctl` is that it has no awareness of `carry`, but it +needs to in order to correctly know when to leave ending sequences in, e.g.: + + str.3 <- c("\033[35mA\033[42mB", "\033[49mCD") + substr_ctl(str.3, 2, 2, carry=TRUE, terminate=FALSE) + +But this highlights a limitation of `state_at_pos`, which is that it can't +necessarily easily tell you when a leading close state tag should be left in +place. In cases where `carry` is TRUE and `terminate` is FALSE, we want to emit +those so they do actually stop any bleeding styles. + +Fundamentally though, we do not track closing tags that should be emitted, only +what the state is. We could do a diff between the state at end of prior +element, and the state at the beginning of the current one, and infer that any +dropped styles must have been closed. + +There is also the awkwardness that maybe we don't need to re-emit the leading +styles that are present? Indeed, in the `carry = TRUE && terminate = FALSE` +case we only want to emit the styles necessary for the transition. So we only +emit the styles that transition from prior end to current state.
+ A few issues, for the string `substr_ctl(c("ABC", "DEF"), 2, 2)` if "C" is +actually a style, do we carry it? Yes, presumably. But then, if we're looking +at the extracted "B" and "E" and output them to screen, what should we be +seeing? I guess the carry needs to be explicitly carried because it is not +present at the beginning of the string. So we have to do all this rather +intense stuff of figuring out what the carry is, applying it, then comparing the +end of the substring-ed element to our new state, and figuring out what it takes +to bridge the two. Blergh. + +The other option is to declare that if you use `terminate = FALSE`, you're +responsible for the consequences? The big issue of not doing this is that not +just `substr` is affected? No, everything else should be fine. It's really +just `substr` because it drops the portion of the string that would contain the +closing tags. + +And do we want to carry styles that are unchanged from the prior element, or +simply rely on them being carried by the terminal? The latter seems most +logical, but might be annoying. We'll do this. + +Should `bridge` always normalize? Otherwise we end up in the ugly position +where it will emit "\033[0m", which is not desired. Maybe it only normalizes +the close? + +What's worse, closing styles in a terminate-style when `terminate = FALSE`, or +normalizing closes? Because we don't track the closing tags we have no way of +knowing how we reached a particular state. It could have been through the +close-all + re-open, or just close some. And what about the case where we just +change a color? We don't need to close anything, just change the color. +Setdiff handles this correctly already. + ## `substr<-` Main question here is whether leading and trailing SGR are selected in the @@ -601,7 +659,7 @@ side of insertion. But what if we don't terminate? Styles will bleed through on the beginning. What about styles that change in what is being inserted? Are those completely lost? 
Or is that handled with carry? -Maybe we just implement as the paste business? +Maybe we just implement as the paste business (yes, with variations)? ## Benchmarks