From ec3193cdbc41f4229b8adb3e18f124d317e25935 Mon Sep 17 00:00:00 2001 From: Kun Ren Date: Tue, 3 Jun 2014 15:24:13 +0800 Subject: [PATCH] major operator change (see issue #12, #13) --- .gitignore | 1 + DESCRIPTION | 6 +-- NAMESPACE | 2 +- R/pipeR.R | 22 ++++----- README.md | 86 +++++++++++++++++++----------------- man/first-argument-piping.Rd | 12 ++--- man/free-piping.Rd | 14 +++--- 7 files changed, 74 insertions(+), 69 deletions(-) diff --git a/.gitignore b/.gitignore index c278b99..f5f5cf5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ # R project files .Rproj.user + diff --git a/DESCRIPTION b/DESCRIPTION index 896254c..63f6342 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: pipeR Type: Package Title: Pipeline operators for R -Version: 0.2-4 +Version: 0.3 Author: Kun Ren Maintainer: Kun Ren Description: Provides operators for chaining @@ -10,9 +10,9 @@ Description: Provides operators for chaining and lambda piping. Depends: R (>= 2.14) -Date: 2014-05-31 +Date: 2014-06-03 Suggests: - dplyr + plyr,dplyr Enhances: magrittr License: MIT + file LICENSE URL: http://renkun.me/pipeR, diff --git a/NAMESPACE b/NAMESPACE index 22c8311..9125bf1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,5 @@ # Generated by roxygen2 (4.0.1): do not edit by hand -export("%>%") +export("%:>%") export("%>>%") export("%|>%") diff --git a/R/pipeR.R b/R/pipeR.R index b6eef78..f52f86a 100644 --- a/R/pipeR.R +++ b/R/pipeR.R @@ -1,6 +1,6 @@ #' Pipe an object forward as the first argument to a function #' -#' The \code{\%>\%} operator evaluates the function call on the right-hand side +#' The \code{\%>>\%} operator evaluates the function call on the right-hand side #' with the left-hand side object being the first argument. #' #' @param . 
The object to be piped as the first argument @@ -9,17 +9,17 @@ #' @export #' @examples #' \dontrun{ -#' rnorm(100) %>% plot +#' rnorm(100) %>>% plot #' -#' rnorm(100) %>% plot(col="red") +#' rnorm(100) %>>% plot(col="red") #' -#' rnorm(1000) %>% sample(size=100,replace=F) %>% hist +#' rnorm(1000) %>>% sample(size=100,replace=F) %>>% hist #' } -`%>%` <- .pipe +`%>>%` <- .pipe #' Pipe an object forward as `.` to an expression #' -#' The operator \code{\%>>\%} evaluates the expression on the right-hand side +#' The operator \code{\%:>\%} evaluates the expression on the right-hand side #' with the left-hand side object referred to as \code{.}. #' #' @param . The object to be piped as represented by \code{.} @@ -28,17 +28,17 @@ #' @export #' @examples #' \dontrun{ -#' rnorm(100) %>>% plot(.) +#' rnorm(100) %:>% plot(.) #' -#' rnorm(100) %>>% plot(.,col="red") +#' rnorm(100) %:>% plot(.,col="red") #' -#' rnorm(1000) %>>% sample(.,size=length(.)*0.1,replace=FALSE) +#' rnorm(1000) %:>% sample(.,size=length(.)*0.1,replace=FALSE) #' -#' rnorm(1000) %>>% +#' rnorm(1000) %:>% -#' sample(.,length(.)*0.1,FALSE) %>>% +#' sample(.,length(.)*0.1,FALSE) %:>% #' plot(.,main=sprintf("length: %d",length(.))) #' } -`%>>%` <- .fpipe +`%:>%` <- .fpipe #' Pipe an object by lambda expression #' diff --git a/README.md b/README.md index ca4615a..c87190a 100644 --- a/README.md +++ b/README.md @@ -36,71 +36,71 @@ plot(diff(log(sample(rnorm(10000,mean=10,sd=1),size=100,replace=FALSE))),col="re The code is neither straightforward for reading nor flexible for modification. It is because the functions in the first few steps are hiding in the nested brackets, and the written order of the functions goes against the order of logic. -pipeR borrows the idea of F# pipeline operator which allows you to write the *object* first and *pipe* it to a following *function*. 
This package defines three binary pipe operators that provide different types of forward-piping mechanisms: first-argument piping (`%>%`), free piping (`%>>%`), and lambda piping (`%|>%`). And the real magic of this kind of operators is chaining commands by the right order. +pipeR borrows the idea of F# pipeline operator which allows you to write the *object* first and *pipe* it to a following *function*. This package defines three binary pipe operators that provide different types of forward-piping mechanisms: first-argument piping (`%>>%`), free piping (`%:>%`), and lambda piping (`%|>%`). And the real magic of this kind of operators is chaining commands by the right order. -### First-argument piping: `%>%` +### First-argument piping: `%>>%` -The first-argument pipe operator `%>%` inserts the expression on the left-hand side to the first argument of the **function** on the right-hand side. In other words, `x %>% f(a=1)` will be transformed to and be evaluated as `f(x,a=1)`. This operator accepts both function call, e.g. `plot()` or `plot(col="red")`, and function name, e.g. `log` or `plot`. +The first-argument pipe operator `%>>%` inserts the expression on the left-hand side to the first argument of the **function** on the right-hand side. In other words, `x %>>% f(a=1)` will be transformed to and be evaluated as `f(x,a=1)`. This operator accepts both function call, e.g. `plot()` or `plot(col="red")`, and function name, e.g. `log` or `plot`. 
``` -rnorm(100) %>% plot +rnorm(100) %>>% plot # plot(rnorm(100)) -rnorm(100) %>% plot() +rnorm(100) %>>% plot() # plot(rnorm(100)) -rnorm(100) %>% plot(col="red") +rnorm(100) %>>% plot(col="red") # plot(rnorm(100),col="red") -rnorm(100) %>% sample(size=100,replace=FALSE) %>% hist +rnorm(100) %>>% sample(size=100,replace=FALSE) %>>% hist # hist(sample(rnorm(100),size=100,replace=FALSE)) ``` -With the first-argument pipe operator `%>%`, you may rewrite the first example as +With the first-argument pipe operator `%>>%`, you may rewrite the first example as ``` -rnorm(10000,mean=10,sd=1) %>% - sample(size=100,replace=FALSE) %>% - log %>% - diff %>% +rnorm(10000,mean=10,sd=1) %>>% + sample(size=100,replace=FALSE) %>>% + log %>>% + diff %>>% plot(col="red",type="l") ``` -### Free piping: `%>>%` +### Free piping: `%:>%` -You may not always want to pipe the object to the first argument of the next function. Then you can use free pipe operator `%>>%`, which takes `.` to represent the piped object on the left-hand side and evaluate the *expression* on the right-hand side with `.` as the piped object. In other words, you have the right to decide where the object should be piped to. +You may not always want to pipe the object to the first argument of the next function. Then you can use free pipe operator `%:>%`, which takes `.` to represent the piped object on the left-hand side and evaluate the *expression* on the right-hand side with `.` as the piped object. In other words, you have the right to decide where the object should be piped to. ``` -rnorm(100) %>>% plot(.) +rnorm(100) %:>% plot(.) # plot(rnorm(100)) -rnorm(100) %>>% plot(., col="red") +rnorm(100) %:>% plot(., col="red") # plot(rnorm(100),col="red") -rnorm(100) %>>% sample(., size=length(.)*0.5) +rnorm(100) %:>% sample(., size=length(.)*0.5) # (`.` is piped to multiple places) -mtcars %>>% lm(mpg ~ cyl + disp, data=.) %>% summary +mtcars %:>% lm(mpg ~ cyl + disp, data=.) 
%>>% summary # summary(lm(mgp ~ cyl + disp, data=mtcars)) -rnorm(100) %>>% - sample(.,length(.)*0.2,FALSE) %>>% +rnorm(100) %:>% + sample(.,length(.)*0.2,FALSE) %:>% plot(.,main=sprintf("length: %d",length(.))) # (`.` is piped to multiple places and mutiple levels) -rnorm(100) %>>% { +rnorm(100) %:>% { par(mfrow=c(1,2)) hist(.,main="hist") plot(.,col="red",main=sprintf("%d",length(.))) } # (`.` is piped to an enclosed expression) -rnorm(10000,mean=10,sd=1) %>>% - sample(.,size=length(.)/500,replace=FALSE) %>% - log %>% - diff %>>% +rnorm(10000,mean=10,sd=1) %:>% + sample(.,size=length(.)/500,replace=FALSE) %>>% + log %>>% + diff %:>% plot(.,col="red",type="l",main=sprintf("length: %d",length(.))) -# (`%>%` and `%>>%` are used together. Be clear what they mean) +# (`%>>%` and `%:>%` are used together. Be clear what they mean) ``` ### Lambda piping: `%|>%` @@ -108,18 +108,18 @@ rnorm(10000,mean=10,sd=1) %>>% It can be confusing to see multiple `.` symbols in the same context. In some cases, they may represent different things in the same expression. Even though the expression mostly still works, it may not be a good idea to keep it in that way. Here is an example: ``` -mtcars %>>% - lm(mpg ~ ., data=.) %>% +mtcars %:>% + lm(mpg ~ ., data=.) %>>% summary ``` -The code above works correctly with `%>>%` and `%>%`, even though the two dots in the second line have different meanings. `.` in formula `mpg ~ .` represents all variables other than `mpg` in data frame `mtcars`; `.` in `data=.` represents `mtcars` itself. One way to reduce ambiguity is to use *lambda expression* that names the piped object on the left of `~` and specifies the expression to evaluate on the right. +The code above works correctly with `%:>%` and `%>>%`, even though the two dots in the second line have different meanings. `.` in formula `mpg ~ .` represents all variables other than `mpg` in data frame `mtcars`; `.` in `data=.` represents `mtcars` itself. 
One way to reduce ambiguity is to use *lambda expression* that names the piped object on the left of `~` and specifies the expression to evaluate on the right. A new pipe operator `%|>%` is defined, which works with lambda expression in the formula form `x ~ f(x)`. More specifically, the expression will be interpreted as *`f(x)` is evaluated with `x` being the piped object*. Therefore, the previous example can be rewritten with `%|>%` like this: ``` mtcars %|>% - (df ~ lm(mpg ~ ., data=df)) %>% + (df ~ lm(mpg ~ ., data=df)) %>>% summary ``` @@ -143,14 +143,14 @@ All the pipe operators can be used together and each of them only works in their ``` mtcars %|>% - (df ~ lm(mpg ~ ., data=df)) %>% - summary %>>% + (df ~ lm(mpg ~ ., data=df)) %>>% + summary %:>% .$fstatistic ``` ### Piping with `dplyr` package -`dplyr` package provides a group of functions that make data transformation much easier. `%.%` is a built-in chain operator that pipes the previous result to the first-argument in the next function call. `%>%` is fully compatible with `dplyr` and can replace `%.%` with more consistency. +`dplyr` package provides a group of functions that make data transformation much easier. `%.%` is a built-in chain operator that pipes the previous result to the first-argument in the next function call. `%>>%` is fully compatible with `dplyr` and can replace `%.%` with more consistency. The following code demonstrates mixed piping with `dplyr` functions. 
@@ -160,14 +160,14 @@ library(hflights) library(pipeR) data(hflights) -hflights %>% - mutate(Speed=Distance/ActualElapsedTime) %>% - group_by(UniqueCarrier) %>% +hflights %>>% + mutate(Speed=Distance/ActualElapsedTime) %>>% + group_by(UniqueCarrier) %>>% summarize(n=length(Speed),speed.mean=mean(Speed,na.rm = T), speed.median=median(Speed,na.rm=T), - speed.sd=sd(Speed,na.rm=T)) %>% - mutate(speed.ssd=speed.mean/speed.sd) %>% - arrange(desc(speed.ssd)) %>>% + speed.sd=sd(Speed,na.rm=T)) %>>% + mutate(speed.ssd=speed.mean/speed.sd) %>>% + arrange(desc(speed.ssd)) %:>% barplot(.$speed.ssd, names.arg = .$UniqueCarrier, main=sprintf("Standardized mean of %d carriers", nrow(.))) ``` @@ -176,10 +176,14 @@ hflights %>% The reason why the three operators are not "integrated" into one is that I want to make the functionality of each operator as clear and independent as possible, so that guessing and ambiguity could be sharply reduced. When you decide to use pipe operators to build a chain of expressions, you need to know clearly how you want to pipe your results to the next level. The following bullets are a brief summary: -1. `%>%` only pipes an object to the first-argument of the next *function*, that is, `x %>% f(...)` runs as `f(x,...)`. -2. `%>>%` only evaluates the next *expression* with `.` representing the object being piped, that is, `x %>>% f(a,.,g(.))` runs as `f(a,x,g(x))`. +1. `%>>%` only pipes an object to the first-argument of the next *function*, that is, `x %>>% f(...)` runs as `f(x,...)`. +2. `%:>%` only evaluates the next *expression* with `.` representing the object being piped, that is, `x %:>% f(a,.,g(.))` runs as `f(a,x,g(x))`. 3. `%|>%` only evaluates the *expression* on the right-hand side of `~` in the lambda expression formula with symbol on the left representing the object being piped, that is, `x %|>% (a ~ f(a,g(a)))` runs as `f(x,g(x))`. 
+## Performance + +Since each pipe operator defined in this package specializes in its work and is made as simple as possible, the overhead is significantly lower than its peer implementation in the `magrittr` package. In general, `pipeR` is more than 3 times faster than `magrittr` and can be more than 30 times faster when the pipeline gets longer or when the data gets bigger. The detailed performance tests can be seen in issues. + ## Help overview ``` diff --git a/man/first-argument-piping.Rd b/man/first-argument-piping.Rd index 2e857b4..6d27fb9 100644 --- a/man/first-argument-piping.Rd +++ b/man/first-argument-piping.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2 (4.0.1): do not edit by hand \name{first-argument piping} -\alias{\%>\%} +\alias{\%>>\%} \alias{first-argument piping} \title{Pipe an object forward as the first argument to a function} \usage{ -. \%>\% fun +. \%>>\% fun } \arguments{ \item{.}{The object to be piped as the first argument} @@ -12,16 +12,16 @@ \item{fun}{The function call to evaluate with the piped object as the first argument.} } \description{ -The \code{\%>\%} operator evaluates the function call on the right-hand side +The \code{\%>>\%} operator evaluates the function call on the right-hand side with the left-hand side object being the first argument. } \examples{ \dontrun{ -rnorm(100) \%>\% plot +rnorm(100) \%>>\% plot -rnorm(100) \%>\% plot(col="red") +rnorm(100) \%>>\% plot(col="red") -rnorm(1000) \%>\% sample(size=100,replace=F) \%>\% hist +rnorm(1000) \%>>\% sample(size=100,replace=F) \%>>\% hist } } diff --git a/man/free-piping.Rd b/man/free-piping.Rd index 9be4b20..9fc6d2b 100644 --- a/man/free-piping.Rd +++ b/man/free-piping.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2 (4.0.1): do not edit by hand \name{free-piping} -\alias{\%>>\%} +\alias{\%:>\%} \alias{free-piping} \title{Pipe an object forward as `.` to an expression} \usage{ -. \%>>\% expr +. 
\%:>\% expr } \arguments{ \item{.}{The object to be piped as represented by \code{.}} @@ -12,18 +12,18 @@ \item{expr}{The expression to evaluate with the piped object referred to as \code{.}} } \description{ -The operator \code{\%>>\%} evaluates the expression on the right-hand side +The operator \code{\%:>\%} evaluates the expression on the right-hand side with the left-hand side object referred to as \code{.}. } \examples{ \dontrun{ -rnorm(100) \%>>\% plot(.) +rnorm(100) \%:>\% plot(.) -rnorm(100) \%>>\% plot(.,col="red") +rnorm(100) \%:>\% plot(.,col="red") -rnorm(1000) \%>>\% sample(.,size=length(.)*0.1,replace=FALSE) +rnorm(1000) \%:>\% sample(.,size=length(.)*0.1,replace=FALSE) -rnorm(1000) \%>>\% +rnorm(1000) \%:>\% - sample(.,length(.)*0.1,FALSE) \%>>\% + sample(.,length(.)*0.1,FALSE) \%:>\% plot(.,main=sprintf("length: \%d",length(.))) }