demagrittr

kind of precompiling from magrittr's syntax to eager evaluation syntax

28
2
R

output: github_document

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "README-"
)

demagrittr

What is this package?

demagrittr() and demagrittr_source() convert magrittr’s syntax to eager
evaluation syntax (by default) for the purpose of:

  • understanding quite complicated and nested piped sentences
  • debugging when an error occurs
  • run-time reduction (if %>% is heavily used inside a long loop)

This is experimental and not fully tested, so I would be glad
if you could inform me of any misunderstandings or mistakes.

Installation

# install.packages("devtools")
devtools::install_github("tobcap/demagrittr")
library("demagrittr")
library(magrittr)
library(demagrittr)

Usage

# NSE
demagrittr(x %>% f %>% g %>% h) # mode = "eager" by default
demagrittr(x %>% f %>% g %>% h, mode = "lazy")
demagrittr(x %>% f %>% g %>% h, mode = "promise")

# Manipulation for a language object
expr0 <- quote(x %>% f %>% g %>% h)
demagrittr(expr0, is_NSE = FALSE)

# The output in `mode = "promise"` seems redundant but is essential in 
# this example.
demagrittr({set.seed(1); rnorm(1) %>% sum(., .)}, mode = "lazy")
demagrittr({set.seed(1); rnorm(1) %>% sum(., .)}, mode = "promise")

Precompiling and evaluation

compiled0 <- demagrittr(1:10 %>% sum %>% log %>% sin)
print(compiled0)
eval(compiled0)

Building (unary) functions

demagrittr(f <- . %>% cos %>% sin)

demagrittr(f <- . %>% cos %>% sin, mode = "lazy")

demagrittr(f <- . %>% cos %>% sin, mode = "promise")

# The resul is just a language object. You need to eval().
eval(demagrittr(f <- . %>% cos %>% sin))
f(1)
sin(cos(1))

Tee operations

demagrittr(
  rnorm(200) %>%
  matrix(ncol = 2) %T>%
  plot %>% # plot usually does not return anything.
  colSums
)

Pipe with exposition of variables

demagrittr({
iris %>%
  subset(Sepal.Length > mean(Sepal.Length)) %$%
  cor(Sepal.Length, Sepal.Width)

data.frame(z = rnorm(100)) %$%
  ts.plot(z)
})

Compound assignment pipe operations

demagrittr({
iris$Sepal.Length <- 
  iris$Sepal.Length %>%
  sqrt
  
iris$Sepal.Length %<>% sqrt   
})

Benchmarking

e <- quote(
 for (i in 1:10000) {
   i %>%
     identity %>%
     identity %>%
     identity %>%
     identity
 }
)

system.time(eval(e))
system.time(eval(demagrittr(e, FALSE)))
library("microbenchmark")
library("magrittr")
library("pipeR")
library("demagrittr")

expr1 <- quote(1:10 %>% sum %>% log %>% sin)
expr2e <- demagrittr(expr1, FALSE, mode = "eager")
expr2l <- demagrittr(expr1, FALSE, mode = "lazy")
expr2p <- demagrittr(expr1, FALSE, mode = "promise")
expr3 <- quote(1:10 %>>% sum %>>% log %>>% sin)

microbenchmark(
    "%>%" = eval(expr1)
  , "demagrittr eager" = eval(expr2e)
  , "demagrittr lazy" = eval(expr2l)
  , "demagrittr promise" = eval(expr2p)
  , "%>>%" = eval(expr3)
  , times = 1e3)

Reduce(function(x, y) if (identical(x, y)) y else FALSE,
       lapply(list(expr1, expr2e, expr2l, expr2p, expr3), eval))
# from http://renkun.me/blog/2014/08/08/difference-between-magrittr-and-pipeR.html#performance

expr4 <- quote({
  set.seed(1)
  lapply(1:100000, function(i) {
    sample(letters, 6, replace = T) %>%
      paste(collapse = "") %>%
      "=="("rstats")
  })
})

expr5e <- demagrittr(expr4, FALSE, mode = "eager")
expr5l <- demagrittr(expr4, FALSE, mode = "lazy")
expr5p <- demagrittr(expr4, FALSE, mode = "promise")

expr6 <- quote({
  set.seed(1)
  lapply(1:100000, function(i) {
    sample(letters, 6, replace = T) %>>%
      paste(collapse = "") %>>%
      "=="("rstats")
  })
})

# My poor laptop takes huge time. The unit is 'seconds'.
microbenchmark(
    "%>%" = eval(expr4)
  , "demagrittr eager" = eval(expr5e)
  , "demagrittr lazy" = eval(expr5l)
  , "demagrittr promise" = eval(expr5p)
  , "%>>%" = eval(expr6)
  , times = 1)

Compiling source code

tmp_dir <- tempdir()
in_path <- file.path(tmp_dir, "test_in.r")
out_path <- file.path(tmp_dir, "test_out.r")

writeLines(
"
x <- data.frame(a = 1:5, b = 6:10)
y <- x %>%
  select(b) %>%
  filter(b >= 8)
", in_path)

demagrittr_source(in_path, out_path, ask = FALSE)
# input file
cat(paste0(readLines(in_path), collapse="\n"))

# output file
cat(paste0(readLines(out_path), collapse="\n"))

Known problems

  • Not guaranteed to preserve the same visibility of a result when evaluating
    (printing the result or not in your console)
  • #{n} is used for the prefix-name of temporary symbols in the converted
    language object. So there will be overwritting if you have already created
    such a symbol in the environment where you want to evaluate a language object
    convertedy by demagrittr(). (hope nobody uses such a tricky name as a symbol)
  • The results where return() appears in middle of pipe stream differs by the mode.
expr_return <- quote(1:10 %>% sum %>% return %>% log)
expr_return

demagrittr(expr_return, is_NSE = FALSE, mode = "eager")

demagrittr(expr_return, is_NSE = FALSE, mode = "lazy")

demagrittr(expr_return, is_NSE = FALSE, mode = "promise")

eval(expr_return)

eval(demagrittr(expr_return, is_NSE = FALSE, mode = "eager"))

eval(demagrittr(expr_return, is_NSE = FALSE, mode = "lazy"))

eval(demagrittr(expr_return, is_NSE = FALSE, mode = "promise"))

## runs but expected output?
1:10 %>% sum %>% return %>% log

# The code below occurs error in console
{
    `#0` <- 1:10
    `#1` <- sum(`#0`)
    `#2` <- return(`#1`)
    log(`#2`)
}
## execute in console 
## Error: no function to return from, jumping to top level

# Also error in console
log(return(sum(1:10)))
## Error: no function to return from, jumping to top level

# runs, but expected result?
(function(`#2`) log(`#2`))((function(`#1`) return(`#1`))((function(`#0`) sum(`#0`))(1:10)))

To-Do

  • Please suggest problems in issue.