Lex, Parse and Evaluate a Simple S-Expression with {flexo}

library(flexo)

{flexo}

flexo is a package containing a simple lexer (which splits text into named tokens) and a TokenStream R6 class for stepping through the resulting stream of tokens while parsing it.

LISPy S-Expression

The following code is an S-Expression which evaluates to 21

# Prefix notation for (2 * 3) + (3 * 5)
sexp <- "(+ (* 2 3) (* 3 5))"

Lex the S-Expression into tokens

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Define the regex for each token
#
# The names of this vector are the token names seen after lexing ('open',
# 'close', 'num', 'whitespace', 'op'); the code further down dispatches on
# them.
#
# NOTE(review): 'op' is a lazy catch-all, so it presumably relies on being
# listed after the more specific patterns - confirm how lex() orders matches.
# Being lazy, it looks like it matches a single character at a time, which
# would limit operators to one character - TODO confirm.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sexp_regexes <- c(
  open       = "\\(",  # a literal opening bracket
  close      = "\\)",  # a literal closing bracket
  num        = "\\d+", # one or more digits
  whitespace = "\\s+", # a run of whitespace
  op         = ".+?"   # anything else
)


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Lex the expression into named tokens and keep everything that is not
# whitespace
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
tokens <- local({
  lexed <- lex(sexp, sexp_regexes)
  lexed[names(lexed) != "whitespace"]
})

tokens
#>  open    op  open    op   num   num close  open    op   num   num close close 
#>   "("   "+"   "("   "*"   "2"   "3"   ")"   "("   "*"   "3"   "5"   ")"   ")"

Evaluate the expression by interpreting the tokens

Recursive evaluation of the S-Expression, calculated directly on the stream of tokens.

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialise a token stream
#
# Wraps the token vector in a TokenStream so the evaluator below can look at
# the name of the next token (`read_names()`) and take tokens off the stream
# (`consume()`).
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
stream <- TokenStream$new(tokens)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Recursive function to evaluate a TokenStream full of sexp tokens
#
# @param stream An object providing `read_names(n)` (name of the upcoming
#   token(s), without taking them) and `consume(n)` (take the upcoming
#   token(s)), e.g. a TokenStream. Tokens are consumed as a side effect.
# @return The value of the next expression in the stream: the number itself
#   for a 'num' token, otherwise the result of calling the operator on its
#   evaluated arguments.
#
# Stops with a descriptive error when the stream runs out or when the next
# token cannot start an expression (e.g. a stray 'close'); previously these
# surfaced as "object 'result' not found" or a cryptic `if` condition error.
#
# NOTE(review): the operator text is handed straight to do.call(), so any R
# function whose name lexes as an 'op' token can be invoked. Restrict the
# allowed operators before using this on untrusted input.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
eval_sexp <- function(stream) {

  # Name of the next token, failing clearly if there is none.
  # NOTE(review): assumes an exhausted stream yields NA or a zero-length
  # result rather than raising its own error - confirm against TokenStream.
  peek_name <- function() {
    nm <- stream$read_names(1)
    if (length(nm) != 1L || is.na(nm)) {
      stop("Unexpected end of token stream while evaluating S-Expression",
           call. = FALSE)
    }
    nm
  }

  name <- peek_name()

  if (name == "op") {

    # Get the 'op'
    op <- stream$consume(1)

    # Gather the args: each one is itself an expression, so recurse until the
    # bracket that closes this call is the next token
    args <- list()
    while (peek_name() != "close") {
      args <- c(args, eval_sexp(stream))
    }
    stream$consume(1)           # Consume the "close" bracket
    result <- do.call(op, args) # Eval the "op"
  } else if (name == "open") {
    stream$consume(1)           # Consume the "open" bracket
    result <- eval_sexp(stream) # Recursive eval
  } else if (name == "num") {
    result <- as.numeric(stream$consume(1))
  } else {
    stop("Unexpected token '", name, "' at start of expression",
         call. = FALSE)
  }

  result
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Calculate the result
#
# eval_sexp() takes tokens off `stream` as it evaluates them.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
eval_sexp(stream)
#> [1] 21

Evaluate the expression by translating to R

Translate the S-Expression into a string of R code and then evaluate it.

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialise a token stream
#
# A new stream is built from the same tokens, because the earlier `stream`
# was already consumed by the eval_sexp() call above.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
stream <- TokenStream$new(tokens)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Recursive function to convert a TokenStream full of sexp tokens to R code
#
# @param stream An object providing `read_names(n)` (name of the upcoming
#   token(s), without taking them) and `consume(n)` (take the upcoming
#   token(s)), e.g. a TokenStream. Tokens are consumed as a side effect.
# @return For an operator call, a string of R code of the form
#   "`op`(arg1, arg2, ...)"; for a bare 'num' token, the number itself
#   (numeric).
#
# Stops with a descriptive error when the stream runs out or when the next
# token cannot start an expression (e.g. a stray 'close'); previously these
# surfaced as "object 'result' not found" or a cryptic `if` condition error.
#
# NOTE(review): the operator text is pasted between backticks unescaped, and
# the resulting string is meant to be parsed and evaluated. Restrict the
# allowed operators before using this on untrusted input.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
convert_sexp <- function(stream) {

  # Name of the next token, failing clearly if there is none.
  # NOTE(review): assumes an exhausted stream yields NA or a zero-length
  # result rather than raising its own error - confirm against TokenStream.
  peek_name <- function() {
    nm <- stream$read_names(1)
    if (length(nm) != 1L || is.na(nm)) {
      stop("Unexpected end of token stream while converting S-Expression",
           call. = FALSE)
    }
    nm
  }

  name <- peek_name()

  if (name == "op") {

    # Get the 'op'
    op <- stream$consume(1)

    # Gather the args: each one is itself an expression, so recurse until the
    # bracket that closes this call is the next token
    args <- list()
    while (peek_name() != "close") {
      args <- c(args, convert_sexp(stream))
    }
    stream$consume(1)              # Consume the "close" bracket

    # Create some R code: a prefix call to the backtick-quoted operator
    result <- paste0("`", op, "`(", paste(args, collapse = ", "), ")")
  } else if (name == "open") {
    stream$consume(1)              # Consume the "open" bracket
    result <- convert_sexp(stream) # Recursive conversion
  } else if (name == "num") {
    result <- as.numeric(stream$consume(1))
  } else {
    stop("Unexpected token '", name, "' at start of expression",
         call. = FALSE)
  }

  result
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Calculate the result
#
# convert_sexp() only builds the R source text; parse() turns that text into
# an expression and eval() runs it.
# NOTE(review): eval(parse(text = ...)) executes whatever code the string
# contains, so only use this pattern on trusted input.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
r_string <- convert_sexp(stream)
r_string
#> [1] "`+`(`*`(2, 3), `*`(3, 5))"
eval(parse(text = r_string))
#> [1] 21