Friday, September 27, 2013

Scala Parser Combinators - JavaOne 2013 BOF

In this post, I wanted to share my working example that I demonstrated in my JavaOne 2013 BOF session on "Building Small Languages with Scala Parser Combinators". In the interest of a simple demo, I kept the language features to a minimum. For example, the language types are only integer values. In other words, there are no floating point values, booleans, strings, or chars. Also, the language is interpreted, which means that I'm not generating byte codes.

These simplifications aside, there are some cool things that I present in this demonstration, all of which form a good foundation for you to use as a starting point for your own language. For example, there are complex expressions, supporting multiplication, division, addition, subtraction, and modulo. There are also several things you'd expect in a small language, such as if-else statements, loops, variable references, and functions.

Here's an example script that the language can process:

// an example of a function that returns a value
func subtractNumbers( x, y, z )
 return x - y - z
endfunc

// an example of a function that doesn't return anything
func doSomething()
 println 100
endfunc

// main program routine
main:
 var a = 10
 var b = 20
 
 if (a > b) then
  loop 3 times
   var d = subtractNumbers( x = 10, y = 5, z = 3 )
   println d
  endloop
 else
  if (b > 0) then
   loop 2 times
    println b
   endloop
  endif
 endif
 
 println 4
 
 doSomething()
 
 println 1
 println 2


The architecture consists of the following components:

  • A parser written with the Scala parsing library
    • The output of the parser is an AST which describes the structure of the input script
  • An interpreter that walks the resulting AST and processes the nodes accordingly
Here's the source code for the parser:

package net.travisdazell.parsers

import scala.util.parsing.combinator.syntactical.StandardTokenParsers
import net.travisdazell.parsers.model._

// small interpreted language with the following features:
//   - variable definitions and references
//  - if-else statements
//  - loops
//  - error handling
//  - scoping
//  - functions with named arguments
class SmallLanguageParser extends StandardTokenParsers {
  lexical.reserved += ("var", "println", "loop", "times", "endloop", "if",
    "then", "else", "endif", "func", "return", "endfunc", "main")
  lexical.delimiters += ("*", "/", "%", "+", "-", "(", ")", "=",
    "<", ">", "==", "!=", "<=", ">=", ",", ":")

  def program: Parser[Program] = (rep(function) <~ ("main" ~ ":")) ~ codeblock ^^ {
    case f ~ c => new Program(f, c)
  }

  def function: Parser[Function] = ("func" ~> ident) ~ ("(" ~> arguments) ~
    (")" ~> codeblock) ~ opt(returnStatement) <~ "endfunc" ^^ {
      case a ~ b ~ c ~ None => new Function(a, b, c, Number(0))
      case a ~ b ~ c ~ d => new Function(a, b, c, d.get)
    }

  def returnStatement: Parser[Expr] = "return" ~> expr ^^ {
    e => e
  }

  def arguments: Parser[Map[String, Int]] = repsep(ident, ",") ^^ {
    argumentList =>
      {
        (for (a <- argumentList) yield (a -> 0)) toMap
      }
  }

  def codeblock: Parser[List[Statement]] = rep(statement) ^^ { a => a }

  def statement: Parser[Statement] = positioned(variableAssignment | outStatement |
    loopStatement | ifStatement | functionCall | outStatement) ^^ { a => a }

  def variableAssignment: Parser[VariableDefinition] = "var" ~> ident ~ "=" ~
    positioned(functionCall | expr) ^^ { case a ~ "=" ~ b => { new VariableDefinition(a, b) } }

  def outStatement: Parser[PrintStatement] = "println" ~> positioned(expr) ^^ {
    case a => new PrintStatement(a)
  }

  def loopStatement: Parser[LoopStatement] = ("loop" ~> iterations <~ "times") ~
    codeblock <~ "endloop" ^^ {
      case i ~ s => {
        new LoopStatement(i, s)
      }
    }

  def ifStatement: Parser[IfStatement] = conditional ~ codeblock ~
    opt("else" ~> codeblock) <~ "endif" ^^ {
      case a ~ b ~ c => {
        c match {
          case None => new IfStatement(a, b, List())
          case _ => new IfStatement(a, b, c.get)
        }
      }
    }

  def conditional: Parser[Condition] = "if" ~ "(" ~> condition <~ ")" ~ "then"

  def condition: Parser[Condition] = positioned(expr) ~
    ("<" | ">" | "==" | "!=" | "<=" | ">=") ~ positioned(expr) ^^ {
      case a ~ b ~ c => {
        new Condition(b, a, c)
      }
    }

  def iterations: Parser[Int] = numericLit ^^ { _ toInt }

  def functionCall: Parser[FunctionCall] = ((ident) <~ "(") ~
    functionCallArguments <~ ")" ^^ {
      case a ~ l => new FunctionCall(a, l)
    }

  def functionCallArguments: Parser[Map[String, Expr]] =
    repsep(functionArgument, ",") ^^ {
      _ toMap
    }

  def functionArgument: Parser[(String, Expr)] = (ident <~ "=") ~ expr ^^ {
    case a ~ b => (a, b)
  }

  def expr: Parser[Expr] = term ~ rep(("+" | "-") ~ term) ^^ {
    case a ~ List() => a
    case a ~ b => {
      def appendExpression(c: Operator, p: Operator): Operator = {
        p.left = c
        p
      }

      var root: Operator = new Operator(b.head._1, a, b.head._2)

      for (f <- b.tail) {
        var parent =
          f._1 match {
            case "+" => new Operator("+", null, f._2)
            case "-" => Operator("-", null, f._2)
          }

        root = appendExpression(root, parent)
      }

      root
    }
  }

  def term: Parser[Expr] = multiplydividemodulo ^^ { l => l } | factor ^^ {
    a => a
  }

  // note that "rep" returns a List
  def multiplydividemodulo: Parser[Expr] = factor ~ rep(("*" | "/" | "%") ~ factor) ^^ {

    case a ~ List() => a
    case a ~ b => {
      def appendExpression(e: Operator, t: Operator): Operator = {
        t.left = e.right
        e.right = t
        t
      }

      var root: Operator = new Operator(b.head._1, a, b.head._2)
      var current = root

      // for each of these, i'm just building up the parse tree
      for (f <- b.tail) {
        var rightOperator =
          f._1 match {
            case "*" => Operator("*", null, f._2)
            case "/" => Operator("/", null, f._2)
            case "%" => Operator("%", null, f._2)
          }

        current = appendExpression(current, rightOperator)
      }

      root
    }
  }

  def factor: Parser[Expr] = numericLit ^^ { a => Number(a.toInt) } |
    "(" ~> expr <~ ")" ^^ { e => e } |
    ident ^^ { new Identifier(_) }

  def parseAll[T](p: Parser[T], in: String): ParseResult[T] = {
    phrase(p)(new lexical.Scanner(in))
  }
}

Next, let's take a look at the interpreter:

package net.travisdazell.parsers.runtime
import net.travisdazell.parsers.model._

class Interpreter(program: Program) {
  var currentScope = new Scope("global", null)

  def run() {
    walk(program.statements)
  }

  private def getVariable(ident: Identifier): Expr = {
    var s: Scope = currentScope

    while ((!s.name.equals("global")) && !s.variables.contains(ident.name)) {
      s = s.parentScope
    }

    if (s.variables.contains(ident.name)) s.variables(ident.name)
    else {
      sys.error("Error: Undefined variable " + ident.name +
        " at position [" +
        ident.pos.column + "] on line: " +
        ident.pos.line)
    }
  }

  private def calculateExpr(e: Expr): Int = {
    e match {
      case Number(value) => value
      case Identifier(name) => {
        calculateExpr(getVariable(e.asInstanceOf[Identifier]))
      }
      case Operator(op, left, right) => {
        op match {
          case "*" => calculateExpr(left) * calculateExpr(right)
          case "/" => calculateExpr(left) / calculateExpr(right)
          case "%" => calculateExpr(left) % calculateExpr(right)
          case "+" => calculateExpr(left) + calculateExpr(right)
          case "-" => calculateExpr(left) - calculateExpr(right)
        }
      }
    }
  }

  private def isConditionTrue(condition: Condition): Boolean = {
    val a = calculateExpr(condition.left)
    val b = calculateExpr(condition.right)

    condition.op match {
      case "==" => (a == b)
      case "!=" => (a != b)
      case "<=" => (a <= b)
      case "<" => (a < b)
      case ">=" => (a >= b)
      case ">" => (a > b)
    }
  }

  private def executeFunction(f: Function, arguments: Map[String, Expr]) {
    currentScope = new Scope(f.name, currentScope)

    for (v <- arguments) currentScope.variables(v._1) = v._2

    walk(f.statements)

    currentScope = currentScope.parentScope
  }

  private def walk(tree: List[Statement]) {
    if (!tree.isEmpty) {
      tree.head match {
        case FunctionCall(name, values) => {
          val f = program.functions.filter(x => x.name == name)

          if (f.size < 1) sys.error("Error: Undefined function '" +
            name + "' being called at position [" +
            tree.head.pos.column + "] on line: " +
            tree.head.pos.line)
          else {
            executeFunction(f(0), values)

            walk(tree.tail)
          }
        }
        case VariableDefinition(name, value) => {
          // push this variable into scope
          if (value.isInstanceOf[FunctionCall]) {
            val functionCall = value.asInstanceOf[FunctionCall]
            val function = program.functions.filter(x => x.name == functionCall.name)

            // check if proc is defined and if not throw an error
            if (function.size < 1) sys.error("Error: Undefined function '" +
              functionCall.name + "' being called at position [" +
              tree.head.pos.column + "] on line: " +
              tree.head.pos.line)
            else {
              executeFunction(function(0), functionCall.values)

              currentScope = currentScope.parentScope

              // assign the return value of the function to the variable
              currentScope.variables(name) = function(0).returnValue
            }
          } else {
            currentScope.variables(name) = value
          }

          walk(tree.tail)
        }
        case PrintStatement(value) => {
          println(calculateExpr(value))
          walk(tree.tail)
        }
        case LoopStatement(iterations, statements) => {
          currentScope = new Scope("", currentScope)

          for (i <- 0 until iterations) walk(statements)

          currentScope = currentScope.parentScope

          walk(tree.tail)
        }
        case IfStatement(condition, trueBranch, falseBranch) => {
          currentScope = new Scope("", currentScope)

          if (isConditionTrue(condition)) walk(trueBranch) else walk(falseBranch)

          currentScope = currentScope.parentScope

          walk(tree.tail)
        }
        case _ => ()
      }
    }
  }
}


The last thing we need is a main routine that will read our input script and execute our parser:

package net.travisdazell.parsers.runtime

import scala.io.Source

import net.travisdazell.parsers.SmallLanguageParser

object SmallLanguage {
  def main(args: Array[String]) {
    val inputFile = Source.fromFile("scripts/program.small")
    val inputSource = inputFile.mkString

    val parser = new SmallLanguageParser
    parser.parseAll(parser.program, inputSource) match {
      case parser.Success(r, n) => {
        val interpreter = new Interpreter(r)

        try {
          interpreter.run
        } catch {
          case e: RuntimeException => println(e.getMessage)
        }
      }
      case parser.Error(msg, n) => println("Error: " + msg)
      case parser.Failure(msg, n) => println("Error: " + msg)
      case _ =>
    }
  }
}


The one thing I haven't included is the model classes that represent the AST nodes. However, as always, I've provided the full source code on GitHub: https://github.com/travisdazell/javaone-2013-BOF-small-language