## 31 Combinator Parsing

Sample run of chapter's interpreter examples

### 31.1 Example: Arithmetic expressions

// In file combinator-parsing/Arith.scala
import scala.util.parsing.combinator._

/** Recognizer for arithmetic expressions over floating-point numbers.
  *
  * Grammar:
  * {{{
  *   expr   ::= term   { "+" term   | "-" term   }
  *   term   ::= factor { "*" factor | "/" factor }
  *   factor ::= floatingPointNumber | "(" expr ")"
  * }}}
  *
  * Every production is typed `Parser[Any]`: results are the raw nested
  * `~` / `List` structure produced by the combinators.
  */
class Arith extends JavaTokenParsers {

  /** A term followed by zero or more additive operator/term pairs. */
  def expr: Parser[Any] =
    term ~ rep(("+" ~ term) | ("-" ~ term))

  /** A factor followed by zero or more multiplicative operator/factor pairs. */
  def term: Parser[Any] =
    factor ~ rep(("*" ~ factor) | ("/" ~ factor))

  /** A number literal or a parenthesized expression. */
  def factor: Parser[Any] =
    floatingPointNumber | (("(" ~ expr) ~ ")")
}

// In file combinator-parsing/Arith.scala

/** Command-line driver: parses its first argument with `expr` and prints
  * the input followed by the parse result.
  */
object ParseExpr extends Arith {

  /** @param args command-line arguments; `args(0)` is the expression to parse */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case Some(input) =>
        println("input : " + input)
        println(parseAll(expr, input))
      case None =>
        // Report a usage error instead of failing with an index exception.
        Console.err.println("usage: scala ParseExpr <expression>")
        sys.exit(1)
    }
}
parseAll(expr, input)
\$ scala ParseExpr "2 * (3 + 7)" input: 2 * (3 + 7) [1.12] parsed: ((2~List((*~(((~((3~List())~List((+ ~(7~List())))))~)))))~List())
\$ scala ParseExpr "2 * (3 + 7))" input: 2 * (3 + 7)) [1.12] failure: `-' expected but `)' found 2 * (3 + 7)) ^

### 31.3 Basic regular expression parsers

/** Example parser object with a single regular-expression-based parser. */
object MyParsers extends RegexParsers {
  // A letter or underscore followed by any number of word characters.
  val ident: Parser[String] = regex("""[a-zA-Z_]\w*""".r)
}

### 31.4 Another example: JSON

// In file combinator-parsing/address-book.json { "address book": { "name": "John Smith", "address": { "street": "10 Market Street", "city" : "San Francisco, CA", "zip" : 94111 }, "phone numbers": [ "408 338-4238", "408 111-6892" ] } }
// In file combinator-parsing/JSON.scala
import scala.util.parsing.combinator._

/** Recognizer for JSON documents.
  *
  * Grammar:
  * {{{
  *   value  ::= obj | arr | stringLiteral | floatingPointNumber
  *            | "null" | "true" | "false"
  *   obj    ::= "{" [ member { "," member } ] "}"
  *   arr    ::= "[" [ value  { "," value  } ] "]"
  *   member ::= stringLiteral ":" value
  * }}}
  *
  * All productions return the raw combinator result (`Parser[Any]`).
  */
class JSON extends JavaTokenParsers {

  /** Any JSON value. */
  def value: Parser[Any] = (
      obj
    | arr
    | stringLiteral
    | floatingPointNumber
    | "null"
    | "true"
    | "false"
  )

  /** A brace-delimited, comma-separated sequence of members. */
  def obj: Parser[Any] =
    ("{" ~ repsep(member, ",")) ~ "}"

  /** A bracket-delimited, comma-separated sequence of values. */
  def arr: Parser[Any] =
    ("[" ~ repsep(value, ",")) ~ "]"

  /** A string key, a colon, and a value. */
  def member: Parser[Any] =
    (stringLiteral ~ ":") ~ value
}
// In file combinator-parsing/JSON.scala
import java.io.FileReader

/** Command-line driver: parses the file named by the first argument as a
  * JSON `value` and prints the parse result.
  */
object ParseJSON extends JSON {

  /** @param args command-line arguments; `args(0)` is the path of the file to parse */
  def main(args: Array[String]): Unit = {
    val reader = new FileReader(args(0))
    // Close the file handle even if parsing or printing throws.
    try println(parseAll(value, reader))
    finally reader.close()
  }
}
\$ scala ParseJSON address-book.json [13.4] parsed: (({~List((("address book"~:)~(({~List((("name"~:)~"John Smith"), (("address"~:)~(({~List((("street"~:)~"10 Market Street"), (("city"~:)~"San Francisco, CA"), (("zip"~:)~94111)))~})), (("phone numbers"~:)~(([~List("408 338-4238", "408 111-6892"))~]))))~}))))~})

### 31.5 Parser output

floatingPointNumber ^^ (_.toDouble)
"true" ^^ (x => true)
// Can be improved
/** Parses a JSON object and converts its members into a `Map`.
  *
  * The braces are matched but discarded; only the member list is used.
  */
def obj: Parser[Map[String, Any]] =
  (("{" ~ repsep(member, ",")) ~ "}") ^^ {
    case _ ~ members ~ _ => Map() ++ members
  }
/** Pair of results from two sequentially composed parsers.
  *
  * Being a case class named `~`, it can be used infix in patterns
  * (`case x ~ y => ...`).
  *
  * @param x result of the left-hand parser
  * @param y result of the right-hand parser
  */
case class ~[+A, +B](x: A, y: B) {
  /** Renders the pair as `(x~y)`. */
  override def toString = s"(${x}~${y})"
}
/** Parses a JSON object into a `Map` of its members.
  *
  * `~>` and `<~` drop the results of the brace literals, so only the
  * member list reaches the conversion function.
  */
def obj: Parser[Map[String, Any]] =
  (("{" ~> repsep(member, ",")) <~ "}") ^^ { members => Map() ++ members }
\$ scala JSON1Test address-book.json [14.1] parsed: Map( address book -> Map( name -> John Smith, address -> Map( street -> 10 Market Street, city -> San Francisco, CA, zip -> 94111), phone numbers -> List(408 338-4238, 408 111-6892) ) )
// In file combinator-parsing/JSON1.scala
import scala.util.parsing.combinator._

/** JSON parser that builds Scala values instead of raw combinator output:
  * objects become `Map[String, Any]`, arrays become `List[Any]`, numbers
  * become `Double`, and the keywords become `null`, `true` and `false`.
  */
class JSON1 extends JavaTokenParsers {

  /** A JSON object as a map from member name to member value. */
  def obj: Parser[Map[String, Any]] =
    (("{" ~> repsep(member, ",")) <~ "}") ^^ { members => Map() ++ members }

  /** A JSON array as a list of its values. */
  def arr: Parser[List[Any]] =
    ("[" ~> repsep(value, ",")) <~ "]"

  /** A `name: value` member as a pair; the colon is matched but discarded. */
  def member: Parser[(String, Any)] =
    ((stringLiteral ~ ":") ~ value) ^^ { case key ~ _ ~ v => (key, v) }

  /** Any JSON value, converted to its Scala representation. */
  def value: Parser[Any] = (
      obj
    | arr
    | stringLiteral
    | floatingPointNumber ^^ { digits => digits.toDouble }
    | "null" ^^ { _ => null }
    | "true" ^^ { _ => true }
    | "false" ^^ { _ => false }
  )
}
def value: Parser[Any] = obj | arr | stringLiteral | ...
obj; // semicolon implicitly inserted | arr
/** The arithmetic grammar rewritten with hypothetical alphabetic combinator
  * names (`andThen` for sequencing, `orElse` for alternation) in place of
  * the symbolic `~` and `|`.
  */
class ArithHypothetical extends JavaTokenParsers {
  def expr: Parser[Any] =
    term andThen rep(("+" andThen term) orElse ("-" andThen term))

  // Fixed: the division alternative previously spelled the combinator
  // `andthen`, inconsistent with every other use of `andThen` here.
  def term: Parser[Any] =
    factor andThen rep(("*" andThen factor) orElse ("/" andThen factor))

  def factor: Parser[Any] =
    floatingPointNumber orElse ("(" andThen expr andThen ")")
}

### 31.6 Implementing combinator parsers

package scala.util.parsing.combinator trait Parsers { ... // code goes here unless otherwise stated }
// Conceptual view: a parser is a function from some input to a parse result of type T.
type Parser[T] = Input => ParseResult[T]
// Abstract type of the individual input elements; left for concrete parser traits to define.
type Elem
/** Outcome of applying a parser; `T` is the type of the value produced on success.
  * Sealed: the two case classes declared alongside it are its subclasses.
  */
sealed abstract class ParseResult[+T]

/** Successful parse.
  *
  * @param result the value produced by the parser
  * @param in     the input passed along with the result (parsers in this chapter
  *               hand it to whatever parser runs next)
  */
case class Success[T](result: T, in: Input) extends ParseResult[T]

/** Failed parse.
  *
  * @param msg description of what went wrong
  * @param in  the input associated with the failure
  */
case class Failure(msg: String, in: Input) extends ParseResult[Nothing]
abstract class Parser[+T] extends (Input => ParseResult[T]) { p => // An unspecified method that defines // the behavior of this parser. def apply(in: Input): ParseResult[T] def ~ ... def | ... ... }
abstract class Parser[+T] extends ... { p =>
val id = this
/** Demonstrates a self alias: inside `Inner`, the alias declared on `Outer`
  * and the qualified `Outer.this` denote the same object.
  */
class Outer { enclosing =>
  class Inner {
    // Reference equality between the alias and the qualified `this`.
    println(enclosing eq Outer.this) // prints: true
  }
}
/** Parser that accepts a single input element satisfying a predicate.
  *
  * @param kind description of the expected element, used in the failure message
  * @param p    predicate the first input element must satisfy
  * @return a parser that succeeds with the first element and the rest of the
  *         input, or fails with `kind + " expected"` at the current input
  */
def elem(kind: String, p: Elem => Boolean) = new Parser[Elem] {
  def apply(in: Input) = {
    val head = in.first
    if (p(head)) Success(head, in.rest)
    else Failure(kind +" expected", in)
  }
}
abstract class Parser[+T] ... { p => ...
/** Sequential composition: runs this parser, then `q` on the remaining input.
  *
  * @param q the parser to run second; by-name so that recursive grammars
  *          do not loop while the parser is being constructed
  * @return a parser yielding both results as `x ~ y`, or the first failure
  *         encountered
  */
def ~ [U](q: => Parser[U]): Parser[T~U] = new Parser[T~U] {
  def apply(in: Input): ParseResult[T~U] = p(in) match {
    case Success(x, in1) =>
      q(in1) match {
        case Success(y, in2) => Success(new ~(x, y), in2)
        // Match Failure explicitly: it is a ParseResult[Nothing], so it
        // conforms to ParseResult[T~U]; an untyped binder would not.
        case f: Failure => f
      }
    case f: Failure => f
  }
}
/** Sequences this parser with `q`, keeping only this parser's result. */
def <~ [U](q: => Parser[U]): Parser[T] =
  (p~q) ^^ { case kept ~ _ => kept }

/** Sequences this parser with `q`, keeping only `q`'s result. */
def ~> [U](q: => Parser[U]): Parser[U] =
  (p~q) ^^ { case _ ~ kept => kept }
/** Alternation: tries this parser and, only if it fails, tries `q` on the
  * same input.
  *
  * The lower bound `U >: T` keeps the method legal on the covariant
  * `Parser[+T]` and lets the alternatives have different result types
  * (the result is their common supertype).
  *
  * @param q the alternative parser; by-name so it is only evaluated when needed
  */
def | [U >: T](q: => Parser[U]): Parser[U] = new Parser[U] {
  def apply(in: Input): ParseResult[U] = p(in) match {
    case s1 @ Success(_, _) => s1
    case _ => q(in)
  }
}
// A number enclosed in any number of parentheses. The explicit result type
// is required because the method refers to itself.
def parens: Parser[Any] = floatingPointNumber | "("~parens~")"
/** Result conversion: runs this parser and applies `f` to a successful result.
  *
  * @param f transformation applied to the parsed value
  * @return a parser with the same success/failure behavior whose result is `f(x)`
  */
def ^^ [U](f: T => U): Parser[U] = new Parser[U] {
  def apply(in: Input): ParseResult[U] = p(in) match {
    case Success(x, in1) => Success(f(x), in1)
    // Match Failure explicitly: it is a ParseResult[Nothing] and so conforms
    // to ParseResult[U]; an untyped binder would keep the type ParseResult[T].
    case f: Failure => f
  }
}
} // end Parser
/** Parser that always succeeds with `v` and passes the input on unchanged. */
def success[T](v: T) = new Parser[T] {
  def apply(in: Input) =
    Success(v, in)
}

/** Parser that always fails with `msg` at the input it is given. */
def failure(msg: String) = new Parser[Nothing] {
  def apply(in: Input) =
    Failure(msg, in)
}
/** Optional parser: `Some(result)` if `p` succeeds, otherwise `None`. */
def opt[T](p: => Parser[T]): Parser[Option[T]] = (
    p ^^ (found => Some(found))
  | success(None)
  )

/** Repetition: applies `p` as often as it succeeds and collects the results;
  * yields the empty list if `p` fails immediately.
  */
def rep[T](p: Parser[T]): Parser[List[T]] = (
    (p ~ rep(p)) ^^ { case head ~ tail => head :: tail }
  | success(List())
  )

/** Repetition with separator: like `rep`, but consecutive `p`s must be
  * separated by `q`, whose results are discarded.
  */
def repsep[T, U](p: Parser[T], q: Parser[U]): Parser[List[T]] = (
    (p ~ rep(q ~> p)) ^^ { case head ~ tail => head :: tail }
  | success(List())
  )
} // end Parsers

### 31.7 String literals and regular expressions

trait RegexParsers extends Parsers {
type Elem = Char
implicit def literal(s: String): Parser[String] = ... implicit def regex(r: Regex): Parser[String] = ...
protected val whiteSpace = """\s+""".r } // end RegexParsers
object MyParsers extends RegexParsers { override val whiteSpace = "".r ... }

### 31.8 Lexing and parsing

scala.util.parsing.combinator.lexical scala.util.parsing.combinator.syntactical

### 31.9 Error reporting

{ "name": John,
[1.13] failure: "false" expected but identifier John found { "name": John, ^
/** Any JSON value. The final `failure` alternative is reached only when all
  * other alternatives fail, so it supplies the error message for input that
  * cannot start a value.
  */
def value: Parser[Any] = (
    obj
  | arr
  | stringLiteral // was `stringLit`; the JSON grammars in this chapter use `stringLiteral`
  | floatingPointNumber
  | "null"
  | "true"
  | "false"
  | failure("illegal start of value")
  )
[1.13] failure: illegal start of value { "name": John, ^
// Most recently recorded failure, if any; starts out empty.
var lastFailure: Option[Failure] = None
/** Failed parse that records itself in `lastFailure` on construction.
  *
  * A new failure replaces the recorded one when nothing has been recorded
  * yet, or when it is at the same or a later input position.
  *
  * @param msg description of what went wrong
  * @param in  the input at which the failure occurred
  */
case class Failure(msg: String, in: Input) extends ParseResult[Nothing] {
  // `forall` is true for None, so the very first failure is recorded too;
  // the previous `isDefined && ...` guard could never fire while
  // `lastFailure` was still empty.
  if (lastFailure.forall(_.in.pos <= in.pos))
    lastFailure = Some(this)
}
/** Wraps `p` so that it must consume the entire input.
  *
  * @param p the parser to run
  * @return a parser that succeeds only if `p` succeeds and the remaining
  *         input is at its end; on failure it reports the recorded
  *         `lastFailure` when there is one, otherwise `p`'s own failure
  */
def phrase[T](p: Parser[T]): Parser[T] = new Parser[T] {
  def apply(in: Input): ParseResult[T] = {
    // Reset per parse, not once at construction, so that a reused phrase
    // parser does not report a failure left over from an earlier run.
    lastFailure = None
    p(in) match {
      case s @ Success(_, in1) =>
        if (in1.atEnd) s
        else Failure("end of input expected", in1)
      case f: Failure =>
        // `lastFailure` is an Option; unwrap it so a ParseResult is returned.
        lastFailure.getOrElse(f)
    }
  }
}

### 31.10 Backtracking versus LL(1)

// Arithmetic grammar written with `~!` in place of `~` for every sequence.
// `factor` lists the parenthesized alternative before the number literal.

/** expr ::= term { "+" term | "-" term } */
def expr: Parser[Any] =
  term ~! rep(("+" ~! term) | ("-" ~! term))

/** term ::= factor { "*" factor | "/" factor } */
def term: Parser[Any] =
  factor ~! rep(("*" ~! factor) | ("/" ~! factor))

/** factor ::= "(" expr ")" | floatingPointNumber */
def factor: Parser[Any] =
  (("(" ~! expr) ~! ")") | floatingPointNumber

### 31.11 Conclusion

// Parser for a complete JSON document: `value` wrapped by `phrase`.
val jsonParser = phrase(value)