Code Examples for

Programming in Scala, Third Edition

Return to chapter index

33 Combinator Parsing

Sample run of chapter's interpreter examples

33.1 Example: Arithmetic expressions


// In file combinator-parsing/Arith.scala
import scala.util.parsing.combinator._

/** Recognizer for arithmetic expressions over floating-point numbers.
  *
  * The three productions below implement this grammar:
  * {{{
  * expr   ::= term { "+" term | "-" term }
  * term   ::= factor { "*" factor | "/" factor }
  * factor ::= floatingPointNumber | "(" expr ")"
  * }}}
  */
class Arith extends JavaTokenParsers {

  /** A term followed by any number of `+ term` / `- term` continuations. */
  def expr: Parser[Any] =
    term ~ rep(("+" ~ term) | ("-" ~ term))

  /** A factor followed by any number of `* factor` / `/ factor` continuations. */
  def term: Parser[Any] =
    factor ~ rep(("*" ~ factor) | ("/" ~ factor))

  /** A floating-point number, or an expression enclosed in parentheses. */
  def factor: Parser[Any] =
    floatingPointNumber | ("(" ~ expr ~ ")")
}

33.2 Running your parser


// In file combinator-parsing/Arith.scala

/** Command-line driver: parses its first argument with `expr` and prints the
  * input followed by the parse result.
  */
object ParseExpr extends Arith {

  /** @param args command-line arguments; `args(0)` is the expression to parse */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case Some(input) =>
        println("input : " + input)
        println(parseAll(expr, input))
      case None =>
        // Without this guard, indexing args(0) on an empty array throws
        // ArrayIndexOutOfBoundsException instead of telling the user what is missing.
        Console.err.println("usage: scala ParseExpr <expression>")
    }
}
parseAll(expr, input)
$ scala ParseExpr "2 * (3 + 7)" input: 2 * (3 + 7) [1.12] parsed: ((2~List((*~(((~((3~List())~List((+ ~(7~List())))))~)))))~List())
$ scala ParseExpr "2 * (3 + 7))" input: 2 * (3 + 7)) [1.12] failure: `-' expected but `)' found 2 * (3 + 7)) ^

33.3 Basic regular expression parsers


/** Parsers defined directly from regular expressions. */
object MyParsers extends RegexParsers {

  /** An identifier: a letter or underscore followed by zero or more word characters. */
  val ident: Parser[String] = regex("""[a-zA-Z_]\w*""".r)
}

33.4 Another example: JSON


// In file combinator-parsing/address-book.json { "address book": { "name": "John Smith", "address": { "street": "10 Market Street", "city" : "San Francisco, CA", "zip" : 94111 }, "phone numbers": [ "408 338-4238", "408 111-6892" ] } }
// In file combinator-parsing/JSON.scala
import scala.util.parsing.combinator._

/** Recognizer for JSON text. Every production is typed `Parser[Any]` and
  * yields the raw combinator result.
  */
class JSON extends JavaTokenParsers {

  /** Any JSON value: object, array, string, number, or one of the three keywords. */
  def value: Parser[Any] = (
      obj
    | arr
    | stringLiteral
    | floatingPointNumber
    | "null"
    | "true"
    | "false"
  )

  /** A brace-delimited, comma-separated sequence of members. */
  def obj: Parser[Any] =
    "{" ~ repsep(member, ",") ~ "}"

  /** A bracket-delimited, comma-separated sequence of values. */
  def arr: Parser[Any] =
    "[" ~ repsep(value, ",") ~ "]"

  /** A string literal, a colon, and a value. */
  def member: Parser[Any] =
    stringLiteral ~ ":" ~ value
}
// In file combinator-parsing/JSON.scala
import java.io.FileReader

/** Command-line driver: parses the file named by the first argument with
  * `value` and prints the parse result.
  */
object ParseJSON extends JSON {

  /** @param args command-line arguments; `args(0)` is the path of the file to parse */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case Some(path) =>
        val reader = new FileReader(path)
        // Release the file handle even if parsing or printing throws;
        // the reader was previously never closed.
        try println(parseAll(value, reader))
        finally reader.close()
      case None =>
        // Guard against a missing argument rather than failing on args(0).
        Console.err.println("usage: scala ParseJSON <file>")
    }
}
$ scala ParseJSON address-book.json
[13.4] parsed: (({~List((("address book"~:)~(({~List((("name"~:)~"John Smith"), (("address"~:)~(({~List((("street"~:)~"10 Market Street"), (("city"~:)~"San Francisco, CA"), (("zip"~:)~94111)))~})), (("phone numbers"~:)~(([~List("408 338-4238", "408 111-6892"))~]))))~}))))~})

33.5 Parser output


floatingPointNumber ^^ (_.toDouble)
"true" ^^ (x => true)
/** Parses a brace-delimited, comma-separated list of `member`s and collects
  * them into a `Map[String, Any]`.
  */
def obj: Parser[Map[String, Any]] = // Can be improved
  ("{" ~ repsep(member, ",") ~ "}") ^^ {
    case "{" ~ members ~ "}" => Map() ++ members
  }
/** A pair of values `x` and `y` that renders as `(x~y)`. */
case class ~[+A, +B](x: A, y: B) {
  override def toString = s"(${x}~${y})"
}
/** Parses `{ member, member, ... }`, keeps only the member list (the braces are
  * dropped by `~>` and `<~`), and collects it into a `Map[String, Any]`.
  */
def obj: Parser[Map[String, Any]] =
  ("{" ~> repsep(member, ",") <~ "}") ^^ { members => Map() ++ members }
$ scala JSON1Test address-book.json [14.1] parsed: Map( address book -> Map( name -> John Smith, address -> Map( street -> 10 Market Street, city -> San Francisco, CA, zip -> 94111), phone numbers -> List(408 338-4238, 408 111-6892) ) )
// In file combinator-parsing/JSON1.scala
import scala.util.parsing.combinator._

/** JSON parser whose productions convert what they recognize into Scala values. */
class JSON1 extends JavaTokenParsers {

  /** An object becomes a `Map` built from its members; the braces are dropped. */
  def obj: Parser[Map[String, Any]] =
    ("{" ~> repsep(member, ",") <~ "}") ^^ { members => Map() ++ members }

  /** An array becomes the `List` of its values; the brackets are dropped. */
  def arr: Parser[List[Any]] =
    "[" ~> repsep(value, ",") <~ "]"

  /** A member `name : value` becomes the pair `(name, value)`. */
  def member: Parser[(String, Any)] =
    (stringLiteral ~ ":" ~ value) ^^ { case key ~ ":" ~ v => (key, v) }

  /** Any JSON value; numbers become `Double`, keywords become `null`/`true`/`false`. */
  def value: Parser[Any] = (
      obj
    | arr
    | stringLiteral
    | floatingPointNumber ^^ { num => num.toDouble }
    | "null"  ^^ { _ => null }
    | "true"  ^^ { _ => true }
    | "false" ^^ { _ => false }
  )
}
def value: Parser[Any] = obj | arr | stringLiteral | ...
obj;   // semicolon implicitly inserted
| arr
/** The arithmetic-expression grammar spelled with alphanumeric combinator names
  * (`andThen` between consecutive parts, `orElse` between alternatives).
  */
class ArithHypothetical extends JavaTokenParsers {

  def expr: Parser[Any] =
    term andThen rep(
      ("+" andThen term) orElse ("-" andThen term)
    )

  def term: Parser[Any] =
    factor andThen rep(
      ("*" andThen factor) orElse ("/" andThen factor)
    )

  def factor: Parser[Any] =
    floatingPointNumber orElse (
      "(" andThen expr andThen ")"
    )
}

33.6 Implementing combinator parsers


package scala.util.parsing.combinator trait Parsers { ... // code goes here unless otherwise stated }
type Parser[T] = Input => ParseResult[T]
type Input = Reader[Elem]
type Elem
/** Outcome of applying a parser to some input: either a `Success` or a `Failure`. */
sealed abstract class ParseResult[+T]

/** A successful parse carrying the produced value `result` and an input `in`
  * (e.g. `elem` passes `in.rest`, the input after the consumed element).
  */
case class Success[T](result: T, in: Input) extends ParseResult[T]

/** A failed parse carrying an error message `msg` and the input `in` it is
  * reported against.
  */
case class Failure(msg: String, in: Input) extends ParseResult[Nothing]
abstract class Parser[+T] extends (Input => ParseResult[T]) { p => // An unspecified method that defines // the behavior of this parser. def apply(in: Input): ParseResult[T] def ~ ... def | ... ... }
abstract class Parser[+T] extends ... { p =>
val id = this
/** Shows that the self alias `outer` and `Outer.this` denote the same object
  * when referenced from a nested class.
  */
class Outer { outer =>

  /** Prints, on construction, whether the two references are identical. */
  class Inner {
    println(outer eq Outer.this) // prints: true
  }
}
/** Builds a parser that accepts a single input element satisfying `p`.
  *
  * @param kind text used to build the failure message (`kind + " expected"`)
  * @param p    predicate tested against the first element of the input
  * @return a parser that succeeds with the first element and `in.rest` when the
  *         predicate holds, and otherwise fails on the input it was given
  */
def elem(kind: String, p: Elem => Boolean) = new Parser[Elem] {
  def apply(in: Input) = in.first match {
    case head if p(head) => Success(head, in.rest)
    case _               => Failure(kind + " expected", in)
  }
}
abstract class Parser[+T] ... { p =>
  ...

  /** Sequential composition: runs this parser, then `q` on the input left over.
    *
    * @param q the parser to run second; passed by name and only evaluated
    *          after this parser has succeeded
    * @return a parser that succeeds with both results paired in a `~` when both
    *         parsers succeed, and otherwise returns the first failure unchanged
    */
  def ~ [U](q: => Parser[U]) = new Parser[T~U] {
    def apply(in: Input): ParseResult[T~U] = p(in) match {
      case Success(x, in1) =>
        q(in1) match {
          case Success(y, in2) => Success(new ~(x, y), in2)
          // Match on Failure itself: a bare variable pattern keeps the scrutinee's
          // static type (ParseResult[U]), which is not a ParseResult[T~U].
          case failure: Failure => failure
        }
      // Same reasoning: ParseResult[T] is not a ParseResult[T~U], but Failure is.
      case failure: Failure => failure
    }
  }
/** Runs this parser and then `q`, keeping only this parser's (left) result. */
def <~ [U](q: => Parser[U]): Parser[T] =
  (p ~ q) ^^ { case kept ~ _ => kept }

/** Runs this parser and then `q`, keeping only `q`'s (right) result. */
def ~> [U](q: => Parser[U]): Parser[U] =
  (p ~ q) ^^ { case _ ~ kept => kept }
/** Alternation: tries this parser and, only if it does not succeed, runs `q`
  * on the same input.
  *
  * The result type is widened to `U >: T`. `Parser` is declared covariant in
  * `T`, so `T` cannot appear as a parameter type here; widening also lets
  * alternatives with different result types be combined.
  *
  * @param q the fallback parser; passed by name and only evaluated on failure
  * @return a parser yielding this parser's success, or else whatever `q` yields
  */
def | [U >: T](q: => Parser[U]): Parser[U] = new Parser[U] {
  def apply(in: Input): ParseResult[U] = p(in) match {
    case s1 @ Success(_, _) => s1
    case _                  => q(in)
  }
}
// A number wrapped in any number of balanced parentheses.
// The definition refers to itself, so its result type has to be written out:
// the compiler cannot infer the result type of a recursive method.
def parens: Parser[Any] = floatingPointNumber | "("~parens~")"
/** Result conversion: runs this parser and applies `f` to a successful result.
  *
  * @param f function applied to the result of a successful parse
  * @return a parser that succeeds with `f(x)` and the same remaining input when
  *         this parser succeeds with `x`, and otherwise returns the failure unchanged
  */
def ^^ [U](f: T => U): Parser[U] = new Parser[U] {
  def apply(in: Input): ParseResult[U] = p(in) match {
    case Success(x, in1) => Success(f(x), in1)
    // Match on Failure itself: a bare variable pattern would have type
    // ParseResult[T], which is not a ParseResult[U].
    case failure: Failure => failure
  }
}
} // end Parser
/** A parser that succeeds with `v` on any input and passes that input on unchanged. */
def success[T](v: T) = new Parser[T] {
  def apply(input: Input) = Success(v, input)
}

/** A parser that fails with message `msg` on any input. */
def failure(msg: String) = new Parser[Nothing] {
  def apply(input: Input) = Failure(msg, input)
}
/** Optional parser: yields `Some(result)` when `p` succeeds and `None` otherwise. */
def opt[T](p: => Parser[T]): Parser[Option[T]] = (
    p ^^ { result => Some(result) }
  | success(None)
)

/** Zero or more repetitions of `p`, with the results collected in order. */
def rep[T](p: => Parser[T]): Parser[List[T]] = (
    (p ~ rep(p)) ^^ { case head ~ tail => head :: tail }
  | success(List())
)

/** Zero or more occurrences of `p` separated by `q`; only the results of `p` are kept. */
def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = (
    (p ~ rep(q ~> p)) ^^ { case first ~ others => first :: others }
  | success(List())
)

} // end Parsers

33.7 String literals and regular expressions


/** Parsers over character input that can be written as string literals and
  * regular expressions.
  */
trait RegexParsers extends Parsers {
  // Input elements are single characters.
  type Elem = Char
  // Implicit conversions (bodies elided in this excerpt) that let a plain String
  // or a Regex be used wherever a Parser[String] is expected.
  implicit def literal(s: String): Parser[String] = ...
  implicit def regex(r: Regex): Parser[String] = ...
  // Pattern describing white space: by default, one or more whitespace characters.
  // NOTE(review): presumably this is skipped before each literal/regex match —
  // the elided bodies above would confirm.
  protected val whiteSpace = """\s+""".r
} // end RegexParsers
// Replaces the inherited whiteSpace pattern with the empty regex.
// NOTE(review): presumably this turns off white-space skipping — confirm
// against the RegexParsers implementation.
object MyParsers extends RegexParsers {
  override val whiteSpace = "".r
  ...
}

33.8 Lexing and parsing


scala.util.parsing.combinator.lexical scala.util.parsing.combinator.syntactical

33.9 Error reporting


{ "name": John,
[1.13] failure: "false" expected but identifier John found { "name": John, ^
/** Any JSON value. The final `failure` alternative is reached only when every
  * other alternative has failed, and supplies the error message
  * "illegal start of value".
  */
def value: Parser[Any] =
  obj | arr | stringLiteral | floatingPointNumber | "null" | "true" | "false" |
  failure("illegal start of value")
[1.13] failure: illegal start of value { "name": John, ^
// The failure recorded most recently by the Failure constructor below,
// or None when no failure has been recorded.
var lastFailure: Option[Failure] = None

/** A failed parse with error message `msg`, reported against input `in`.
  *
  * Constructing a `Failure` records it in `lastFailure` unless a failure at a
  * strictly later input position has already been recorded.
  */
case class Failure(msg: String, in: Input) extends ParseResult[Nothing] {
  // `forall` holds for None, so the very first failure is always recorded; the
  // former `isDefined && ...` guard could never move lastFailure off its initial
  // None. `!(a < b)` expresses "b <= a" using only Position's `<`.
  if (lastFailure.forall(prev => !(in.pos < prev.in.pos)))
    lastFailure = Some(this)
}
/** Wraps `p` so that a parse succeeds only if `p` succeeds and leaves no input.
  *
  * @param p the parser to run on the whole input
  * @return a parser that yields `p`'s success when the remaining input is at its
  *         end, a "end of input expected" failure when input is left over, and
  *         otherwise the failure recorded in `lastFailure`
  */
def phrase[T](p: Parser[T]) = new Parser[T] {
  def apply(in: Input): ParseResult[T] = {
    // Reset per parse rather than once at construction, so a phrase parser held
    // in a val and applied repeatedly never reports a failure from an earlier run.
    lastFailure = None
    p(in) match {
      case s @ Success(_, in1) =>
        if (in1.atEnd) s
        else Failure("end of input expected", in1)
      // lastFailure is an Option and is not itself a ParseResult; unwrap it,
      // falling back to the failure at hand if nothing was recorded.
      case f: Failure => lastFailure.getOrElse(f)
    }
  }
}

33.10 Backtracking versus LL(1)


// Arithmetic-expression grammar written with `~!` between consecutive parts.
// NOTE(review): `~!` is presumably the library's non-backtracking sequencing
// operator — confirm against the scala.util.parsing.combinator documentation.

/** A term followed by any number of `+ term` / `- term` continuations. */
def expr: Parser[Any] =
  term ~! rep(("+" ~! term) | ("-" ~! term))

/** A factor followed by any number of `* factor` / `/ factor` continuations. */
def term: Parser[Any] =
  factor ~! rep(("*" ~! factor) | ("/" ~! factor))

/** A parenthesized expression (tried first), or a floating-point number. */
def factor: Parser[Any] =
  ("(" ~! expr ~! ")") | floatingPointNumber

33.11 Conclusion


// The `value` parser wrapped by `phrase`, built once and held in a val for reuse.
val jsonParser = phrase(value)

For more information about Programming in Scala, Third Edition (the "Stairway Book"), please visit:

http://www.artima.com/shop/programming_in_scala_3ed

and:

http://booksites.artima.com/programming_in_scala_3ed

Copyright © 2007-2016 Artima, Inc. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.