26 Working with XML

26.1 Semi-structured data

26.2 XML overview

// Illegal XML:
//   One <pod>, two <pod>, three <pod> zoo
// Also illegal:
//   <pod>Three <peas> in the </pod></peas>
<pod>Three <peas></peas> in the </pod>
<pod>Three <peas/> in the </pod>
<pod peas="3" strings="true"/>

26.3 XML literals

scala> <a> | This is some XML. | Here is a tag: <atag/> | </a> res0: scala.xml.Elem = <a> This is some XML. Here is a tag: <atag></atag> </a>
scala> <a> {"hello"+", world"} </a> res1: scala.xml.Elem = <a> hello, world </a>
scala> val yearMade = 1955 yearMade: Int = 1955 scala> <a> { if (yearMade < 2000) <old>{yearMade}</old> | else xml.NodeSeq.Empty } | </a> res2: scala.xml.Elem = <a> <old>1955</old> </a>
scala> <a> {3 + 4} </a> res3: scala.xml.Elem = <a> 7 </a>
scala> <a> {"</a>potential security hole<a>"} </a> res4: scala.xml.Elem = <a> &lt;/a&gt;potential security hole&lt;a&gt; </a>
scala> "<a>" + "</a>potential security hole<a>" + "</a>" res5: java.lang.String = <a></a>potential security hole<a></a>

26.4 Serialization

// In file xml/CCTherm.scala

/** Record type whose fields are supplied by concrete subclasses,
  * typically anonymous ones (`new CCTherm { val description = ...; ... }`).
  *
  * Both prices are whole numbers of US cents; `condition` is a grade from 1 to 10.
  */
abstract class CCTherm {
  val description: String
  val yearMade: Int
  val dateObtained: String
  val bookPrice: Int      // in US cents
  val purchasePrice: Int  // in US cents
  val condition: Int      // 1 to 10

  /** A record prints as its description. */
  override def toString: String = description
}
// In file xml/CCTherm.scala
abstract class CCTherm {
  // ... abstract members and toString as in the previous CCTherm listing ...

  /** Serializes this record as a `<cctherm>` element containing one child
    * element per field, each named after the field and holding its value.
    */
  def toXML: scala.xml.Elem =
    <cctherm>
      <description>{description}</description>
      <yearMade>{yearMade}</yearMade>
      <dateObtained>{dateObtained}</dateObtained>
      <bookPrice>{bookPrice}</bookPrice>
      <purchasePrice>{purchasePrice}</purchasePrice>
      <condition>{condition}</condition>
    </cctherm>
}
// In file xml/Misc.scala

// Sample record built as an anonymous CCTherm subclass that fills in every abstract field.
val therm = new CCTherm {
  val description = "hot dog #5"
  val yearMade = 1952
  val dateObtained = "March 14, 2006"
  val bookPrice = 2199      // US cents
  val purchasePrice = 500   // US cents
  val condition = 9
}
scala> val therm = new CCTherm { | val description = "hot dog #5" | val yearMade = 1952 | val dateObtained = "March 14, 2006" | val bookPrice = 2199 | val purchasePrice = 500 | val condition = 9 | } therm: CCTherm = hot dog #5 scala> therm.toXML res6: scala.xml.Elem = <cctherm> <description>hot dog #5</description> <yearMade>1952</yearMade> <dateObtained>March 14, 2006</dateObtained> <bookPrice>2199</bookPrice> <purchasePrice>500</purchasePrice> <condition>9</condition> </cctherm>
scala> <a> {{{{brace yourself!}}}} </a> res7: scala.xml.Elem = <a> {{brace yourself!}} </a>

26.5 Taking XML apart

scala> <a>Sounds <tag/> good</a>.text res8: String = Sounds good
scala> <a> input ---&gt; output </a>.text res9: String = input ---> output
scala> <a><b><c>hello</c></b></a> \ "b" res10: scala.xml.NodeSeq = <b><c>hello</c></b>
scala> <a><b><c>hello</c></b></a> \ "c"
res11: scala.xml.NodeSeq =

scala> <a><b><c>hello</c></b></a> \\ "c"
res12: scala.xml.NodeSeq = <c>hello</c>

scala> <a><b><c>hello</c></b></a> \ "a"
res13: scala.xml.NodeSeq =

scala> <a><b><c>hello</c></b></a> \\ "a"
res14: scala.xml.NodeSeq = <a><b><c>hello</c></b></a>
scala> val joe = <employee | name="Joe" | rank="code monkey" | serial="123"/> joe: scala.xml.Elem = <employee rank="code monkey" name="Joe" serial="123"></employee> scala> joe \ "@name" res15: scala.xml.NodeSeq = Joe scala> joe \ "@serial" res16: scala.xml.NodeSeq = 123

26.6 Deserialization

// In file xml/CCTherm.scala

/** Rebuilds a CCTherm from an element shaped like the output of `toXML`.
  *
  * Each field is read from the text of the direct child element of the same
  * name (`\` searches direct children only).
  *
  * @param node element whose children carry the record's fields
  * @return a CCTherm whose fields hold the parsed values
  * @throws NumberFormatException if the text of `yearMade`, `bookPrice`,
  *         `purchasePrice` or `condition` is not a valid integer, which
  *         includes the empty text produced when that child is absent
  */
def fromXML(node: scala.xml.Node): CCTherm =
  new CCTherm {
    val description   = (node \ "description").text
    val yearMade      = (node \ "yearMade").text.toInt
    val dateObtained  = (node \ "dateObtained").text
    val bookPrice     = (node \ "bookPrice").text.toInt
    val purchasePrice = (node \ "purchasePrice").text.toInt
    val condition     = (node \ "condition").text.toInt
  }
scala> val node = therm.toXML node: scala.xml.Elem = <cctherm> <description>hot dog #5</description> <yearMade>1952</yearMade> <dateObtained>March 14, 2006</dateObtained> <bookPrice>2199</bookPrice> <purchasePrice>500</purchasePrice> <condition>9</condition> </cctherm> scala> fromXML(node) res15: CCTherm = hot dog #5

26.7 Loading and saving

// In file xml/Misc.scala

// Writes `node` to the file therm1.xml. Arguments, in order: file name, node to
// save, character encoding, whether to emit the <?xml ...?> declaration (true),
// and the document type (null = no DOCTYPE).
// NOTE(review): later scala-xml releases appear to expose this operation as
// XML.save with the same argument order -- confirm against the library version in use.
scala.xml.XML.saveFull("therm1.xml", node, "UTF-8", true, null)
<?xml version='1.0' encoding='UTF-8'?> <cctherm> <description>hot dog #5</description> <yearMade>1952</yearMade> <dateObtained>March 14, 2006</dateObtained> <bookPrice>2199</bookPrice> <purchasePrice>500</purchasePrice> <condition>9</condition> </cctherm>
// In file xml/Misc.scala

// Parses the file therm1.xml and binds its root element to `loadnode`.
val loadnode = xml.XML.loadFile("therm1.xml")
scala> val loadnode = xml.XML.loadFile("therm1.xml") loadnode: scala.xml.Elem = <cctherm> <description>hot dog #5</description> <yearMade>1952</yearMade> <dateObtained>March 14, 2006</dateObtained> <bookPrice>2199</bookPrice> <purchasePrice>500</purchasePrice> <condition>9</condition> </cctherm> scala> fromXML(loadnode) res14: CCTherm = hot dog #5

26.8 Pattern matching on XML

/** Describes `node` by its tag.
  *
  * The `{inner}` pattern binds exactly one child node, so an `<a>` or `<b>`
  * element with zero children or with several children falls through to the
  * default case.
  */
def proc(node: scala.xml.Node): String =
  node match {
    case <a>{inner}</a> =>
      "It's an a: " + inner
    case <b>{inner}</b> =>
      "It's a b: " + inner
    case _ =>
      "It's something else."
  }
scala> proc(<a>apple</a>) res16: String = It's an a: apple scala> proc(<b>banana</b>) res17: String = It's a b: banana scala> proc(<c>cherry</c>) res18: String = It's something else.
scala> proc(<a>a <em>red</em> apple</a>) res19: String = It's something else. scala> proc(<a/>) res20: String = It's something else.
/** Describes `node` by its tag.
  *
  * The `{children @ _*}` pattern binds the whole sequence of child nodes, so
  * `<a>` and `<b>` elements match whether they have no children, one child,
  * or many.
  */
def proc(node: scala.xml.Node): String =
  node match {
    case <a>{children @ _*}</a> =>
      "It's an a: " + children
    case <b>{children @ _*}</b> =>
      "It's a b: " + children
    case _ =>
      "It's something else."
  }
scala> proc(<a>a <em>red</em> apple</a>) res21: String = It's an a: ArrayBuffer(a , <em>red</em>, apple) scala> proc(<a/>) res22: String = It's an a: Array()
// In file xml/CCTherm.scala

// Sample catalog holding two <cctherm> records. The whitespace between the
// elements is part of the literal and appears as text nodes among the children.
val catalog =
  <catalog>
    <cctherm>
      <description>hot dog #5</description>
      <yearMade>1952</yearMade>
      <dateObtained>March 14, 2006</dateObtained>
      <bookPrice>2199</bookPrice>
      <purchasePrice>500</purchasePrice>
      <condition>9</condition>
    </cctherm>
    <cctherm>
      <description>Sprite Boy</description>
      <yearMade>1964</yearMade>
      <dateObtained>April 28, 2003</dateObtained>
      <bookPrice>1695</bookPrice>
      <purchasePrice>595</purchasePrice>
      <condition>5</condition>
    </cctherm>
  </catalog>
// Prints one "processing: ..." line for every child of <catalog>, including the
// whitespace text nodes between records (for those the description is empty).
catalog match {
  case <catalog>{entries @ _*}</catalog> =>
    entries.foreach { entry =>
      println("processing: " + (entry \ "description").text)
    }
}
processing: 
processing: hot dog #5
processing: 
processing: Sprite Boy
processing: 
// Prints one "processing: ..." line per <cctherm> child of <catalog>; children
// that are not <cctherm> elements (such as whitespace text nodes) are skipped.
catalog match {
  case <catalog>{entries @ _*}</catalog> =>
    entries.foreach {
      case entry @ <cctherm>{_*}</cctherm> =>
        println("processing: " + (entry \ "description").text)
      case _ =>
        () // not a <cctherm> element: nothing to print
    }
}
processing: hot dog #5
processing: Sprite Boy

26.9 Conclusion

