Skip to content

SI-9060: Accept fifth-edition names. #44

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/main/scala/scala/xml/Utility.scala
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,14 @@ object Utility extends AnyRef with parsing.TokenTests {
case i => Some(name.substring(0, i))
}

object Qualified {
/** No ':' yields null namespace; leading or trailing yields empty string. */
def unapply(name: String): Option[(String, String)] = name indexOf ':' match {
case -1 => Some(null, name)
case i => Some(name.substring(0, i), name.substring(i + 1)) // start out of range OK
}
}

/**
* Returns a hashcode for the given constituents of a node
*/
Expand Down
8 changes: 2 additions & 6 deletions src/main/scala/scala/xml/parsing/MarkupParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
theNode = m
}
if (1 != elemCount) {
reportSyntaxError("document must contain exactly one element")
Console.println(children.toList)
reportSyntaxError(s"document must contain exactly one element but has $elemCount")
}

doc.children = children
Expand Down Expand Up @@ -573,10 +572,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
def element1(pscope: NamespaceBinding): NodeSeq = {
val pos = this.pos
val (qname, (aMap, scope)) = xTag(pscope)
val (pre, local) = Utility.prefix(qname) match {
case Some(p) => (p, qname drop p.length + 1)
case _ => (null, qname)
}
val Utility.Qualified(pre, local) = qname
val ts = {
if (ch == '/') { // empty element
xToken("/>")
Expand Down
3 changes: 2 additions & 1 deletion src/main/scala/scala/xml/parsing/MarkupParserCommon.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ private[scala] trait MarkupParserCommon extends TokenTests {
* @param endCh either `'` or `"`
*/
def xAttributeValue(endCh: Char): String = {
require(endCh == '\'' || endCh == '"', s"Expected single or double quote, found $endCh")
val buf = new StringBuilder
while (ch != endCh) {
while (ch != endCh && !eof) {
// well-formedness constraint
if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
else if (ch == SU) truncatedError("")
Expand Down
26 changes: 24 additions & 2 deletions src/main/scala/scala/xml/parsing/TokenTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,28 @@ trait TokenTests {
def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9')

/**
def isNameChar(c: Char): Boolean = (
isNameStart(c) ||
(c >= '0' && c <= '9') ||
c == '-' ||
c == '.' ||
c == 0xB7 ||
(c >= 0x300 && c <= 0x36F) ||
(c >= 0x203F && c <= 0x2040)
)
def isNameStart(c: Char): Boolean = (
if (c < 0x00C0) isAlpha(c) || c == ':' || c == '_'
else if (c < 0x0300) c != 0xD7 && c != 0xF7
else if (c < 0x2000) c >= 0x370 && c != 0x37E
else if (c < 0x3001) c == 0x200C || c == 0x200D || (0x2070 to 0x218F contains c) ||
(0x2C00 to 0x2FEF contains c)
else if (c < 0xD800) true
else if (c < 0x10000) (0xF900 to 0xFDCF contains c) || (0xFDF0 to 0xFFFD contains c)
else false // codepoint < 0xF0000
)

/* Documenting that Appendix B is obsolete. And nested comments rock.
/*
* {{{
* NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
* | CombiningChar | Extender
Expand All @@ -54,7 +75,7 @@ trait TokenTests {
})
}

/**
/*
* {{{
* NameStart ::= ( Letter | '_' )
* }}}
Expand All @@ -74,6 +95,7 @@ trait TokenTests {
case _ => ch == '_'
}
}
*/

/**
* {{{
Expand Down
4 changes: 3 additions & 1 deletion src/test/scala/scala/xml/ShouldCompile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ class B {

// SI-5858
object SI_5858 {
new Elem(null, null, Null, TopScope, Nil: _*) // was ambiguous
@deprecated("ignore me", since="")
def e =
new Elem(null, null, Null, TopScope, Nil: _*) // was ambiguous
}

class Floozy {
Expand Down
4 changes: 3 additions & 1 deletion src/test/scala/scala/xml/UtilityTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ class UtilityTest {
@Test
def isNameStart: Unit = {
assertTrue(Utility.isNameStart('b'))
assertFalse(Utility.isNameStart(':'))
// No indication of who relied on this behavior.
//assertFalse(Utility.isNameStart(':'))
assertTrue(Utility.isNameStart(':'))
}

@Test
Expand Down
44 changes: 25 additions & 19 deletions src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
package scala.xml

import org.junit.{Test => UnitTest}
import org.junit.{ Test => UnitTest }
import org.junit.Ignore
import org.junit.runner.RunWith
import org.junit.runners.JUnit4
import org.junit.Assert.assertTrue
import org.junit.Assert.assertFalse
import org.junit.Assert.assertEquals
import scala.xml.parsing.ConstructingParser
import org.junit.Assert._
import java.io.StringWriter
import java.io.BufferedOutputStream
import java.io.ByteArrayOutputStream
import java.io.StringReader
import java.io.PrintStream
import scala.collection.Iterable
import scala.io.{ Source => Input }
import scala.xml.Utility.sort
import scala.xml.parsing.{ ConstructingParser, FatalError }
import scala.util.{ Try, Failure }
import scala.language.postfixOps

object XMLTest {
val e: scala.xml.MetaData = Null //Node.NoAttributes
Expand Down Expand Up @@ -469,23 +471,27 @@ Ours is the portal of hope, come as you are."
}

@UnitTest
def ioPosition = {
val out = new ByteArrayOutputStream
Console.withErr(out) {
Console.withOut(out) {
try {
xml.parsing.ConstructingParser.fromSource(io.Source.fromString("<foo>"), false).document()
} catch {
case e: Exception => println(e.getMessage)
}
def ioPosition(): Unit = {
// io.Source that captures errors
class TestInput(text: String) extends Input {
val out = new ByteArrayOutputStream
val ps = new PrintStream(out, true, "UTF8")
override val iter = Input fromString "<foo>"
override def report(pos: Int, msg: String, out: PrintStream) = super.report(pos, msg, ps)
def result: String = {
ps.flush()
out.toString("UTF8").replaceAllLiterally(0.toChar.toString, "*")
}
}
out.flush()
val src = new TestInput("<foo>")
Try(ConstructingParser.fromSource(src, false).document()) match {
case Failure(e: FatalError) => assertEquals("expected closing tag of foo", e.getMessage)
case _ => fail("expected exception")
}
assertEquals(
""":1:5: '/' expected instead of '' ^
:1:5: name expected, but char '' cannot start a name ^
expected closing tag of foo
""", out.toString)
""":1:5: '/' expected instead of '*' ^
:1:5: name expected, but char '*' cannot start a name ^
""", src.result)
}

def f(s: String) = {
Expand Down
55 changes: 55 additions & 0 deletions src/test/scala/scala/xml/parsing/ConstructingParserTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package scala.xml
package parsing

import org.junit.Test
import org.junit.Ignore
import org.junit.runner.RunWith
import org.junit.runners.JUnit4
import org.junit.Assert
import Assert._
import scala.xml.JUnitAssertsForXML.assertEquals

class ConstructingParserTest {

import scala.io.Source.fromString
import ConstructingParser.fromSource
//import scala.xml.{ NodeSeq, TopScope }
import XML.loadString

private def parse(s:String) = fromSource(fromString(s), false).element(TopScope)

// fifth edition
// ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
// [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
// [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
// and
// "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
//
@Test def `SI-9060: valid chars in names`(): Unit = {
// promote chars to ints
def c(a: Int) = a to a
def cs(a: Int, b: Int) = a to b
val ranges = List(
c(':'), cs('A','Z'), cs('a','z'), cs('_','_'),
0xC0 to 0x2FF filter { case 0xD7 | 0xF7 => false case _ => true },
0x370 to 0x37D, 0x37F to 0x1FFF, 0x200C to 0x200D, 0x2070 to 0x218F,
0x2C00 to 0x2FEF, 0x3001 to 0xD7FF, 0xF900 to 0xFDCF, 0xFDF0 to 0xFFFD
// , 0x10000 to 0xEFFFF // TODO, code points
)
val others = List[Range](
c('-'), c('.'), '0'.toInt to '9', 0xB7 to 0xB7, 0x300 to 0x36F, 0x203F to 0x2040
)

val tester = new TokenTests { }
for (r <- ranges ; i <- r) assertTrue(f"NameStart: $i%#x (${i.toChar})", tester.isNameStart(i.toChar))
for (r <- others ; i <- r) assertTrue(f"NameChar: $i%#x (${i.toChar})", tester.isNameChar(i.toChar))
}

// notice the middle dot
@Test def `SI-9060: problematic char in name`(): Unit = {
val problematic = """<a xmlns:b·="http://example.com"/>"""
val expected = problematic
assertEquals(expected, parse(problematic))
assertEquals(expected, loadString(problematic))
}
}
36 changes: 22 additions & 14 deletions src/test/scala/scala/xml/pull/XMLEventReaderTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,49 @@ import org.junit.Assert.assertTrue
import org.junit.Assert.assertFalse
import org.junit.Assert.assertEquals

import java.io.PrintStream
import scala.io.Source
import scala.xml.parsing.FatalError

class XMLEventReaderTest {

val src = Source.fromString("<hello><world/>!</hello>")
def reading(text: String)(f: XMLEventReader => Unit): Unit = {
val s = new Source {
override protected val iter = Source fromString text
override def report(pos: Int, msg: String, out: PrintStream) = ()
}
val r = new XMLEventReader(s)
try f(r)
finally r.stop()
}

@Test
def pull: Unit = {
val er = new XMLEventReader(src)
assertTrue(er.next match {
def pull() = reading("<hello><world/>!</hello>") { r =>
assertTrue(r.next() match {
case EvElemStart(_, "hello", _, _) => true
case _ => false
})
assertTrue(er.next match {
assertTrue(r.next() match {
case EvElemStart(_, "world", _, _) => true
case _ => false
})
assertTrue(er.next match {
assertTrue(r.next() match {
case EvElemEnd(_, "world") => true
case _ => false
})
assertTrue(er.next match {
assertTrue(r.next() match {
case EvText("!") => true
case _ => false
})
assertTrue(er.next match {
assertTrue(r.next() match {
case EvElemEnd(_, "hello") => true
case _ => false
})
er.stop // allow thread to be garbage-collected
}

@Test(expected = classOf[Exception])
@Test(expected = classOf[FatalError]) //expected closing tag of verbosegc
def missingTagTest: Unit = {
val data=
val data =
"""<?xml version="1.0" ?>
|<verbosegc xmlns="http://www.ibm.com/j9/verbosegc">
|
Expand All @@ -54,8 +62,8 @@ class XMLEventReaderTest {
|</exclusive-start>
|""".stripMargin

val er = new XMLEventReader(Source.fromString(data))
while(er.hasNext) er.next()
er.stop()
reading(data) { r =>
while (r.hasNext) r.next()
}
}
}