#
# Copyright (c) 2001 by Jim Menard
#
# Released under the same license as Ruby. See
# http://www.ruby-lang.org/en/LICENSE.txt.
#
require 'nqxml/tokenizer'
# A String subclass that adds no behavior of its own. It exists only so that
# one test can hand the Tokenizer an argument whose class is a subclass of
# String rather than String itself.
class StringSubclass < String; end
# Add a test-only method to the tokenizer.
#
# The class and instance-variable names were previously misspelled
# ("Tokenzier", "@internalEntites"), which created a brand-new, unused class
# instead of reopening NQXML::Tokenizer.
module NQXML
  class Tokenizer
    # Replaces the tokenizer's internal entity hash with +h+
    # (entity name => replacement text). Returns +h+.
    def testOnlySetInternalEntities(h)
      @internalEntities = h
    end
  end
end
class TokenizerTester < NQXMLTester
# Tests simple predefined entity replacement and character reference
# replacement through the NQXML module-level helpers.
#
# NOTE(review): the input literals had been stripped of their entity and
# character references (leaving a syntax error on the first line). They are
# restored here so that each input decodes to the expected value that
# survived; confirm against the upstream test file.
def test_predefined_entity_replacement
  str = 'x &amp; y = &apos;z&apos;'
  assert_equal("x & y = 'z'", NQXML.replacePredefinedEntityRefs(str))

  # decimal and hexadecimal character references both yield 'A'
  assert_equal('x A x', NQXML.replaceCharacterRefs('x &#65; x'))
  assert_equal('x A x', NQXML.replaceCharacterRefs('x &#x41; x'))

  # make sure non-predefined refs are left alone
  assert_equal('x &huh; x',
               NQXML.replacePredefinedEntityRefs('x &huh; x'))
end
# Tests the entity replacement routine (Tokenizer#replaceAllRefsButParams)
# against predefined entities, character references, and a hand-installed
# table of internal general entities, including a chain a -> b -> foo -> bar.
#
# NOTE(review): the literals in this method had lost their entity and
# character references (one line no longer parsed). The inputs below are
# reconstructed so that each decodes to the expected value that survived.
# The 'ugly' entity follows the "ampersand" example of the XML 1.0
# specification (Appendix D). Confirm against the upstream test file,
# in particular how deeply the tokenizer re-expands replacement text.
def test_entity_replacement
  t = NQXML::Tokenizer.new('')

  # add a method that lets us set the internal entity hash
  class << t
    def testSetInternalEntities(h); @internalEntities = h; end
  end

  t.testSetInternalEntities({ # as if we saw <!ENTITY ...> tags
    'a' => '&b;',
    'b' => '&foo;',
    'foo' => '&bar;',
    'bar' => 'bletch',
    'ugly' => 'An ampersand (&#38;) may be escaped' +
              ' numerically (&#38;#38;) or with a general' +
              ' entity (&amp;amp;).'
  })

  # predefined entities
  assert_equal('x <&> x', t.replaceAllRefsButParams('x &lt;&amp;&gt; x'))
  assert_equal('x "\' x', t.replaceAllRefsButParams('x &quot;&apos; x'))

  # character references, decimal and hexadecimal
  assert_equal('x A x', t.replaceAllRefsButParams('x &#65; x'))
  assert_equal('x A x', t.replaceAllRefsButParams('x &#x41; x'))

  # ampersands, by name and by number
  assert_equal('&&', t.replaceAllRefsButParams('&amp;&amp;'))
  assert_equal('&', t.replaceAllRefsButParams('&#38;'))
  assert_equal('&', t.replaceAllRefsButParams('&amp;'))

  # user-defined entities, each one step further up the chain
  assert_equal('bletch', t.replaceAllRefsButParams('&bar;'))
  assert_equal('bletch', t.replaceAllRefsButParams('&foo;'))
  assert_equal('bletch', t.replaceAllRefsButParams('&b;'))
  assert_equal('bletch', t.replaceAllRefsButParams('&a;'))

  assert_equal('An ampersand (&) may be escaped' +
               ' numerically (&#38;) or with a general' +
               ' entity (&amp;).', t.replaceAllRefsButParams('&ugly;'))

  # Make sure unknown entities raise an error
  expect_error('x &huh; x', /entity reference '&huh;' is undefined/)
end
# Makes sure special character encoding is working: NQXML.encode must turn
# the XML special characters into the predefined entity references.
#
# NOTE(review): the expected values had been decoded back to raw characters
# (leaving a syntax error on the apostrophe case); they are restored here to
# the predefined entity references. Confirm against the upstream test file.
def test_encoding
  assert_equal('x &amp; x', NQXML.encode('x & x'))
  assert_equal('x &apos; x', NQXML.encode('x \' x'))
  assert_equal('x &lt;&quot;&gt; x', NQXML.encode('x <"> x'))
end
# Verifies that the tokenizer's low-level readers pull names off the input
# correctly: three names separated by spaces, then the text up to '>', then
# the '>' itself, after which the tokenizer must be at end of input.
def test_name_grabbing
  tokenizer = NQXML::Tokenizer.new('name name2 NameSpace:Name-foo_bar remaining>')

  assert_equal('name', tokenizer.nextName)
  tokenizer.skipSpaces

  assert_equal('name2', tokenizer.nextName)
  tokenizer.skipSpaces

  # colons, dashes and underscores are all part of one name
  assert_equal('NameSpace:Name-foo_bar', tokenizer.nextName)

  assert_equal(' remaining', tokenizer.textUpTo('>', false, false))
  assert_equal('>', tokenizer.nextChar)
  assert(tokenizer.eof?)
end
# Make sure the tokenizer can handle here documents: the XML is passed as a
# heredoc and tokenized to the end; any ParserError fails the test.
#
# NOTE(review): the heredoc opener and most of its body had been lost, so
# the code was comparing the Tokenizer class against an undefined local
# named `really`. The `<<EOS` opener is restored; the body is a minimal
# stand-in that keeps the one surviving word. Replace it with the upstream
# document if available.
def test_heredoc
  begin
    toker = NQXML::Tokenizer.new <<EOS
<doc>really</doc>
EOS
    # Nothing to check per token; we only care that no error is raised.
    toker.each { |_tok| }
  rescue NQXML::ParserError => ex
    assert_fail("unexpected parser error on line #{ex.line}: #{$!}")
  end
end
# Builds a tokenizer over +xml+, runs it to completion, and requires that a
# NQXML::ParserError is raised along the way.
#
# xml::   input handed to NQXML::Tokenizer.new
# regex:: pattern the raised error is matched against
# line, col, pos:: when non-nil, the error's line / column / pos must equal
#                  the given value
#
# If tokenizing finishes without a ParserError the test is failed.
def expect_error(xml, regex, line = nil, col = nil, pos = nil)
  tokenizer = NQXML::Tokenizer.new(xml)
  tokenizer.each { |_token| }
  assert_fail("expected exception '#{regex.source}'")
rescue NQXML::ParserError => err
  assert_match(err, regex)
  assert_equal(line, err.line) if line
  assert_equal(col, err.column) if col
  assert_equal(pos, err.pos) if pos
end
# Make sure we detect missing '?' at the end of processing instructions.
# Expects a parser error matching /missing '?'/ reported on line 1.
#
# NOTE(review): the XML argument is an empty string, which contains no
# processing instruction at all; the literal looks like it lost its markup
# (presumably a processing instruction closed with '>' instead of '?>').
# TODO restore from the upstream test file.
# NOTE(review): the '?' in the regex is unescaped, so it makes the preceding
# quote optional rather than matching a literal question mark.
def test_missing_qmark
expect_error('', /missing '?'/, 1)
end
# Disallow two slashes in one tag.
# Expects a parser error matching /slash appears at both beginning and end/
# reported on line 4.
#
# NOTE(review): the XML argument consists only of three newlines and holds
# no tag; the literal looks like it lost its markup (presumably one tag per
# line, with the offending tag on line 4). TODO restore from upstream.
def test_two_slashes
expect_error("\n\n\n", # on two lines due to rubymode.el
/slash appears at both beginning and end/, 4)
end
# Well-formedness check: the same attribute name may not appear twice
# in the same tag. Two inputs are checked against the same error pattern.
#
# NOTE(review): both XML arguments are empty strings and the two calls are
# now identical; the literals look like they lost their markup (presumably
# two different tags that each repeat an attribute). TODO restore from
# upstream.
def test_twin_attrs
expect_error('',
/attribute name '(.*)' appears more than once/)
expect_error('',
/attribute name '(.*)' appears more than once/)
end
# Make sure the tokenizer rejects or accepts the proper inputs: an Integer
# and an Array must be reported as illegal arguments, while an instance of
# a String subclass must be accepted.
def test_arguments
  expect_error(42, /illegal argument/)
  expect_error([], /illegal argument/)

  begin
    # We already tested tempfile arguments elsewhere (see
    # test_from_tempfile).
    str = StringSubclass.new('abc') # should be OK
    # Previously the string was built but never handed to a tokenizer, so
    # the "accepts a String subclass" half of this test checked nothing.
    NQXML::Tokenizer.new(str)
  rescue NQXML::ParserError => ex
    assert_fail("unexpected parser error on line #{ex.line}: #{$!}")
  end
end
# Make sure tokenizer rejects illegal public identifier.
# Expects a parser error matching
# /DOCTYPE PUBLIC public id literal contains illegal character/.
#
# NOTE(review): the XML argument is an empty string and so contains no
# DOCTYPE declaration; the literal looks like it lost its markup
# (presumably a DOCTYPE with a PUBLIC id holding a disallowed character).
# TODO restore from upstream.
def test_public_id_chars
expect_error('',
/DOCTYPE PUBLIC public id literal contains illegal character/)
end
# Make sure entities are tokenized correctly.
#
# Builds a tokenizer over a document assembled from sourceWithoutPreproc and
# body, then walks the tokens, comparing the i-th token with expected[i].
# When the token is exactly an NQXML::Doctype it must additionally carry a
# single entity equal to exampleEntity.
#
# NOTE(review): the string literals in this method appear to have lost their
# XML markup and entity references (tag sources are empty strings, the
# DOCTYPE text is truncated, and the ampersand references are decoded). The
# wording matches the "ampersand" entity example of the XML 1.0
# specification; TODO restore the literals from the upstream test file.
# NOTE(review): nothing checks that i reached expected.length after the
# loop, so a tokenizer that stops early would not be noticed here.
def test_entities
sourceWithoutPreproc = 'An ampersand (&) may be escaped numerically (&#38;) or with a general entity (&).
" >
]>'
body = "\n&example;"
toker = NQXML::Tokenizer.new("\n" +
sourceWithoutPreproc + body)
newline = NQXML::Text.new("\n", "\n")
literal = 'An ampersand (&) may be escaped numerically' +
' (&) or with a general entity (&).
'
src = 'An ampersand (&) may be escaped' +
' numerically (&#38;) or with a general entity' +
' (&)." >'
exampleEntity = NQXML::GeneralEntityTag.new('example', literal, nil,
nil, src)
doctype = NQXML::Doctype.new('x', nil, [exampleEntity],
sourceWithoutPreproc)
expected = [
NQXML::XMLDecl.new('xml', {'version'=>'1.0'},
''),
newline,
doctype,
newline,
NQXML::Tag.new('x', {}, false, ''),
NQXML::Tag.new('p', {}, false, ''),
NQXML::Text.new('An ampersand (&) may be escaped numerically' +
' (&) or with a general entity (&).',
'An ampersand (&) may be escaped numerically' +
' (&38;) or with a general entity' +
' (&).'),
NQXML::Tag.new('p', nil, true, '
'),
NQXML::Tag.new('x', nil, true, ''),
newline
]
i = 0
toker.each { | entity |
assert_equal(expected[i], entity)
if entity.instance_of?(NQXML::Doctype)
assert_equal(1, entity.entities.length)
assert_equal(exampleEntity, entity.entities[0])
end
i += 1
}
end
# Make sure entities are tokenized correctly. The example XML is taken
# almost verbatim from the XML specification.
#
# Builds a tokenizer over a document whose DOCTYPE uses parameter entities
# (xx, zz, yy) and one general entity (tricky), then walks the tokens,
# comparing the i-th token with expected[i]. When the token is exactly an
# NQXML::Doctype it must carry four entities, in the order xx, zz, yy,
# tricky.
#
# NOTE(review): the string literals in this method appear to have lost their
# XML markup (the declaration, DOCTYPE and tag sources are empty or
# truncated). TODO restore them from the upstream test file.
# NOTE(review): nothing checks that i reached expected.length after the
# loop, so a tokenizer that stops early would not be noticed here.
def test_param_entities
toker = NQXML::Tokenizer.new('
\' >
%xx;
]>
This sample shows a &tricky; method.'
)
newline = NQXML::Text.new("\n", "\n")
xx = NQXML::ParameterEntityTag.new('xx', '%zz;', nil,
'')
zzSrc = '\' >'
zz = NQXML::ParameterEntityTag.new('zz',
'',
nil, zzSrc)
yySrc = ''
yy = NQXML::ParameterEntityTag.new('yy',
'',
nil, yySrc)
tricky = NQXML::GeneralEntityTag.new('tricky', 'error-prone', nil, nil,
'')
doctype = NQXML::Doctype.new('test', nil,
[xx, zz, yy, tricky],
'
\' >
%xx;
]>'
)
expected = [
NQXML::XMLDecl.new('xml', {'version'=>'1.0'},
''),
newline,
doctype,
newline,
NQXML::Tag.new('test', {}, false, ''),
NQXML::Text.new('This sample shows a error-prone method.'),
NQXML::Tag.new('test', nil, true, '')
]
i = 0
toker.each { | entity |
assert_equal(expected[i], entity)
if entity.instance_of?(NQXML::Doctype)
assert_equal(4, entity.entities.length)
assert_equal(xx, entity.entities[0])
assert_equal(zz, entity.entities[1])
assert_equal(yy, entity.entities[2])
assert_equal(tricky, entity.entities[3])
end
i += 1
}
end
# Feeds the tokenizer from a Tempfile instead of a String. The resource XML
# is written to a temp file, the file is reopened for reading, and the
# tokenizer is run to the end of it. Any parser error fails the test; the
# file is closed either way.
def test_from_tempfile
  tempfile = Tempfile.open('nqxml')
  tempfile.write(@rsrc.xml)
  tempfile.close
  tempfile.open # Re-open tempfile for reading

  tokenizer = NQXML::Tokenizer.new(tempfile)
  begin
    # Nothing to inspect per token; just drain the tokenizer.
    tokenizer.each { |_entity| }
  rescue NQXML::ParserError => ex
    assert_fail("Tokenizer parser error thrown on line #{ex.line} : #{$!}")
  ensure
    tempfile.close
  end
end
# Tokenizes +xml+ and checks the token stream against the fixed sequence of
# resource tokens below, position by position. Afterwards the tokenizer must
# be at end of input and must have produced exactly as many tokens as the
# expected list holds.
def compare_tokens_with_expected(xml)
  rsrc = @rsrc
  nl = rsrc.newlineTok

  # Expected tokens, in document order
  expectedEntities = [
    rsrc.xmlDecl, nl,
    rsrc.piWithArgs, nl,
    rsrc.piNoArgs, nl,
    rsrc.doctype, nl,
    rsrc.comment, nl,
    rsrc.outerStart, rsrc.textDataWithSub,
    rsrc.simpleTagStart, rsrc.simpleTextData, rsrc.simpleTagEnd, nl,
    rsrc.innerTagStart, rsrc.innerTagEnd,
    rsrc.cdata, nl,
    rsrc.pTagStart, rsrc.textText,
    rsrc.bTagStart, rsrc.boldText, rsrc.bTagEnd,
    rsrc.moreTextText, rsrc.pTagEnd, nl,
    rsrc.outerEnd
  ]

  tokenizer = NQXML::Tokenizer.new(xml)
  seen = 0
  tokenizer.each do |token|
    assert_equal(expectedEntities[seen], token)
    seen += 1
  end

  assert(tokenizer.eof?)
  assert(seen == expectedEntities.length,
         'more expected results than XML data')
end
# Runs the resource document, unmodified, through the shared
# token-by-token comparison against the expected token list.
def test_tokens
  document = @rsrc.xml
  compare_tokens_with_expected(document)
end
# Repeats the token comparison with the resource document's "\n" line
# endings rewritten first as "\r\n" and then as "\r"; the expected token
# list is the same in both cases.
def test_different_newlines
  ["\r\n", "\r"].each do |eol|
    compare_tokens_with_expected(@rsrc.xml.gsub("\n", eol))
  end
end
# This XML fragment made the tokenizer barf because the &lt; was being
# translated and then seen as a tag open character.
#
# The fragment is an empty <p> element followed by text that starts with an
# escaped less-than sign; exactly three tokens are expected.
#
# NOTE(review): the markup in the literals below had been stripped (the
# input was just '<= 4' and the tag sources were empty). It is restored
# from the expected token list and the description above; confirm against
# the upstream test file.
def test_lt
  xml = '<p></p>&lt;= 4'
  expectedEntities = [
    NQXML::Tag.new('p', {}, false, '<p>'),
    NQXML::Tag.new('p', nil, true, '</p>'),
    NQXML::Text.new('<= 4')
  ]

  t = NQXML::Tokenizer.new(xml)
  i = 0
  t.each { |e|
    assert_equal(expectedEntities[i], e)
    i += 1
  }
  assert_equal(expectedEntities.length, i)
  assert(t.eof?)
end
# Checks a tag whose attribute value contains a less-than sign: the
# tokenizer must yield exactly one token, equal to a 'variable' tag whose
# attributes are name => "$<" and type => "Object", and then be at end of
# input.
#
# NOTE(review): the input XML and the tag's source string are both empty;
# the literals look like they lost their markup (judging by the method name,
# presumably a <variable> tag whose name attribute spells the less-than sign
# as a predefined entity reference). TODO restore from upstream.
def test_attr_val_lt_predefined
xml = ''
h = {'name' => "\$\<", 'type' => 'Object'}
tag = NQXML::Tag.new('variable', h, false,
'')
t = NQXML::Tokenizer.new(xml)
i = 0
t.each { | e |
assert_equal(tag, e)
i += 1
}
assert_equal(1, i)
assert(t.eof?)
end
# Expects a parser error matching /attribute values may not contain '<'/.
#
# NOTE(review): the XML argument is an empty string and holds no attribute
# at all; the literal looks like it lost its markup (presumably a tag with a
# raw '<' inside an attribute value). TODO restore from upstream.
def test_attr_val_lt_char
expect_error('',
/attribute values may not contain '<'/)
end
# Tokenizes a string that is expected to be a single comment: exactly one
# token must come back, equal to an NQXML::Comment built from the same
# string, and the tokenizer must then be at end of input.
#
# NOTE(review): the input is an empty string; the literal looks like it lost
# its markup (judging by the method name, presumably a comment containing a
# newline). TODO restore from upstream.
def test_comment_with_newline_bug
xml = ""
comment = NQXML::Comment.new(xml)
t = NQXML::Tokenizer.new(xml)
i = 0
t.each { | e |
assert_equal(comment, e)
i += 1
}
assert_equal(1, i)
assert(t.eof?)
end
end