mirror of https://github.com/morpheus65535/bazarr
Upgraded some embedded dependencies to be ready for Python 3.10. This doesn't mean that Python 3.10 is fully supported right now.
This commit is contained in:
parent
2d214bfbd5
commit
402c82d84f
|
@ -20,8 +20,8 @@ def check_python_version():
|
||||||
print("Python " + minimum_py3_str + " or greater required. "
|
print("Python " + minimum_py3_str + " or greater required. "
|
||||||
"Current version is " + platform.python_version() + ". Please upgrade Python.")
|
"Current version is " + platform.python_version() + ". Please upgrade Python.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
elif int(python_version[0]) == 3 and int(python_version[1]) == 9:
|
elif int(python_version[0]) == 3 and int(python_version[1]) > 8:
|
||||||
print("Python 3.9.x is unsupported. Current version is " + platform.python_version() +
|
print("Python version greater than 3.8.x is unsupported. Current version is " + platform.python_version() +
|
||||||
". Keep in mind that even if it works, you're on your own.")
|
". Keep in mind that even if it works, you're on your own.")
|
||||||
elif (int(python_version[0]) == minimum_py3_tuple[0] and int(python_version[1]) < minimum_py3_tuple[1]) or \
|
elif (int(python_version[0]) == minimum_py3_tuple[0] and int(python_version[1]) < minimum_py3_tuple[1]) or \
|
||||||
(int(python_version[0]) != minimum_py3_tuple[0]):
|
(int(python_version[0]) != minimum_py3_tuple[0]):
|
||||||
|
|
|
@ -4,12 +4,6 @@
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
# Use of this source code is governed by the 3-clause BSD license
|
||||||
# that can be found in the LICENSE file.
|
# that can be found in the LICENSE file.
|
||||||
#
|
#
|
||||||
__title__ = 'babelfish'
|
|
||||||
__version__ = '0.5.5-dev'
|
|
||||||
__author__ = 'Antoine Bertin'
|
|
||||||
__license__ = 'BSD'
|
|
||||||
__copyright__ = 'Copyright 2015 the BabelFish authors'
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
|
|
|
@ -2,17 +2,22 @@
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
# Use of this source code is governed by the 3-clause BSD license
|
||||||
# that can be found in the LICENSE file.
|
# that can be found in the LICENSE file.
|
||||||
#
|
#
|
||||||
import collections
|
|
||||||
from pkg_resources import iter_entry_points, EntryPoint
|
from pkg_resources import iter_entry_points, EntryPoint
|
||||||
from ..exceptions import LanguageConvertError, LanguageReverseError
|
from ..exceptions import LanguageConvertError, LanguageReverseError
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Python 3.3+
|
||||||
|
from collections.abc import Mapping, MutableMapping
|
||||||
|
except ImportError:
|
||||||
|
from collections import Mapping, MutableMapping
|
||||||
|
|
||||||
|
|
||||||
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
|
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
|
||||||
class CaseInsensitiveDict(collections.MutableMapping):
|
class CaseInsensitiveDict(MutableMapping):
|
||||||
"""A case-insensitive ``dict``-like object.
|
"""A case-insensitive ``dict``-like object.
|
||||||
|
|
||||||
Implements all methods and operations of
|
Implements all methods and operations of
|
||||||
``collections.MutableMapping`` as well as dict's ``copy``. Also
|
``collections.abc.MutableMapping`` as well as dict's ``copy``. Also
|
||||||
provides ``lower_items``.
|
provides ``lower_items``.
|
||||||
|
|
||||||
All keys are expected to be strings. The structure remembers the
|
All keys are expected to be strings. The structure remembers the
|
||||||
|
@ -63,7 +68,7 @@ class CaseInsensitiveDict(collections.MutableMapping):
|
||||||
)
|
)
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if isinstance(other, collections.Mapping):
|
if isinstance(other, Mapping):
|
||||||
other = CaseInsensitiveDict(other)
|
other = CaseInsensitiveDict(other)
|
||||||
else:
|
else:
|
||||||
return NotImplemented
|
return NotImplemented
|
||||||
|
|
|
@ -14,9 +14,9 @@ class OpenSubtitlesConverter(LanguageReverseConverter):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.alpha3b_converter = language_converters['alpha3b']
|
self.alpha3b_converter = language_converters['alpha3b']
|
||||||
self.alpha2_converter = language_converters['alpha2']
|
self.alpha2_converter = language_converters['alpha2']
|
||||||
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
|
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne', ('chi', 'TW'): 'zht'}
|
||||||
self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
|
self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
|
||||||
'scc': ('srp', None), 'mne': ('srp', 'ME')})
|
'scc': ('srp', None), 'mne': ('srp', 'ME'), 'zht': ('zho', 'TW')})
|
||||||
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles.keys()))
|
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles.keys()))
|
||||||
|
|
||||||
def convert(self, alpha3, country=None, script=None):
|
def convert(self, alpha3, country=None, script=None):
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
# Use of this source code is governed by the 3-clause BSD license
|
||||||
# that can be found in the LICENSE file.
|
# that can be found in the LICENSE file.
|
||||||
#
|
#
|
||||||
|
from __future__ import unicode_literals
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
from pkg_resources import resource_stream # @UnresolvedImport
|
||||||
|
|
|
@ -1,45 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
import os.path
|
|
||||||
import tempfile
|
|
||||||
import zipfile
|
|
||||||
import requests
|
|
||||||
|
|
||||||
|
|
||||||
DATA_DIR = os.path.dirname(__file__)
|
|
||||||
|
|
||||||
# iso-3166-1.txt
|
|
||||||
print('Downloading ISO-3166-1 standard (ISO country codes)...')
|
|
||||||
with open(os.path.join(DATA_DIR, 'iso-3166-1.txt'), 'w') as f:
|
|
||||||
r = requests.get('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm')
|
|
||||||
f.write(r.content.strip())
|
|
||||||
|
|
||||||
# iso-639-3.tab
|
|
||||||
print('Downloading ISO-639-3 standard (ISO language codes)...')
|
|
||||||
with tempfile.TemporaryFile() as f:
|
|
||||||
r = requests.get('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip')
|
|
||||||
f.write(r.content)
|
|
||||||
with zipfile.ZipFile(f) as z:
|
|
||||||
z.extract('iso-639-3.tab', DATA_DIR)
|
|
||||||
|
|
||||||
# iso-15924
|
|
||||||
print('Downloading ISO-15924 standard (ISO script codes)...')
|
|
||||||
with tempfile.TemporaryFile() as f:
|
|
||||||
r = requests.get('http://www.unicode.org/iso15924/iso15924.txt.zip')
|
|
||||||
f.write(r.content)
|
|
||||||
with zipfile.ZipFile(f) as z:
|
|
||||||
z.extract('iso15924-utf8-20131012.txt', DATA_DIR)
|
|
||||||
|
|
||||||
# opensubtitles supported languages
|
|
||||||
print('Downloading OpenSubtitles supported languages...')
|
|
||||||
with open(os.path.join(DATA_DIR, 'opensubtitles_languages.txt'), 'w') as f:
|
|
||||||
r = requests.get('http://www.opensubtitles.org/addons/export_languages.php')
|
|
||||||
f.write(r.content)
|
|
||||||
|
|
||||||
print('Done!')
|
|
|
@ -4,6 +4,7 @@
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
# Use of this source code is governed by the 3-clause BSD license
|
||||||
# that can be found in the LICENSE file.
|
# that can be found in the LICENSE file.
|
||||||
#
|
#
|
||||||
|
from __future__ import unicode_literals
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
from pkg_resources import resource_stream # @UnresolvedImport
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
# Use of this source code is governed by the 3-clause BSD license
|
||||||
# that can be found in the LICENSE file.
|
# that can be found in the LICENSE file.
|
||||||
#
|
#
|
||||||
|
from __future__ import unicode_literals
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
from pkg_resources import resource_stream # @UnresolvedImport
|
||||||
from . import basestr
|
from . import basestr
|
||||||
|
|
|
@ -1,377 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import pickle
|
|
||||||
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
|
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
|
||||||
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
|
|
||||||
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
|
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info[:2] <= (2, 6):
|
|
||||||
_MAX_LENGTH = 80
|
|
||||||
|
|
||||||
def safe_repr(obj, short=False):
|
|
||||||
try:
|
|
||||||
result = repr(obj)
|
|
||||||
except Exception:
|
|
||||||
result = object.__repr__(obj)
|
|
||||||
if not short or len(result) < _MAX_LENGTH:
|
|
||||||
return result
|
|
||||||
return result[:_MAX_LENGTH] + ' [truncated]...'
|
|
||||||
|
|
||||||
class _AssertRaisesContext(object):
|
|
||||||
"""A context manager used to implement TestCase.assertRaises* methods."""
|
|
||||||
|
|
||||||
def __init__(self, expected, test_case, expected_regexp=None):
|
|
||||||
self.expected = expected
|
|
||||||
self.failureException = test_case.failureException
|
|
||||||
self.expected_regexp = expected_regexp
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_value, tb):
|
|
||||||
if exc_type is None:
|
|
||||||
try:
|
|
||||||
exc_name = self.expected.__name__
|
|
||||||
except AttributeError:
|
|
||||||
exc_name = str(self.expected)
|
|
||||||
raise self.failureException(
|
|
||||||
"{0} not raised".format(exc_name))
|
|
||||||
if not issubclass(exc_type, self.expected):
|
|
||||||
# let unexpected exceptions pass through
|
|
||||||
return False
|
|
||||||
self.exception = exc_value # store for later retrieval
|
|
||||||
if self.expected_regexp is None:
|
|
||||||
return True
|
|
||||||
|
|
||||||
expected_regexp = self.expected_regexp
|
|
||||||
if isinstance(expected_regexp, basestring):
|
|
||||||
expected_regexp = re.compile(expected_regexp)
|
|
||||||
if not expected_regexp.search(str(exc_value)):
|
|
||||||
raise self.failureException('"%s" does not match "%s"' %
|
|
||||||
(expected_regexp.pattern, str(exc_value)))
|
|
||||||
return True
|
|
||||||
|
|
||||||
class _Py26FixTestCase(object):
|
|
||||||
def assertIsNone(self, obj, msg=None):
|
|
||||||
"""Same as self.assertTrue(obj is None), with a nicer default message."""
|
|
||||||
if obj is not None:
|
|
||||||
standardMsg = '%s is not None' % (safe_repr(obj),)
|
|
||||||
self.fail(self._formatMessage(msg, standardMsg))
|
|
||||||
|
|
||||||
def assertIsNotNone(self, obj, msg=None):
|
|
||||||
"""Included for symmetry with assertIsNone."""
|
|
||||||
if obj is None:
|
|
||||||
standardMsg = 'unexpectedly None'
|
|
||||||
self.fail(self._formatMessage(msg, standardMsg))
|
|
||||||
|
|
||||||
def assertIn(self, member, container, msg=None):
|
|
||||||
"""Just like self.assertTrue(a in b), but with a nicer default message."""
|
|
||||||
if member not in container:
|
|
||||||
standardMsg = '%s not found in %s' % (safe_repr(member),
|
|
||||||
safe_repr(container))
|
|
||||||
self.fail(self._formatMessage(msg, standardMsg))
|
|
||||||
|
|
||||||
def assertNotIn(self, member, container, msg=None):
|
|
||||||
"""Just like self.assertTrue(a not in b), but with a nicer default message."""
|
|
||||||
if member in container:
|
|
||||||
standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
|
|
||||||
safe_repr(container))
|
|
||||||
self.fail(self._formatMessage(msg, standardMsg))
|
|
||||||
|
|
||||||
def assertIs(self, expr1, expr2, msg=None):
|
|
||||||
"""Just like self.assertTrue(a is b), but with a nicer default message."""
|
|
||||||
if expr1 is not expr2:
|
|
||||||
standardMsg = '%s is not %s' % (safe_repr(expr1),
|
|
||||||
safe_repr(expr2))
|
|
||||||
self.fail(self._formatMessage(msg, standardMsg))
|
|
||||||
|
|
||||||
def assertIsNot(self, expr1, expr2, msg=None):
|
|
||||||
"""Just like self.assertTrue(a is not b), but with a nicer default message."""
|
|
||||||
if expr1 is expr2:
|
|
||||||
standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
|
|
||||||
self.fail(self._formatMessage(msg, standardMsg))
|
|
||||||
|
|
||||||
else:
|
|
||||||
class _Py26FixTestCase(object):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestScript(TestCase, _Py26FixTestCase):
|
|
||||||
def test_wrong_script(self):
|
|
||||||
self.assertRaises(ValueError, lambda: Script('Azer'))
|
|
||||||
|
|
||||||
def test_eq(self):
|
|
||||||
self.assertEqual(Script('Latn'), Script('Latn'))
|
|
||||||
|
|
||||||
def test_ne(self):
|
|
||||||
self.assertNotEqual(Script('Cyrl'), Script('Latn'))
|
|
||||||
|
|
||||||
def test_hash(self):
|
|
||||||
self.assertEqual(hash(Script('Hira')), hash('Hira'))
|
|
||||||
|
|
||||||
def test_pickle(self):
|
|
||||||
self.assertEqual(pickle.loads(pickle.dumps(Script('Latn'))), Script('Latn'))
|
|
||||||
|
|
||||||
|
|
||||||
class TestCountry(TestCase, _Py26FixTestCase):
|
|
||||||
def test_wrong_country(self):
|
|
||||||
self.assertRaises(ValueError, lambda: Country('ZZ'))
|
|
||||||
|
|
||||||
def test_eq(self):
|
|
||||||
self.assertEqual(Country('US'), Country('US'))
|
|
||||||
|
|
||||||
def test_ne(self):
|
|
||||||
self.assertNotEqual(Country('GB'), Country('US'))
|
|
||||||
self.assertIsNotNone(Country('US'))
|
|
||||||
|
|
||||||
def test_hash(self):
|
|
||||||
self.assertEqual(hash(Country('US')), hash('US'))
|
|
||||||
|
|
||||||
def test_pickle(self):
|
|
||||||
for country in [Country('GB'), Country('US')]:
|
|
||||||
self.assertEqual(pickle.loads(pickle.dumps(country)), country)
|
|
||||||
|
|
||||||
def test_converter_name(self):
|
|
||||||
self.assertEqual(Country('US').name, 'UNITED STATES')
|
|
||||||
self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
|
|
||||||
self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
|
|
||||||
self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
|
|
||||||
self.assertEqual(len(country_converters['name'].codes), 249)
|
|
||||||
|
|
||||||
|
|
||||||
class TestLanguage(TestCase, _Py26FixTestCase):
|
|
||||||
def test_languages(self):
|
|
||||||
self.assertEqual(len(LANGUAGES), 7874)
|
|
||||||
|
|
||||||
def test_wrong_language(self):
|
|
||||||
self.assertRaises(ValueError, lambda: Language('zzz'))
|
|
||||||
|
|
||||||
def test_unknown_language(self):
|
|
||||||
self.assertEqual(Language('zzzz', unknown='und'), Language('und'))
|
|
||||||
|
|
||||||
def test_converter_alpha2(self):
|
|
||||||
self.assertEqual(Language('eng').alpha2, 'en')
|
|
||||||
self.assertEqual(Language.fromalpha2('en'), Language('eng'))
|
|
||||||
self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
|
|
||||||
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
|
|
||||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
|
|
||||||
self.assertEqual(len(language_converters['alpha2'].codes), 184)
|
|
||||||
|
|
||||||
def test_converter_alpha3b(self):
|
|
||||||
self.assertEqual(Language('fra').alpha3b, 'fre')
|
|
||||||
self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
|
|
||||||
self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
|
|
||||||
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
|
|
||||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
|
|
||||||
self.assertEqual(len(language_converters['alpha3b'].codes), 418)
|
|
||||||
|
|
||||||
def test_converter_alpha3t(self):
|
|
||||||
self.assertEqual(Language('fra').alpha3t, 'fra')
|
|
||||||
self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
|
|
||||||
self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
|
|
||||||
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
|
|
||||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
|
|
||||||
self.assertEqual(len(language_converters['alpha3t'].codes), 418)
|
|
||||||
|
|
||||||
def test_converter_name(self):
|
|
||||||
self.assertEqual(Language('eng').name, 'English')
|
|
||||||
self.assertEqual(Language.fromname('English'), Language('eng'))
|
|
||||||
self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
|
|
||||||
self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
|
|
||||||
self.assertEqual(len(language_converters['name'].codes), 7874)
|
|
||||||
|
|
||||||
def test_converter_scope(self):
|
|
||||||
self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
|
|
||||||
self.assertEqual(Language('eng').scope, 'individual')
|
|
||||||
self.assertEqual(Language('und').scope, 'special')
|
|
||||||
|
|
||||||
def test_converter_type(self):
|
|
||||||
self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
|
|
||||||
self.assertEqual(Language('eng').type, 'living')
|
|
||||||
self.assertEqual(Language('und').type, 'special')
|
|
||||||
|
|
||||||
def test_converter_opensubtitles(self):
|
|
||||||
self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
|
|
||||||
self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
|
|
||||||
self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
|
|
||||||
self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
|
|
||||||
self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
|
|
||||||
# Montenegrin is not recognized as an ISO language (yet?) but for now it is
|
|
||||||
# unofficially accepted as Serbian from Montenegro
|
|
||||||
self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
|
|
||||||
self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
|
|
||||||
self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
|
|
||||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
|
|
||||||
self.assertEqual(len(language_converters['opensubtitles'].codes), 607)
|
|
||||||
|
|
||||||
# test with all the LANGUAGES from the opensubtitles api
|
|
||||||
# downloaded from: http://www.opensubtitles.org/addons/export_languages.php
|
|
||||||
f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
|
|
||||||
f.readline()
|
|
||||||
for l in f:
|
|
||||||
idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
|
|
||||||
if not int(upload_enabled) and not int(web_enabled):
|
|
||||||
# do not test LANGUAGES that are too esoteric / not widely available
|
|
||||||
continue
|
|
||||||
self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
|
|
||||||
if alpha2:
|
|
||||||
self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
def test_converter_opensubtitles_codes(self):
|
|
||||||
for code in language_converters['opensubtitles'].from_opensubtitles.keys():
|
|
||||||
self.assertIn(code, language_converters['opensubtitles'].codes)
|
|
||||||
|
|
||||||
def test_fromietf_country_script(self):
|
|
||||||
language = Language.fromietf('fra-FR-Latn')
|
|
||||||
self.assertEqual(language.alpha3, 'fra')
|
|
||||||
self.assertEqual(language.country, Country('FR'))
|
|
||||||
self.assertEqual(language.script, Script('Latn'))
|
|
||||||
|
|
||||||
def test_fromietf_country_no_script(self):
|
|
||||||
language = Language.fromietf('fra-FR')
|
|
||||||
self.assertEqual(language.alpha3, 'fra')
|
|
||||||
self.assertEqual(language.country, Country('FR'))
|
|
||||||
self.assertIsNone(language.script)
|
|
||||||
|
|
||||||
def test_fromietf_no_country_no_script(self):
|
|
||||||
language = Language.fromietf('fra-FR')
|
|
||||||
self.assertEqual(language.alpha3, 'fra')
|
|
||||||
self.assertEqual(language.country, Country('FR'))
|
|
||||||
self.assertIsNone(language.script)
|
|
||||||
|
|
||||||
def test_fromietf_no_country_script(self):
|
|
||||||
language = Language.fromietf('fra-Latn')
|
|
||||||
self.assertEqual(language.alpha3, 'fra')
|
|
||||||
self.assertIsNone(language.country)
|
|
||||||
self.assertEqual(language.script, Script('Latn'))
|
|
||||||
|
|
||||||
def test_fromietf_alpha2_language(self):
|
|
||||||
language = Language.fromietf('fr-Latn')
|
|
||||||
self.assertEqual(language.alpha3, 'fra')
|
|
||||||
self.assertIsNone(language.country)
|
|
||||||
self.assertEqual(language.script, Script('Latn'))
|
|
||||||
|
|
||||||
def test_fromietf_wrong_language(self):
|
|
||||||
self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))
|
|
||||||
|
|
||||||
def test_fromietf_wrong_country(self):
|
|
||||||
self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))
|
|
||||||
|
|
||||||
def test_fromietf_wrong_script(self):
|
|
||||||
self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))
|
|
||||||
|
|
||||||
def test_eq(self):
|
|
||||||
self.assertEqual(Language('eng'), Language('eng'))
|
|
||||||
|
|
||||||
def test_ne(self):
|
|
||||||
self.assertNotEqual(Language('fra'), Language('eng'))
|
|
||||||
self.assertIsNotNone(Language('fra'))
|
|
||||||
|
|
||||||
def test_nonzero(self):
|
|
||||||
self.assertFalse(bool(Language('und')))
|
|
||||||
self.assertTrue(bool(Language('eng')))
|
|
||||||
|
|
||||||
def test_language_hasattr(self):
|
|
||||||
self.assertTrue(hasattr(Language('fra'), 'alpha3'))
|
|
||||||
self.assertTrue(hasattr(Language('fra'), 'alpha2'))
|
|
||||||
self.assertFalse(hasattr(Language('bej'), 'alpha2'))
|
|
||||||
|
|
||||||
def test_country_hasattr(self):
|
|
||||||
self.assertTrue(hasattr(Country('US'), 'name'))
|
|
||||||
self.assertTrue(hasattr(Country('FR'), 'alpha2'))
|
|
||||||
self.assertFalse(hasattr(Country('BE'), 'none'))
|
|
||||||
|
|
||||||
def test_country(self):
|
|
||||||
self.assertEqual(Language('por', 'BR').country, Country('BR'))
|
|
||||||
self.assertEqual(Language('eng', Country('US')).country, Country('US'))
|
|
||||||
|
|
||||||
def test_eq_with_country(self):
|
|
||||||
self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))
|
|
||||||
|
|
||||||
def test_ne_with_country(self):
|
|
||||||
self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))
|
|
||||||
|
|
||||||
def test_script(self):
|
|
||||||
self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
|
|
||||||
self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))
|
|
||||||
|
|
||||||
def test_eq_with_script(self):
|
|
||||||
self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))
|
|
||||||
|
|
||||||
def test_ne_with_script(self):
|
|
||||||
self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))
|
|
||||||
|
|
||||||
def test_eq_with_country_and_script(self):
|
|
||||||
self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))
|
|
||||||
|
|
||||||
def test_ne_with_country_and_script(self):
|
|
||||||
self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))
|
|
||||||
|
|
||||||
def test_hash(self):
|
|
||||||
self.assertEqual(hash(Language('fra')), hash('fr'))
|
|
||||||
self.assertEqual(hash(Language('ace')), hash('ace'))
|
|
||||||
self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
|
|
||||||
self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
|
|
||||||
self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))
|
|
||||||
|
|
||||||
def test_pickle(self):
|
|
||||||
for lang in [Language('fra'),
|
|
||||||
Language('eng', 'US'),
|
|
||||||
Language('srp', script='Latn'),
|
|
||||||
Language('eng', 'US', 'Latn')]:
|
|
||||||
self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)
|
|
||||||
|
|
||||||
def test_str(self):
|
|
||||||
self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
|
|
||||||
self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
|
|
||||||
self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))
|
|
||||||
|
|
||||||
def test_register_converter(self):
|
|
||||||
class TestConverter(LanguageReverseConverter):
|
|
||||||
def __init__(self):
|
|
||||||
self.to_test = {'fra': 'test1', 'eng': 'test2'}
|
|
||||||
self.from_test = {'test1': 'fra', 'test2': 'eng'}
|
|
||||||
|
|
||||||
def convert(self, alpha3, country=None, script=None):
|
|
||||||
if alpha3 not in self.to_test:
|
|
||||||
raise LanguageConvertError(alpha3, country, script)
|
|
||||||
return self.to_test[alpha3]
|
|
||||||
|
|
||||||
def reverse(self, test):
|
|
||||||
if test not in self.from_test:
|
|
||||||
raise LanguageReverseError(test)
|
|
||||||
return (self.from_test[test], None)
|
|
||||||
language = Language('fra')
|
|
||||||
self.assertFalse(hasattr(language, 'test'))
|
|
||||||
language_converters['test'] = TestConverter()
|
|
||||||
self.assertTrue(hasattr(language, 'test'))
|
|
||||||
self.assertIn('test', language_converters)
|
|
||||||
self.assertEqual(Language('fra').test, 'test1')
|
|
||||||
self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
|
|
||||||
del language_converters['test']
|
|
||||||
self.assertNotIn('test', language_converters)
|
|
||||||
self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
|
|
||||||
self.assertRaises(AttributeError, lambda: Language('fra').test)
|
|
||||||
|
|
||||||
|
|
||||||
def suite():
|
|
||||||
suite = TestSuite()
|
|
||||||
suite.addTest(TestLoader().loadTestsFromTestCase(TestScript))
|
|
||||||
suite.addTest(TestLoader().loadTestsFromTestCase(TestCountry))
|
|
||||||
suite.addTest(TestLoader().loadTestsFromTestCase(TestLanguage))
|
|
||||||
return suite
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
TextTestRunner().run(suite())
|
|
|
@ -11,10 +11,12 @@ from werkzeug.wrappers import Response as ResponseBase
|
||||||
from flask_restful.utils import http_status_message, unpack, OrderedDict
|
from flask_restful.utils import http_status_message, unpack, OrderedDict
|
||||||
from flask_restful.representations.json import output_json
|
from flask_restful.representations.json import output_json
|
||||||
import sys
|
import sys
|
||||||
from flask.helpers import _endpoint_from_view_func
|
|
||||||
from types import MethodType
|
from types import MethodType
|
||||||
import operator
|
import operator
|
||||||
from collections import Mapping
|
try:
|
||||||
|
from collections.abc import Mapping
|
||||||
|
except ImportError:
|
||||||
|
from collections import Mapping
|
||||||
|
|
||||||
|
|
||||||
__all__ = ('Api', 'Resource', 'marshal', 'marshal_with', 'marshal_with_field', 'abort')
|
__all__ = ('Api', 'Resource', 'marshal', 'marshal_with', 'marshal_with_field', 'abort')
|
||||||
|
@ -58,7 +60,7 @@ class Api(object):
|
||||||
to handle 404 errors throughout your app
|
to handle 404 errors throughout your app
|
||||||
:param serve_challenge_on_401: Whether to serve a challenge response to
|
:param serve_challenge_on_401: Whether to serve a challenge response to
|
||||||
clients on receiving 401. This usually leads to a username/password
|
clients on receiving 401. This usually leads to a username/password
|
||||||
popup in web browers.
|
popup in web browsers.
|
||||||
:param url_part_order: A string that controls the order that the pieces
|
:param url_part_order: A string that controls the order that the pieces
|
||||||
of the url are concatenated when the full url is constructed. 'b'
|
of the url are concatenated when the full url is constructed. 'b'
|
||||||
is the blueprint (or blueprint registration) prefix, 'a' is the api
|
is the blueprint (or blueprint registration) prefix, 'a' is the api
|
||||||
|
@ -153,7 +155,7 @@ class Api(object):
|
||||||
rule = blueprint_setup.url_prefix + rule
|
rule = blueprint_setup.url_prefix + rule
|
||||||
options.setdefault('subdomain', blueprint_setup.subdomain)
|
options.setdefault('subdomain', blueprint_setup.subdomain)
|
||||||
if endpoint is None:
|
if endpoint is None:
|
||||||
endpoint = _endpoint_from_view_func(view_func)
|
endpoint = view_func.__name__
|
||||||
defaults = blueprint_setup.url_defaults
|
defaults = blueprint_setup.url_defaults
|
||||||
if 'defaults' in options:
|
if 'defaults' in options:
|
||||||
defaults = dict(defaults, **options.pop('defaults'))
|
defaults = dict(defaults, **options.pop('defaults'))
|
||||||
|
@ -287,6 +289,13 @@ class Api(object):
|
||||||
|
|
||||||
headers = Headers()
|
headers = Headers()
|
||||||
if isinstance(e, HTTPException):
|
if isinstance(e, HTTPException):
|
||||||
|
if e.response is not None:
|
||||||
|
# If HTTPException is initialized with a response, then return e.get_response().
|
||||||
|
# This prevents the specified error response from being overridden.
|
||||||
|
# e.g. HTTPException(response=Response("Hello World"))
|
||||||
|
resp = e.get_response()
|
||||||
|
return resp
|
||||||
|
|
||||||
code = e.code
|
code = e.code
|
||||||
default_data = {
|
default_data = {
|
||||||
'message': getattr(e, 'description', http_status_message(code))
|
'message': getattr(e, 'description', http_status_message(code))
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__version__ = '0.3.7'
|
__version__ = '0.3.9'
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
from datetime import datetime
|
|
||||||
from calendar import timegm
|
from calendar import timegm
|
||||||
import pytz
|
|
||||||
from decimal import Decimal as MyDecimal, ROUND_HALF_EVEN
|
from decimal import Decimal as MyDecimal, ROUND_HALF_EVEN
|
||||||
from email.utils import formatdate
|
from email.utils import formatdate
|
||||||
import six
|
import six
|
||||||
|
@ -9,8 +7,7 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# python3
|
# python3
|
||||||
from urllib.parse import urlparse, urlunparse
|
from urllib.parse import urlparse, urlunparse
|
||||||
|
from flask_restful import marshal
|
||||||
from flask_restful import inputs, marshal
|
|
||||||
from flask import url_for, request
|
from flask import url_for, request
|
||||||
|
|
||||||
__all__ = ["String", "FormattedString", "Url", "DateTime", "Float",
|
__all__ = ["String", "FormattedString", "Url", "DateTime", "Float",
|
||||||
|
|
|
@ -269,7 +269,7 @@ def datetime_from_rfc822(datetime_str):
|
||||||
|
|
||||||
|
|
||||||
def datetime_from_iso8601(datetime_str):
|
def datetime_from_iso8601(datetime_str):
|
||||||
"""Turns an ISO8601 formatted date into a datetime object.
|
"""Turns an ISO8601 formatted datetime into a datetime object.
|
||||||
|
|
||||||
Example::
|
Example::
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
import collections
|
try:
|
||||||
|
from collections.abc import MutableSequence
|
||||||
|
except ImportError:
|
||||||
|
from collections import MutableSequence
|
||||||
from flask import current_app, request
|
from flask import current_app, request
|
||||||
from werkzeug.datastructures import MultiDict, FileStorage
|
from werkzeug.datastructures import MultiDict, FileStorage
|
||||||
from werkzeug import exceptions
|
from werkzeug import exceptions
|
||||||
|
@ -146,7 +149,7 @@ class Argument(object):
|
||||||
except TypeError:
|
except TypeError:
|
||||||
try:
|
try:
|
||||||
if self.type is decimal.Decimal:
|
if self.type is decimal.Decimal:
|
||||||
return self.type(str(value), self.name)
|
return self.type(str(value))
|
||||||
else:
|
else:
|
||||||
return self.type(value, self.name)
|
return self.type(value, self.name)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
|
@ -194,7 +197,7 @@ class Argument(object):
|
||||||
values = source.getlist(name)
|
values = source.getlist(name)
|
||||||
else:
|
else:
|
||||||
values = source.get(name)
|
values = source.get(name)
|
||||||
if not (isinstance(values, collections.MutableSequence) and self.action == 'append'):
|
if not (isinstance(values, MutableSequence) and self.action == 'append'):
|
||||||
values = [values]
|
values = [values]
|
||||||
|
|
||||||
for value in values:
|
for value in values:
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from collections import OrderedDict
|
from collections.abc import OrderedDict
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from ordereddict import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
from werkzeug.http import HTTP_STATUS_CODES
|
from werkzeug.http import HTTP_STATUS_CODES
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,7 @@ See: http://python-future.org
|
||||||
Credits
|
Credits
|
||||||
-------
|
-------
|
||||||
|
|
||||||
:Author: Ed Schofield
|
:Author: Ed Schofield, Jordan M. Adler, et al
|
||||||
:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
|
:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
|
||||||
Ltd, Singapore. http://pythoncharmers.com
|
Ltd, Singapore. http://pythoncharmers.com
|
||||||
:Others: See docs/credits.rst or http://python-future.org/credits.html
|
:Others: See docs/credits.rst or http://python-future.org/credits.html
|
||||||
|
@ -76,7 +76,7 @@ Credits
|
||||||
|
|
||||||
Licensing
|
Licensing
|
||||||
---------
|
---------
|
||||||
Copyright 2013-2018 Python Charmers Pty Ltd, Australia.
|
Copyright 2013-2019 Python Charmers Pty Ltd, Australia.
|
||||||
The software is distributed under an MIT licence. See LICENSE.txt.
|
The software is distributed under an MIT licence. See LICENSE.txt.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -84,10 +84,10 @@ The software is distributed under an MIT licence. See LICENSE.txt.
|
||||||
__title__ = 'future'
|
__title__ = 'future'
|
||||||
__author__ = 'Ed Schofield'
|
__author__ = 'Ed Schofield'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__copyright__ = 'Copyright 2013-2018 Python Charmers Pty Ltd'
|
__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd'
|
||||||
__ver_major__ = 0
|
__ver_major__ = 0
|
||||||
__ver_minor__ = 17
|
__ver_minor__ = 18
|
||||||
__ver_patch__ = 0
|
__ver_patch__ = 2
|
||||||
__ver_sub__ = ''
|
__ver_sub__ = ''
|
||||||
__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
|
__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
|
||||||
__ver_patch__, __ver_sub__)
|
__ver_patch__, __ver_sub__)
|
||||||
|
|
|
@ -10,7 +10,7 @@ __future_module__ = True
|
||||||
from future.standard_library import import_top_level_modules
|
from future.standard_library import import_top_level_modules
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info[0] == 3:
|
if sys.version_info[0] >= 3:
|
||||||
import_top_level_modules()
|
import_top_level_modules()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -800,7 +800,7 @@ class Message(object):
|
||||||
# There was no Content-Type header, and we don't know what type
|
# There was no Content-Type header, and we don't know what type
|
||||||
# to set it to, so raise an exception.
|
# to set it to, so raise an exception.
|
||||||
raise errors.HeaderParseError('No Content-Type header found')
|
raise errors.HeaderParseError('No Content-Type header found')
|
||||||
newparams = []
|
newparams = list()
|
||||||
foundp = False
|
foundp = False
|
||||||
for pk, pv in params:
|
for pk, pv in params:
|
||||||
if pk.lower() == 'boundary':
|
if pk.lower() == 'boundary':
|
||||||
|
@ -814,10 +814,10 @@ class Message(object):
|
||||||
# instead???
|
# instead???
|
||||||
newparams.append(('boundary', '"%s"' % boundary))
|
newparams.append(('boundary', '"%s"' % boundary))
|
||||||
# Replace the existing Content-Type header with the new value
|
# Replace the existing Content-Type header with the new value
|
||||||
newheaders = []
|
newheaders = list()
|
||||||
for h, v in self._headers:
|
for h, v in self._headers:
|
||||||
if h.lower() == 'content-type':
|
if h.lower() == 'content-type':
|
||||||
parts = []
|
parts = list()
|
||||||
for k, v in newparams:
|
for k, v in newparams:
|
||||||
if v == '':
|
if v == '':
|
||||||
parts.append(k)
|
parts.append(k)
|
||||||
|
|
|
@ -79,11 +79,15 @@ from future.backports.misc import create_connection as socket_create_connection
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
import collections
|
|
||||||
from future.backports.urllib.parse import urlsplit
|
from future.backports.urllib.parse import urlsplit
|
||||||
import warnings
|
import warnings
|
||||||
from array import array
|
from array import array
|
||||||
|
|
||||||
|
if PY2:
|
||||||
|
from collections import Iterable
|
||||||
|
else:
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
__all__ = ["HTTPResponse", "HTTPConnection",
|
__all__ = ["HTTPResponse", "HTTPConnection",
|
||||||
"HTTPException", "NotConnected", "UnknownProtocol",
|
"HTTPException", "NotConnected", "UnknownProtocol",
|
||||||
"UnknownTransferEncoding", "UnimplementedFileMode",
|
"UnknownTransferEncoding", "UnimplementedFileMode",
|
||||||
|
@ -696,9 +700,19 @@ class HTTPResponse(io.RawIOBase):
|
||||||
while total_bytes < len(b):
|
while total_bytes < len(b):
|
||||||
if MAXAMOUNT < len(mvb):
|
if MAXAMOUNT < len(mvb):
|
||||||
temp_mvb = mvb[0:MAXAMOUNT]
|
temp_mvb = mvb[0:MAXAMOUNT]
|
||||||
n = self.fp.readinto(temp_mvb)
|
if PY2:
|
||||||
|
data = self.fp.read(len(temp_mvb))
|
||||||
|
n = len(data)
|
||||||
|
temp_mvb[:n] = data
|
||||||
|
else:
|
||||||
|
n = self.fp.readinto(temp_mvb)
|
||||||
else:
|
else:
|
||||||
n = self.fp.readinto(mvb)
|
if PY2:
|
||||||
|
data = self.fp.read(len(mvb))
|
||||||
|
n = len(data)
|
||||||
|
mvb[:n] = data
|
||||||
|
else:
|
||||||
|
n = self.fp.readinto(mvb)
|
||||||
if not n:
|
if not n:
|
||||||
raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
|
raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
|
||||||
mvb = mvb[n:]
|
mvb = mvb[n:]
|
||||||
|
@ -892,7 +906,7 @@ class HTTPConnection(object):
|
||||||
try:
|
try:
|
||||||
self.sock.sendall(data)
|
self.sock.sendall(data)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
if isinstance(data, collections.Iterable):
|
if isinstance(data, Iterable):
|
||||||
for d in data:
|
for d in data:
|
||||||
self.sock.sendall(d)
|
self.sock.sendall(d)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -33,7 +33,7 @@ from __future__ import print_function
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from future.builtins import filter, int, map, open, str
|
from future.builtins import filter, int, map, open, str
|
||||||
from future.utils import as_native_str
|
from future.utils import as_native_str, PY2
|
||||||
|
|
||||||
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
|
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
|
||||||
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
|
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
|
||||||
|
@ -41,7 +41,8 @@ __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
re.ASCII = 0
|
if PY2:
|
||||||
|
re.ASCII = 0
|
||||||
import time
|
import time
|
||||||
from future.backports.urllib.parse import urlparse, urlsplit, quote
|
from future.backports.urllib.parse import urlparse, urlsplit, quote
|
||||||
from future.backports.http.client import HTTP_PORT
|
from future.backports.http.client import HTTP_PORT
|
||||||
|
|
|
@ -138,7 +138,8 @@ from future.utils import PY2, as_native_str
|
||||||
# Import our required modules
|
# Import our required modules
|
||||||
#
|
#
|
||||||
import re
|
import re
|
||||||
re.ASCII = 0 # for py2 compatibility
|
if PY2:
|
||||||
|
re.ASCII = 0 # for py2 compatibility
|
||||||
import string
|
import string
|
||||||
|
|
||||||
__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
|
__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
|
||||||
|
|
|
@ -16,7 +16,6 @@ from __future__ import absolute_import
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
from math import ceil as oldceil
|
from math import ceil as oldceil
|
||||||
from collections import Mapping, MutableMapping
|
|
||||||
|
|
||||||
from operator import itemgetter as _itemgetter, eq as _eq
|
from operator import itemgetter as _itemgetter, eq as _eq
|
||||||
import sys
|
import sys
|
||||||
|
@ -25,7 +24,12 @@ from _weakref import proxy as _proxy
|
||||||
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
|
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
|
||||||
from socket import getaddrinfo, SOCK_STREAM, error, socket
|
from socket import getaddrinfo, SOCK_STREAM, error, socket
|
||||||
|
|
||||||
from future.utils import iteritems, itervalues, PY26, PY3
|
from future.utils import iteritems, itervalues, PY2, PY26, PY3
|
||||||
|
|
||||||
|
if PY2:
|
||||||
|
from collections import Mapping, MutableMapping
|
||||||
|
else:
|
||||||
|
from collections.abc import Mapping, MutableMapping
|
||||||
|
|
||||||
|
|
||||||
def ceil(x):
|
def ceil(x):
|
||||||
|
|
|
@ -109,11 +109,17 @@ import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import collections
|
|
||||||
import tempfile
|
import tempfile
|
||||||
import contextlib
|
import contextlib
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
from future.utils import PY2
|
||||||
|
|
||||||
|
if PY2:
|
||||||
|
from collections import Iterable
|
||||||
|
else:
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
# check for SSL
|
# check for SSL
|
||||||
try:
|
try:
|
||||||
import ssl
|
import ssl
|
||||||
|
@ -1221,7 +1227,7 @@ class AbstractHTTPHandler(BaseHandler):
|
||||||
mv = memoryview(data)
|
mv = memoryview(data)
|
||||||
size = len(mv) * mv.itemsize
|
size = len(mv) * mv.itemsize
|
||||||
except TypeError:
|
except TypeError:
|
||||||
if isinstance(data, collections.Iterable):
|
if isinstance(data, Iterable):
|
||||||
raise ValueError("Content-Length should be specified "
|
raise ValueError("Content-Length should be specified "
|
||||||
"for iterable data of type %r %r" % (type(data),
|
"for iterable data of type %r %r" % (type(data),
|
||||||
data))
|
data))
|
||||||
|
|
|
@ -11,7 +11,7 @@ from future.builtins.iterators import (filter, map, zip)
|
||||||
# The isinstance import is no longer needed. We provide it only for
|
# The isinstance import is no longer needed. We provide it only for
|
||||||
# backward-compatibility with future v0.8.2. It will be removed in future v1.0.
|
# backward-compatibility with future v0.8.2. It will be removed in future v1.0.
|
||||||
from future.builtins.misc import (ascii, chr, hex, input, isinstance, next,
|
from future.builtins.misc import (ascii, chr, hex, input, isinstance, next,
|
||||||
oct, open, pow, round, super)
|
oct, open, pow, round, super, max, min)
|
||||||
from future.utils import PY3
|
from future.utils import PY3
|
||||||
|
|
||||||
if PY3:
|
if PY3:
|
||||||
|
@ -43,7 +43,7 @@ if not utils.PY3:
|
||||||
__all__ = ['filter', 'map', 'zip',
|
__all__ = ['filter', 'map', 'zip',
|
||||||
'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow',
|
'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow',
|
||||||
'round', 'super',
|
'round', 'super',
|
||||||
'bytes', 'dict', 'int', 'list', 'object', 'range', 'str',
|
'bytes', 'dict', 'int', 'list', 'object', 'range', 'str', 'max', 'min'
|
||||||
]
|
]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -13,6 +13,8 @@ The builtin functions are:
|
||||||
- ``open`` (equivalent to io.open on Py2)
|
- ``open`` (equivalent to io.open on Py2)
|
||||||
- ``super`` (backport of Py3's magic zero-argument super() function
|
- ``super`` (backport of Py3's magic zero-argument super() function
|
||||||
- ``round`` (new "Banker's Rounding" behaviour from Py3)
|
- ``round`` (new "Banker's Rounding" behaviour from Py3)
|
||||||
|
- ``max`` (new default option from Py3.4)
|
||||||
|
- ``min`` (new default option from Py3.4)
|
||||||
|
|
||||||
``isinstance`` is also currently exported for backwards compatibility
|
``isinstance`` is also currently exported for backwards compatibility
|
||||||
with v0.8.2, although this has been deprecated since v0.9.
|
with v0.8.2, although this has been deprecated since v0.9.
|
||||||
|
@ -59,6 +61,8 @@ if utils.PY2:
|
||||||
from future.builtins.newnext import newnext as next
|
from future.builtins.newnext import newnext as next
|
||||||
from future.builtins.newround import newround as round
|
from future.builtins.newround import newround as round
|
||||||
from future.builtins.newsuper import newsuper as super
|
from future.builtins.newsuper import newsuper as super
|
||||||
|
from future.builtins.new_min_max import newmax as max
|
||||||
|
from future.builtins.new_min_max import newmin as min
|
||||||
from future.types.newint import newint
|
from future.types.newint import newint
|
||||||
|
|
||||||
_SENTINEL = object()
|
_SENTINEL = object()
|
||||||
|
@ -89,11 +93,12 @@ if utils.PY2:
|
||||||
else:
|
else:
|
||||||
return _builtin_pow(x+0j, y, z)
|
return _builtin_pow(x+0j, y, z)
|
||||||
|
|
||||||
|
|
||||||
# ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this:
|
# ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this:
|
||||||
# callable = __builtin__.callable
|
# callable = __builtin__.callable
|
||||||
|
|
||||||
__all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct',
|
__all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct',
|
||||||
'open', 'pow', 'round', 'super']
|
'open', 'pow', 'round', 'super', 'max', 'min']
|
||||||
|
|
||||||
else:
|
else:
|
||||||
import builtins
|
import builtins
|
||||||
|
@ -109,8 +114,14 @@ else:
|
||||||
pow = builtins.pow
|
pow = builtins.pow
|
||||||
round = builtins.round
|
round = builtins.round
|
||||||
super = builtins.super
|
super = builtins.super
|
||||||
|
if utils.PY34_PLUS:
|
||||||
__all__ = []
|
max = builtins.max
|
||||||
|
min = builtins.min
|
||||||
|
__all__ = []
|
||||||
|
else:
|
||||||
|
from future.builtins.new_min_max import newmax as max
|
||||||
|
from future.builtins.new_min_max import newmin as min
|
||||||
|
__all__ = ['min', 'max']
|
||||||
|
|
||||||
# The callable() function was removed from Py3.0 and 3.1 and
|
# The callable() function was removed from Py3.0 and 3.1 and
|
||||||
# reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. If we ever
|
# reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. If we ever
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from future import utils
|
||||||
|
if utils.PY2:
|
||||||
|
from __builtin__ import max as _builtin_max, min as _builtin_min
|
||||||
|
else:
|
||||||
|
from builtins import max as _builtin_max, min as _builtin_min
|
||||||
|
|
||||||
|
_SENTINEL = object()
|
||||||
|
|
||||||
|
|
||||||
|
def newmin(*args, **kwargs):
    """
    ``min`` replacement that forwards every positional and keyword
    argument to ``new_min_max`` together with the builtin ``min``.
    """
    result = new_min_max(_builtin_min, *args, **kwargs)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def newmax(*args, **kwargs):
    """
    ``max`` replacement that forwards every positional and keyword
    argument to ``new_min_max`` together with the builtin ``max``.
    """
    result = new_min_max(_builtin_max, *args, **kwargs)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def new_min_max(_builtin_func, *args, **kwargs):
    """
    To support the argument "default" introduced in python 3.4 for min and max

    :param _builtin_func: builtin min or builtin max
    :param args: either one iterable, or two or more values to compare
    :param kwargs: optional ``key`` (one-argument ordering function) and
        ``default`` (value returned when the single iterable is empty)
    :return: returns the min or max based on the arguments passed
    :raises TypeError: for an unknown keyword, for no positional argument,
        or when ``default`` is combined with several positional arguments
    :raises ValueError: when the single iterable is empty and no
        ``default`` was supplied
    """
    for keyword in kwargs:
        if keyword not in ('key', 'default'):
            # Format the message; passing the keyword as a second exception
            # argument would leave the '%s' placeholder unexpanded.
            raise TypeError('Illegal argument %s' % keyword)

    if not args:
        raise TypeError('%s expected at least 1 argument, got 0'
                        % _builtin_func.__name__)

    # Membership test rather than a truthiness test: ``default=None`` is a
    # legitimate default and must be honoured.
    has_default = 'default' in kwargs
    key = kwargs.get('key')

    if len(args) > 1:
        # Several positional values: they are the candidates themselves, and
        # a default makes no sense because the input can never be empty.
        if has_default:
            raise TypeError('Cannot specify a default for %s() with multiple '
                            'positional arguments' % _builtin_func.__name__)
        if key is not None:
            return _builtin_func(args, key=key)
        return _builtin_func(args)

    # Single positional argument: treat it as an iterable.  Pull one item to
    # detect emptiness without consuming a one-shot iterator twice.
    iterator = iter(args[0])
    try:
        first = next(iterator)
    except StopIteration:
        if has_default:
            return kwargs['default']
        # %-formatting (not '{}'.format) so the message also works on Py2.6.
        raise ValueError('%s() arg is an empty sequence'
                         % _builtin_func.__name__)

    # Put the probed item back in front of the remaining ones.
    iterator = itertools.chain([first], iterator)
    if key is not None:
        return _builtin_func(iterator, key=key)
    return _builtin_func(iterator)
|
|
@ -38,11 +38,14 @@ def newround(number, ndigits=None):
|
||||||
if 'numpy' in repr(type(number)):
|
if 'numpy' in repr(type(number)):
|
||||||
number = float(number)
|
number = float(number)
|
||||||
|
|
||||||
if not PY26:
|
if isinstance(number, Decimal):
|
||||||
d = Decimal.from_float(number).quantize(exponent,
|
d = number
|
||||||
rounding=ROUND_HALF_EVEN)
|
|
||||||
else:
|
else:
|
||||||
d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN)
|
if not PY26:
|
||||||
|
d = Decimal.from_float(number).quantize(exponent,
|
||||||
|
rounding=ROUND_HALF_EVEN)
|
||||||
|
else:
|
||||||
|
d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN)
|
||||||
|
|
||||||
if return_int:
|
if return_int:
|
||||||
return int(d)
|
return int(d)
|
||||||
|
|
|
@ -4,5 +4,5 @@ import sys
|
||||||
__future_module__ = True
|
__future_module__ = True
|
||||||
from future.standard_library import import_top_level_modules
|
from future.standard_library import import_top_level_modules
|
||||||
|
|
||||||
if sys.version_info[0] == 3:
|
if sys.version_info[0] >= 3:
|
||||||
import_top_level_modules()
|
import_top_level_modules()
|
||||||
|
|
|
@ -2,7 +2,11 @@ from __future__ import absolute_import
|
||||||
from future.utils import PY3
|
from future.utils import PY3
|
||||||
|
|
||||||
if PY3:
|
if PY3:
|
||||||
from copyreg import *
|
import copyreg, sys
|
||||||
|
# A "*" import uses Python 3's copyreg.__all__ which does not include
|
||||||
|
# all public names in the API surface for copyreg, this avoids that
|
||||||
|
# problem by just making our module _be_ a reference to the actual module.
|
||||||
|
sys.modules['future.moves.copyreg'] = copyreg
|
||||||
else:
|
else:
|
||||||
__future_module__ = True
|
__future_module__ = True
|
||||||
from copy_reg import *
|
from copy_reg import *
|
||||||
|
|
|
@ -11,19 +11,8 @@ if PY3:
|
||||||
proxy_bypass,
|
proxy_bypass,
|
||||||
quote,
|
quote,
|
||||||
request_host,
|
request_host,
|
||||||
splitattr,
|
|
||||||
splithost,
|
|
||||||
splitpasswd,
|
|
||||||
splitport,
|
|
||||||
splitquery,
|
|
||||||
splittag,
|
|
||||||
splittype,
|
|
||||||
splituser,
|
|
||||||
splitvalue,
|
|
||||||
thishost,
|
thishost,
|
||||||
to_bytes,
|
|
||||||
unquote,
|
unquote,
|
||||||
unwrap,
|
|
||||||
url2pathname,
|
url2pathname,
|
||||||
urlcleanup,
|
urlcleanup,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
@ -32,6 +21,18 @@ if PY3:
|
||||||
urlretrieve,
|
urlretrieve,
|
||||||
urlsplit,
|
urlsplit,
|
||||||
urlunparse)
|
urlunparse)
|
||||||
|
|
||||||
|
from urllib.parse import (splitattr,
|
||||||
|
splithost,
|
||||||
|
splitpasswd,
|
||||||
|
splitport,
|
||||||
|
splitquery,
|
||||||
|
splittag,
|
||||||
|
splittype,
|
||||||
|
splituser,
|
||||||
|
splitvalue,
|
||||||
|
to_bytes,
|
||||||
|
unwrap)
|
||||||
else:
|
else:
|
||||||
__future_module__ = True
|
__future_module__ = True
|
||||||
with suspend_hooks():
|
with suspend_hooks():
|
||||||
|
|
|
@ -272,7 +272,11 @@ class CodeHandler(unittest.TestCase):
|
||||||
else:
|
else:
|
||||||
headers = ''
|
headers = ''
|
||||||
|
|
||||||
self.compare(output, headers + reformat_code(expected),
|
reformatted = reformat_code(expected)
|
||||||
|
if headers in reformatted:
|
||||||
|
headers = ''
|
||||||
|
|
||||||
|
self.compare(output, headers + reformatted,
|
||||||
ignore_imports=ignore_imports)
|
ignore_imports=ignore_imports)
|
||||||
|
|
||||||
def unchanged(self, code, **kwargs):
|
def unchanged(self, code, **kwargs):
|
||||||
|
@ -338,6 +342,10 @@ class CodeHandler(unittest.TestCase):
|
||||||
'----\n%s\n----' % f.read(),
|
'----\n%s\n----' % f.read(),
|
||||||
)
|
)
|
||||||
ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError)
|
ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError)
|
||||||
|
|
||||||
|
if not hasattr(e, 'output'):
|
||||||
|
# The attribute CalledProcessError.output doesn't exist on Py2.6
|
||||||
|
e.output = None
|
||||||
raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
|
raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
|
@ -5,15 +5,19 @@ Why do this? Without it, the Python 2 bytes object is a very, very
|
||||||
different beast to the Python 3 bytes object.
|
different beast to the Python 3 bytes object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from collections import Iterable
|
|
||||||
from numbers import Integral
|
from numbers import Integral
|
||||||
import string
|
import string
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
from future.utils import istext, isbytes, PY3, with_metaclass
|
from future.utils import istext, isbytes, PY2, PY3, with_metaclass
|
||||||
from future.types import no, issubset
|
from future.types import no, issubset
|
||||||
from future.types.newobject import newobject
|
from future.types.newobject import newobject
|
||||||
|
|
||||||
|
if PY2:
|
||||||
|
from collections import Iterable
|
||||||
|
else:
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
|
||||||
_builtin_bytes = bytes
|
_builtin_bytes = bytes
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,6 @@ They are very similar. The most notable difference is:
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
|
|
||||||
import struct
|
import struct
|
||||||
import collections
|
|
||||||
|
|
||||||
from future.types.newbytes import newbytes
|
from future.types.newbytes import newbytes
|
||||||
from future.types.newobject import newobject
|
from future.types.newobject import newobject
|
||||||
|
@ -17,6 +16,9 @@ from future.utils import PY3, isint, istext, isbytes, with_metaclass, native
|
||||||
|
|
||||||
if PY3:
|
if PY3:
|
||||||
long = int
|
long = int
|
||||||
|
from collections.abc import Iterable
|
||||||
|
else:
|
||||||
|
from collections import Iterable
|
||||||
|
|
||||||
|
|
||||||
class BaseNewInt(type):
|
class BaseNewInt(type):
|
||||||
|
@ -356,7 +358,7 @@ class newint(with_metaclass(BaseNewInt, long)):
|
||||||
raise TypeError("cannot convert unicode objects to bytes")
|
raise TypeError("cannot convert unicode objects to bytes")
|
||||||
# mybytes can also be passed as a sequence of integers on Py3.
|
# mybytes can also be passed as a sequence of integers on Py3.
|
||||||
# Test for this:
|
# Test for this:
|
||||||
elif isinstance(mybytes, collections.Iterable):
|
elif isinstance(mybytes, Iterable):
|
||||||
mybytes = newbytes(mybytes)
|
mybytes = newbytes(mybytes)
|
||||||
b = mybytes if byteorder == 'big' else mybytes[::-1]
|
b = mybytes if byteorder == 'big' else mybytes[::-1]
|
||||||
if len(b) == 0:
|
if len(b) == 0:
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
"""
|
"""
|
||||||
A pretty lame implementation of a memoryview object for Python 2.6.
|
A pretty lame implementation of a memoryview object for Python 2.6.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from collections import Iterable
|
|
||||||
from numbers import Integral
|
from numbers import Integral
|
||||||
import string
|
import string
|
||||||
|
|
||||||
from future.utils import istext, isbytes, PY3, with_metaclass
|
from future.utils import istext, isbytes, PY2, with_metaclass
|
||||||
from future.types import no, issubset
|
from future.types import no, issubset
|
||||||
|
|
||||||
|
if PY2:
|
||||||
|
from collections import Iterable
|
||||||
|
else:
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
# class BaseNewBytes(type):
|
# class BaseNewBytes(type):
|
||||||
# def __instancecheck__(cls, instance):
|
# def __instancecheck__(cls, instance):
|
||||||
|
|
|
@ -112,5 +112,6 @@ class newobject(object):
|
||||||
"""
|
"""
|
||||||
return object(self)
|
return object(self)
|
||||||
|
|
||||||
|
__slots__ = []
|
||||||
|
|
||||||
__all__ = ['newobject']
|
__all__ = ['newobject']
|
||||||
|
|
|
@ -19,7 +19,12 @@ From Dan Crosta's README:
|
||||||
"""
|
"""
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
from collections import Sequence, Iterator
|
from future.utils import PY2
|
||||||
|
|
||||||
|
if PY2:
|
||||||
|
from collections import Sequence, Iterator
|
||||||
|
else:
|
||||||
|
from collections.abc import Sequence, Iterator
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
|
|
||||||
from future.backports.misc import count # with step parameter on Py2.6
|
from future.backports.misc import count # with step parameter on Py2.6
|
||||||
|
|
|
@ -40,7 +40,6 @@ representations of your objects portably across Py3 and Py2, use the
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from collections import Iterable
|
|
||||||
from numbers import Number
|
from numbers import Number
|
||||||
|
|
||||||
from future.utils import PY3, istext, with_metaclass, isnewbytes
|
from future.utils import PY3, istext, with_metaclass, isnewbytes
|
||||||
|
@ -51,6 +50,9 @@ from future.types.newobject import newobject
|
||||||
if PY3:
|
if PY3:
|
||||||
# We'll probably never use newstr on Py3 anyway...
|
# We'll probably never use newstr on Py3 anyway...
|
||||||
unicode = str
|
unicode = str
|
||||||
|
from collections.abc import Iterable
|
||||||
|
else:
|
||||||
|
from collections import Iterable
|
||||||
|
|
||||||
|
|
||||||
class BaseNewStr(type):
|
class BaseNewStr(type):
|
||||||
|
@ -105,6 +107,7 @@ class newstr(with_metaclass(BaseNewStr, unicode)):
|
||||||
"""
|
"""
|
||||||
Without the u prefix
|
Without the u prefix
|
||||||
"""
|
"""
|
||||||
|
|
||||||
value = super(newstr, self).__repr__()
|
value = super(newstr, self).__repr__()
|
||||||
# assert value[0] == u'u'
|
# assert value[0] == u'u'
|
||||||
return value[1:]
|
return value[1:]
|
||||||
|
@ -290,7 +293,14 @@ class newstr(with_metaclass(BaseNewStr, unicode)):
|
||||||
isinstance(other, bytes) and not isnewbytes(other)):
|
isinstance(other, bytes) and not isnewbytes(other)):
|
||||||
return super(newstr, self).__eq__(other)
|
return super(newstr, self).__eq__(other)
|
||||||
else:
|
else:
|
||||||
return False
|
return NotImplemented
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
if (isinstance(self, unicode) or
|
||||||
|
isinstance(self, bytes) and not isnewbytes(self)):
|
||||||
|
return super(newstr, self).__hash__()
|
||||||
|
else:
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
def __ne__(self, other):
|
def __ne__(self, other):
|
||||||
if (isinstance(other, unicode) or
|
if (isinstance(other, unicode) or
|
||||||
|
|
|
@ -18,8 +18,10 @@ This module exports useful functions for 2/3 compatible code:
|
||||||
* types:
|
* types:
|
||||||
|
|
||||||
* text_type: unicode in Python 2, str in Python 3
|
* text_type: unicode in Python 2, str in Python 3
|
||||||
* binary_type: str in Python 2, bytes in Python 3
|
|
||||||
* string_types: basestring in Python 2, str in Python 3
|
* string_types: basestring in Python 2, str in Python 3
|
||||||
|
* binary_type: str in Python 2, bytes in Python 3
|
||||||
|
* integer_types: (int, long) in Python 2, int in Python 3
|
||||||
|
* class_types: (type, types.ClassType) in Python 2, type in Python 3
|
||||||
|
|
||||||
* bchr(c):
|
* bchr(c):
|
||||||
Take an integer and make a 1-character byte string
|
Take an integer and make a 1-character byte string
|
||||||
|
@ -55,7 +57,8 @@ import copy
|
||||||
import inspect
|
import inspect
|
||||||
|
|
||||||
|
|
||||||
PY3 = sys.version_info[0] == 3
|
PY3 = sys.version_info[0] >= 3
|
||||||
|
PY34_PLUS = sys.version_info[0:2] >= (3, 4)
|
||||||
PY35_PLUS = sys.version_info[0:2] >= (3, 5)
|
PY35_PLUS = sys.version_info[0:2] >= (3, 5)
|
||||||
PY36_PLUS = sys.version_info[0:2] >= (3, 6)
|
PY36_PLUS = sys.version_info[0:2] >= (3, 6)
|
||||||
PY2 = sys.version_info[0] == 2
|
PY2 = sys.version_info[0] == 2
|
||||||
|
@ -405,12 +408,34 @@ if PY3:
|
||||||
allows re-raising exceptions with the cls value and traceback on
|
allows re-raising exceptions with the cls value and traceback on
|
||||||
Python 2 and 3.
|
Python 2 and 3.
|
||||||
"""
|
"""
|
||||||
if value is not None and isinstance(tp, Exception):
|
if isinstance(tp, BaseException):
|
||||||
raise TypeError("instance exception may not have a separate value")
|
# If the first object is an instance, the type of the exception
|
||||||
if value is not None:
|
# is the class of the instance, the instance itself is the value,
|
||||||
exc = tp(value)
|
# and the second object must be None.
|
||||||
else:
|
if value is not None:
|
||||||
|
raise TypeError("instance exception may not have a separate value")
|
||||||
exc = tp
|
exc = tp
|
||||||
|
elif isinstance(tp, type) and not issubclass(tp, BaseException):
|
||||||
|
# If the first object is a class, it becomes the type of the
|
||||||
|
# exception.
|
||||||
|
raise TypeError("class must derive from BaseException, not %s" % tp.__name__)
|
||||||
|
else:
|
||||||
|
# The second object is used to determine the exception value: If it
|
||||||
|
# is an instance of the class, the instance becomes the exception
|
||||||
|
# value. If the second object is a tuple, it is used as the argument
|
||||||
|
# list for the class constructor; if it is None, an empty argument
|
||||||
|
# list is used, and any other object is treated as a single argument
|
||||||
|
# to the constructor. The instance so created by calling the
|
||||||
|
# constructor is used as the exception value.
|
||||||
|
if isinstance(value, tp):
|
||||||
|
exc = value
|
||||||
|
elif isinstance(value, tuple):
|
||||||
|
exc = tp(*value)
|
||||||
|
elif value is None:
|
||||||
|
exc = tp()
|
||||||
|
else:
|
||||||
|
exc = tp(value)
|
||||||
|
|
||||||
if exc.__traceback__ is not tb:
|
if exc.__traceback__ is not tb:
|
||||||
raise exc.with_traceback(tb)
|
raise exc.with_traceback(tb)
|
||||||
raise exc
|
raise exc
|
||||||
|
@ -443,12 +468,14 @@ else:
|
||||||
e.__suppress_context__ = False
|
e.__suppress_context__ = False
|
||||||
if isinstance(cause, type) and issubclass(cause, Exception):
|
if isinstance(cause, type) and issubclass(cause, Exception):
|
||||||
e.__cause__ = cause()
|
e.__cause__ = cause()
|
||||||
|
e.__cause__.__traceback__ = sys.exc_info()[2]
|
||||||
e.__suppress_context__ = True
|
e.__suppress_context__ = True
|
||||||
elif cause is None:
|
elif cause is None:
|
||||||
e.__cause__ = None
|
e.__cause__ = None
|
||||||
e.__suppress_context__ = True
|
e.__suppress_context__ = True
|
||||||
elif isinstance(cause, BaseException):
|
elif isinstance(cause, BaseException):
|
||||||
e.__cause__ = cause
|
e.__cause__ = cause
|
||||||
|
object.__setattr__(e.__cause__, '__traceback__', sys.exc_info()[2])
|
||||||
e.__suppress_context__ = True
|
e.__suppress_context__ = True
|
||||||
else:
|
else:
|
||||||
raise TypeError("exception causes must derive from BaseException")
|
raise TypeError("exception causes must derive from BaseException")
|
||||||
|
@ -552,15 +579,14 @@ def isbytes(obj):
|
||||||
|
|
||||||
def isnewbytes(obj):
|
def isnewbytes(obj):
|
||||||
"""
|
"""
|
||||||
Equivalent to the result of ``isinstance(obj, newbytes)`` were
|
Equivalent to the result of ``type(obj) == type(newbytes)``
|
||||||
``__instancecheck__`` not overridden on the newbytes subclass. In
|
in other words, it is REALLY a newbytes instance, not a Py2 native str
|
||||||
other words, it is REALLY a newbytes instance, not a Py2 native str
|
|
||||||
object?
|
object?
|
||||||
|
|
||||||
|
Note that this does not cover subclasses of newbytes, and it is not
|
||||||
|
equivalent to ininstance(obj, newbytes)
|
||||||
"""
|
"""
|
||||||
# TODO: generalize this so that it works with subclasses of newbytes
|
return type(obj).__name__ == 'newbytes'
|
||||||
# Import is here to avoid circular imports:
|
|
||||||
from future.types.newbytes import newbytes
|
|
||||||
return type(obj) == newbytes
|
|
||||||
|
|
||||||
|
|
||||||
def isint(obj):
|
def isint(obj):
|
||||||
|
@ -726,16 +752,16 @@ else:
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['PY2', 'PY26', 'PY3', 'PYPY',
|
__all__ = ['PY2', 'PY26', 'PY3', 'PYPY',
|
||||||
'as_native_str', 'bind_method', 'bord', 'bstr',
|
'as_native_str', 'binary_type', 'bind_method', 'bord', 'bstr',
|
||||||
'bytes_to_native_str', 'encode_filename', 'ensure_new_type',
|
'bytes_to_native_str', 'class_types', 'encode_filename',
|
||||||
'exec_', 'get_next', 'getexception', 'implements_iterator',
|
'ensure_new_type', 'exec_', 'get_next', 'getexception',
|
||||||
'is_new_style', 'isbytes', 'isidentifier', 'isint',
|
'implements_iterator', 'integer_types', 'is_new_style', 'isbytes',
|
||||||
'isnewbytes', 'istext', 'iteritems', 'iterkeys', 'itervalues',
|
'isidentifier', 'isint', 'isnewbytes', 'istext', 'iteritems',
|
||||||
'lfilter', 'listitems', 'listvalues', 'lmap', 'lrange',
|
'iterkeys', 'itervalues', 'lfilter', 'listitems', 'listvalues',
|
||||||
'lzip', 'native', 'native_bytes', 'native_str',
|
'lmap', 'lrange', 'lzip', 'native', 'native_bytes', 'native_str',
|
||||||
'native_str_to_bytes', 'old_div',
|
'native_str_to_bytes', 'old_div',
|
||||||
'python_2_unicode_compatible', 'raise_',
|
'python_2_unicode_compatible', 'raise_',
|
||||||
'raise_with_traceback', 'reraise', 'text_to_native_str',
|
'raise_with_traceback', 'reraise', 'string_types',
|
||||||
'tobytes', 'viewitems', 'viewkeys', 'viewvalues',
|
'text_to_native_str', 'text_type', 'tobytes', 'viewitems',
|
||||||
'with_metaclass'
|
'viewkeys', 'viewvalues', 'with_metaclass'
|
||||||
]
|
]
|
||||||
|
|
|
@ -32,4 +32,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
||||||
|
|
||||||
# this has to be at the top level, see how setup.py parses this
|
# this has to be at the top level, see how setup.py parses this
|
||||||
#: Distribution version number.
|
#: Distribution version number.
|
||||||
__version__ = "1.0.1"
|
__version__ = "1.1"
|
||||||
|
|
|
@ -136,6 +136,7 @@ def normaliseCharList(charList):
|
||||||
i += j
|
i += j
|
||||||
return rv
|
return rv
|
||||||
|
|
||||||
|
|
||||||
# We don't really support characters above the BMP :(
|
# We don't really support characters above the BMP :(
|
||||||
max_unicode = int("FFFF", 16)
|
max_unicode = int("FFFF", 16)
|
||||||
|
|
||||||
|
@ -254,7 +255,7 @@ class InfosetFilter(object):
|
||||||
nameRest = name[1:]
|
nameRest = name[1:]
|
||||||
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
|
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
|
||||||
if m:
|
if m:
|
||||||
warnings.warn("Coercing non-XML name", DataLossWarning)
|
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||||
nameFirstOutput = self.getReplacementCharacter(nameFirst)
|
nameFirstOutput = self.getReplacementCharacter(nameFirst)
|
||||||
else:
|
else:
|
||||||
nameFirstOutput = nameFirst
|
nameFirstOutput = nameFirst
|
||||||
|
@ -262,7 +263,7 @@ class InfosetFilter(object):
|
||||||
nameRestOutput = nameRest
|
nameRestOutput = nameRest
|
||||||
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
|
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
|
||||||
for char in replaceChars:
|
for char in replaceChars:
|
||||||
warnings.warn("Coercing non-XML name", DataLossWarning)
|
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||||
replacement = self.getReplacementCharacter(char)
|
replacement = self.getReplacementCharacter(char)
|
||||||
nameRestOutput = nameRestOutput.replace(char, replacement)
|
nameRestOutput = nameRestOutput.replace(char, replacement)
|
||||||
return nameFirstOutput + nameRestOutput
|
return nameFirstOutput + nameRestOutput
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
from six import text_type, binary_type
|
from six import text_type
|
||||||
from six.moves import http_client, urllib
|
from six.moves import http_client, urllib
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import re
|
import re
|
||||||
|
from io import BytesIO, StringIO
|
||||||
|
|
||||||
import webencodings
|
import webencodings
|
||||||
|
|
||||||
|
@ -12,13 +13,6 @@ from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
||||||
from .constants import _ReparseException
|
from .constants import _ReparseException
|
||||||
from . import _utils
|
from . import _utils
|
||||||
|
|
||||||
from io import StringIO
|
|
||||||
|
|
||||||
try:
|
|
||||||
from io import BytesIO
|
|
||||||
except ImportError:
|
|
||||||
BytesIO = StringIO
|
|
||||||
|
|
||||||
# Non-unicode versions of constants for use in the pre-parser
|
# Non-unicode versions of constants for use in the pre-parser
|
||||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
||||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
||||||
|
@ -40,13 +34,13 @@ if _utils.supports_lone_surrogates:
|
||||||
else:
|
else:
|
||||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
||||||
|
|
||||||
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
||||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
||||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
||||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||||
0x10FFFE, 0x10FFFF])
|
0x10FFFE, 0x10FFFF}
|
||||||
|
|
||||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
||||||
|
|
||||||
|
@ -367,7 +361,7 @@ class HTMLUnicodeInputStream(object):
|
||||||
def unget(self, char):
|
def unget(self, char):
|
||||||
# Only one character is allowed to be ungotten at once - it must
|
# Only one character is allowed to be ungotten at once - it must
|
||||||
# be consumed again before any further call to unget
|
# be consumed again before any further call to unget
|
||||||
if char is not None:
|
if char is not EOF:
|
||||||
if self.chunkOffset == 0:
|
if self.chunkOffset == 0:
|
||||||
# unget is called quite rarely, so it's a good idea to do
|
# unget is called quite rarely, so it's a good idea to do
|
||||||
# more work here if it saves a bit of work in the frequently
|
# more work here if it saves a bit of work in the frequently
|
||||||
|
@ -449,7 +443,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stream.seek(stream.tell())
|
stream.seek(stream.tell())
|
||||||
except: # pylint:disable=bare-except
|
except Exception:
|
||||||
stream = BufferedStream(stream)
|
stream = BufferedStream(stream)
|
||||||
|
|
||||||
return stream
|
return stream
|
||||||
|
@ -461,7 +455,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||||
if charEncoding[0] is not None:
|
if charEncoding[0] is not None:
|
||||||
return charEncoding
|
return charEncoding
|
||||||
|
|
||||||
# If we've been overriden, we've been overriden
|
# If we've been overridden, we've been overridden
|
||||||
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
||||||
if charEncoding[0] is not None:
|
if charEncoding[0] is not None:
|
||||||
return charEncoding
|
return charEncoding
|
||||||
|
@ -664,9 +658,7 @@ class EncodingBytes(bytes):
|
||||||
"""Look for a sequence of bytes at the start of a string. If the bytes
|
"""Look for a sequence of bytes at the start of a string. If the bytes
|
||||||
are found return True and advance the position to the byte after the
|
are found return True and advance the position to the byte after the
|
||||||
match. Otherwise return False and leave the position alone"""
|
match. Otherwise return False and leave the position alone"""
|
||||||
p = self.position
|
rv = self.startswith(bytes, self.position)
|
||||||
data = self[p:p + len(bytes)]
|
|
||||||
rv = data.startswith(bytes)
|
|
||||||
if rv:
|
if rv:
|
||||||
self.position += len(bytes)
|
self.position += len(bytes)
|
||||||
return rv
|
return rv
|
||||||
|
@ -674,15 +666,11 @@ class EncodingBytes(bytes):
|
||||||
def jumpTo(self, bytes):
|
def jumpTo(self, bytes):
|
||||||
"""Look for the next sequence of bytes matching a given sequence. If
|
"""Look for the next sequence of bytes matching a given sequence. If
|
||||||
a match is found advance the position to the last byte of the match"""
|
a match is found advance the position to the last byte of the match"""
|
||||||
newPosition = self[self.position:].find(bytes)
|
try:
|
||||||
if newPosition > -1:
|
self._position = self.index(bytes, self.position) + len(bytes) - 1
|
||||||
# XXX: This is ugly, but I can't see a nicer way to fix this.
|
except ValueError:
|
||||||
if self._position == -1:
|
|
||||||
self._position = 0
|
|
||||||
self._position += (newPosition + len(bytes) - 1)
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class EncodingParser(object):
|
class EncodingParser(object):
|
||||||
|
@ -694,6 +682,9 @@ class EncodingParser(object):
|
||||||
self.encoding = None
|
self.encoding = None
|
||||||
|
|
||||||
def getEncoding(self):
|
def getEncoding(self):
|
||||||
|
if b"<meta" not in self.data:
|
||||||
|
return None
|
||||||
|
|
||||||
methodDispatch = (
|
methodDispatch = (
|
||||||
(b"<!--", self.handleComment),
|
(b"<!--", self.handleComment),
|
||||||
(b"<meta", self.handleMeta),
|
(b"<meta", self.handleMeta),
|
||||||
|
@ -703,6 +694,10 @@ class EncodingParser(object):
|
||||||
(b"<", self.handlePossibleStartTag))
|
(b"<", self.handlePossibleStartTag))
|
||||||
for _ in self.data:
|
for _ in self.data:
|
||||||
keepParsing = True
|
keepParsing = True
|
||||||
|
try:
|
||||||
|
self.data.jumpTo(b"<")
|
||||||
|
except StopIteration:
|
||||||
|
break
|
||||||
for key, method in methodDispatch:
|
for key, method in methodDispatch:
|
||||||
if self.data.matchBytes(key):
|
if self.data.matchBytes(key):
|
||||||
try:
|
try:
|
||||||
|
@ -908,7 +903,7 @@ class ContentAttrParser(object):
|
||||||
def lookupEncoding(encoding):
|
def lookupEncoding(encoding):
|
||||||
"""Return the python codec name corresponding to an encoding or None if the
|
"""Return the python codec name corresponding to an encoding or None if the
|
||||||
string doesn't correspond to a valid encoding."""
|
string doesn't correspond to a valid encoding."""
|
||||||
if isinstance(encoding, binary_type):
|
if isinstance(encoding, bytes):
|
||||||
try:
|
try:
|
||||||
encoding = encoding.decode("ascii")
|
encoding = encoding.decode("ascii")
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
|
|
|
@ -2,7 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
from six import unichr as chr
|
from six import unichr as chr
|
||||||
|
|
||||||
from collections import deque
|
from collections import deque, OrderedDict
|
||||||
|
from sys import version_info
|
||||||
|
|
||||||
from .constants import spaceCharacters
|
from .constants import spaceCharacters
|
||||||
from .constants import entities
|
from .constants import entities
|
||||||
|
@ -17,6 +18,11 @@ from ._trie import Trie
|
||||||
|
|
||||||
entitiesTrie = Trie(entities)
|
entitiesTrie = Trie(entities)
|
||||||
|
|
||||||
|
if version_info >= (3, 7):
|
||||||
|
attributeMap = dict
|
||||||
|
else:
|
||||||
|
attributeMap = OrderedDict
|
||||||
|
|
||||||
|
|
||||||
class HTMLTokenizer(object):
|
class HTMLTokenizer(object):
|
||||||
""" This class takes care of tokenizing HTML.
|
""" This class takes care of tokenizing HTML.
|
||||||
|
@ -228,6 +234,14 @@ class HTMLTokenizer(object):
|
||||||
# Add token to the queue to be yielded
|
# Add token to the queue to be yielded
|
||||||
if (token["type"] in tagTokenTypes):
|
if (token["type"] in tagTokenTypes):
|
||||||
token["name"] = token["name"].translate(asciiUpper2Lower)
|
token["name"] = token["name"].translate(asciiUpper2Lower)
|
||||||
|
if token["type"] == tokenTypes["StartTag"]:
|
||||||
|
raw = token["data"]
|
||||||
|
data = attributeMap(raw)
|
||||||
|
if len(raw) > len(data):
|
||||||
|
# we had some duplicated attribute, fix so first wins
|
||||||
|
data.update(raw[::-1])
|
||||||
|
token["data"] = data
|
||||||
|
|
||||||
if token["type"] == tokenTypes["EndTag"]:
|
if token["type"] == tokenTypes["EndTag"]:
|
||||||
if token["data"]:
|
if token["data"]:
|
||||||
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
||||||
|
|
|
@ -1,14 +1,5 @@
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
from .py import Trie as PyTrie
|
from .py import Trie
|
||||||
|
|
||||||
Trie = PyTrie
|
__all__ = ["Trie"]
|
||||||
|
|
||||||
# pylint:disable=wrong-import-position
|
|
||||||
try:
|
|
||||||
from .datrie import Trie as DATrie
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
Trie = DATrie
|
|
||||||
# pylint:enable=wrong-import-position
|
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
from collections import Mapping
|
try:
|
||||||
|
from collections.abc import Mapping
|
||||||
|
except ImportError: # Python 2.7
|
||||||
|
from collections import Mapping
|
||||||
|
|
||||||
|
|
||||||
class Trie(Mapping):
|
class Trie(Mapping):
|
||||||
|
|
|
@ -1,44 +0,0 @@
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from datrie import Trie as DATrie
|
|
||||||
from six import text_type
|
|
||||||
|
|
||||||
from ._base import Trie as ABCTrie
|
|
||||||
|
|
||||||
|
|
||||||
class Trie(ABCTrie):
|
|
||||||
def __init__(self, data):
|
|
||||||
chars = set()
|
|
||||||
for key in data.keys():
|
|
||||||
if not isinstance(key, text_type):
|
|
||||||
raise TypeError("All keys must be strings")
|
|
||||||
for char in key:
|
|
||||||
chars.add(char)
|
|
||||||
|
|
||||||
self._data = DATrie("".join(chars))
|
|
||||||
for key, value in data.items():
|
|
||||||
self._data[key] = value
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
|
||||||
return key in self._data
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self._data)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
def __getitem__(self, key):
|
|
||||||
return self._data[key]
|
|
||||||
|
|
||||||
def keys(self, prefix=None):
|
|
||||||
return self._data.keys(prefix)
|
|
||||||
|
|
||||||
def has_keys_with_prefix(self, prefix):
|
|
||||||
return self._data.has_keys_with_prefix(prefix)
|
|
||||||
|
|
||||||
def longest_prefix(self, prefix):
|
|
||||||
return self._data.longest_prefix(prefix)
|
|
||||||
|
|
||||||
def longest_prefix_item(self, prefix):
|
|
||||||
return self._data.longest_prefix_item(prefix)
|
|
|
@ -2,12 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
from types import ModuleType
|
from types import ModuleType
|
||||||
|
|
||||||
from six import text_type
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import xml.etree.cElementTree as default_etree
|
from collections.abc import Mapping
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
from collections import Mapping
|
||||||
|
|
||||||
|
from six import text_type, PY3
|
||||||
|
|
||||||
|
if PY3:
|
||||||
import xml.etree.ElementTree as default_etree
|
import xml.etree.ElementTree as default_etree
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
import xml.etree.cElementTree as default_etree
|
||||||
|
except ImportError:
|
||||||
|
import xml.etree.ElementTree as default_etree
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
||||||
|
@ -27,7 +35,7 @@ try:
|
||||||
# We need this with u"" because of http://bugs.jython.org/issue2039
|
# We need this with u"" because of http://bugs.jython.org/issue2039
|
||||||
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
||||||
assert isinstance(_x, text_type)
|
assert isinstance(_x, text_type)
|
||||||
except: # pylint:disable=bare-except
|
except Exception:
|
||||||
supports_lone_surrogates = False
|
supports_lone_surrogates = False
|
||||||
else:
|
else:
|
||||||
supports_lone_surrogates = True
|
supports_lone_surrogates = True
|
||||||
|
@ -47,9 +55,6 @@ class MethodDispatcher(dict):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, items=()):
|
def __init__(self, items=()):
|
||||||
# Using _dictEntries instead of directly assigning to self is about
|
|
||||||
# twice as fast. Please do careful performance testing before changing
|
|
||||||
# anything here.
|
|
||||||
_dictEntries = []
|
_dictEntries = []
|
||||||
for name, value in items:
|
for name, value in items:
|
||||||
if isinstance(name, (list, tuple, frozenset, set)):
|
if isinstance(name, (list, tuple, frozenset, set)):
|
||||||
|
@ -64,6 +69,36 @@ class MethodDispatcher(dict):
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
return dict.get(self, key, self.default)
|
return dict.get(self, key, self.default)
|
||||||
|
|
||||||
|
def __get__(self, instance, owner=None):
|
||||||
|
return BoundMethodDispatcher(instance, self)
|
||||||
|
|
||||||
|
|
||||||
|
class BoundMethodDispatcher(Mapping):
|
||||||
|
"""Wraps a MethodDispatcher, binding its return values to `instance`"""
|
||||||
|
def __init__(self, instance, dispatcher):
|
||||||
|
self.instance = instance
|
||||||
|
self.dispatcher = dispatcher
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
# see https://docs.python.org/3/reference/datamodel.html#object.__get__
|
||||||
|
# on a function, __get__ is used to bind a function to an instance as a bound method
|
||||||
|
return self.dispatcher[key].__get__(self.instance)
|
||||||
|
|
||||||
|
def get(self, key, default):
|
||||||
|
if key in self.dispatcher:
|
||||||
|
return self[key]
|
||||||
|
else:
|
||||||
|
return default
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self.dispatcher)
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.dispatcher)
|
||||||
|
|
||||||
|
def __contains__(self, key):
|
||||||
|
return key in self.dispatcher
|
||||||
|
|
||||||
|
|
||||||
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
||||||
# python builds
|
# python builds
|
||||||
|
|
|
@ -519,8 +519,8 @@ adjustForeignAttributes = {
|
||||||
"xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
|
"xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
|
||||||
}
|
}
|
||||||
|
|
||||||
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
|
unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
|
||||||
adjustForeignAttributes.items()])
|
adjustForeignAttributes.items()}
|
||||||
|
|
||||||
spaceCharacters = frozenset([
|
spaceCharacters = frozenset([
|
||||||
"\t",
|
"\t",
|
||||||
|
@ -544,8 +544,7 @@ asciiLetters = frozenset(string.ascii_letters)
|
||||||
digits = frozenset(string.digits)
|
digits = frozenset(string.digits)
|
||||||
hexDigits = frozenset(string.hexdigits)
|
hexDigits = frozenset(string.hexdigits)
|
||||||
|
|
||||||
asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
|
asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
|
||||||
for c in string.ascii_uppercase])
|
|
||||||
|
|
||||||
# Heading elements need to be ordered
|
# Heading elements need to be ordered
|
||||||
headingElements = (
|
headingElements = (
|
||||||
|
@ -2934,7 +2933,7 @@ tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
|
||||||
tokenTypes["EmptyTag"]])
|
tokenTypes["EmptyTag"]])
|
||||||
|
|
||||||
|
|
||||||
prefixes = dict([(v, k) for k, v in namespaces.items()])
|
prefixes = {v: k for k, v in namespaces.items()}
|
||||||
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
|
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,15 @@
|
||||||
|
"""Deprecated from html5lib 1.1.
|
||||||
|
|
||||||
|
See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
|
||||||
|
information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
|
||||||
|
is recommended as a replacement. Please let us know in the aforementioned issue
|
||||||
|
if Bleach is unsuitable for your needs.
|
||||||
|
|
||||||
|
"""
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import warnings
|
||||||
from xml.sax.saxutils import escape, unescape
|
from xml.sax.saxutils import escape, unescape
|
||||||
|
|
||||||
from six.moves import urllib_parse as urlparse
|
from six.moves import urllib_parse as urlparse
|
||||||
|
@ -11,6 +20,14 @@ from ..constants import namespaces, prefixes
|
||||||
__all__ = ["Filter"]
|
__all__ = ["Filter"]
|
||||||
|
|
||||||
|
|
||||||
|
_deprecation_msg = (
|
||||||
|
"html5lib's sanitizer is deprecated; see " +
|
||||||
|
"https://github.com/html5lib/html5lib-python/issues/443 and please let " +
|
||||||
|
"us know if Bleach is unsuitable for your needs"
|
||||||
|
)
|
||||||
|
|
||||||
|
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||||
|
|
||||||
allowed_elements = frozenset((
|
allowed_elements = frozenset((
|
||||||
(namespaces['html'], 'a'),
|
(namespaces['html'], 'a'),
|
||||||
(namespaces['html'], 'abbr'),
|
(namespaces['html'], 'abbr'),
|
||||||
|
@ -750,6 +767,9 @@ class Filter(base.Filter):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
super(Filter, self).__init__(source)
|
super(Filter, self).__init__(source)
|
||||||
|
|
||||||
|
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||||
|
|
||||||
self.allowed_elements = allowed_elements
|
self.allowed_elements = allowed_elements
|
||||||
self.allowed_attributes = allowed_attributes
|
self.allowed_attributes = allowed_attributes
|
||||||
self.allowed_css_properties = allowed_css_properties
|
self.allowed_css_properties = allowed_css_properties
|
||||||
|
|
|
@ -2,7 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
|
||||||
from six import with_metaclass, viewkeys
|
from six import with_metaclass, viewkeys
|
||||||
|
|
||||||
import types
|
import types
|
||||||
from collections import OrderedDict
|
|
||||||
|
|
||||||
from . import _inputstream
|
from . import _inputstream
|
||||||
from . import _tokenizer
|
from . import _tokenizer
|
||||||
|
@ -119,8 +118,8 @@ class HTMLParser(object):
|
||||||
self.tree = tree(namespaceHTMLElements)
|
self.tree = tree(namespaceHTMLElements)
|
||||||
self.errors = []
|
self.errors = []
|
||||||
|
|
||||||
self.phases = dict([(name, cls(self, self.tree)) for name, cls in
|
self.phases = {name: cls(self, self.tree) for name, cls in
|
||||||
getPhases(debug).items()])
|
getPhases(debug).items()}
|
||||||
|
|
||||||
def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
|
def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
|
||||||
|
|
||||||
|
@ -202,7 +201,7 @@ class HTMLParser(object):
|
||||||
DoctypeToken = tokenTypes["Doctype"]
|
DoctypeToken = tokenTypes["Doctype"]
|
||||||
ParseErrorToken = tokenTypes["ParseError"]
|
ParseErrorToken = tokenTypes["ParseError"]
|
||||||
|
|
||||||
for token in self.normalizedTokens():
|
for token in self.tokenizer:
|
||||||
prev_token = None
|
prev_token = None
|
||||||
new_token = token
|
new_token = token
|
||||||
while new_token is not None:
|
while new_token is not None:
|
||||||
|
@ -260,10 +259,6 @@ class HTMLParser(object):
|
||||||
if reprocess:
|
if reprocess:
|
||||||
assert self.phase not in phases
|
assert self.phase not in phases
|
||||||
|
|
||||||
def normalizedTokens(self):
|
|
||||||
for token in self.tokenizer:
|
|
||||||
yield self.normalizeToken(token)
|
|
||||||
|
|
||||||
def parse(self, stream, *args, **kwargs):
|
def parse(self, stream, *args, **kwargs):
|
||||||
"""Parse a HTML document into a well-formed tree
|
"""Parse a HTML document into a well-formed tree
|
||||||
|
|
||||||
|
@ -325,17 +320,6 @@ class HTMLParser(object):
|
||||||
if self.strict:
|
if self.strict:
|
||||||
raise ParseError(E[errorcode] % datavars)
|
raise ParseError(E[errorcode] % datavars)
|
||||||
|
|
||||||
def normalizeToken(self, token):
|
|
||||||
# HTML5 specific normalizations to the token stream
|
|
||||||
if token["type"] == tokenTypes["StartTag"]:
|
|
||||||
raw = token["data"]
|
|
||||||
token["data"] = OrderedDict(raw)
|
|
||||||
if len(raw) > len(token["data"]):
|
|
||||||
# we had some duplicated attribute, fix so first wins
|
|
||||||
token["data"].update(raw[::-1])
|
|
||||||
|
|
||||||
return token
|
|
||||||
|
|
||||||
def adjustMathMLAttributes(self, token):
|
def adjustMathMLAttributes(self, token):
|
||||||
adjust_attributes(token, adjustMathMLAttributes)
|
adjust_attributes(token, adjustMathMLAttributes)
|
||||||
|
|
||||||
|
@ -413,16 +397,12 @@ class HTMLParser(object):
|
||||||
def getPhases(debug):
|
def getPhases(debug):
|
||||||
def log(function):
|
def log(function):
|
||||||
"""Logger that records which phase processes each token"""
|
"""Logger that records which phase processes each token"""
|
||||||
type_names = dict((value, key) for key, value in
|
type_names = {value: key for key, value in tokenTypes.items()}
|
||||||
tokenTypes.items())
|
|
||||||
|
|
||||||
def wrapped(self, *args, **kwargs):
|
def wrapped(self, *args, **kwargs):
|
||||||
if function.__name__.startswith("process") and len(args) > 0:
|
if function.__name__.startswith("process") and len(args) > 0:
|
||||||
token = args[0]
|
token = args[0]
|
||||||
try:
|
info = {"type": type_names[token['type']]}
|
||||||
info = {"type": type_names[token['type']]}
|
|
||||||
except:
|
|
||||||
raise
|
|
||||||
if token['type'] in tagTokenTypes:
|
if token['type'] in tagTokenTypes:
|
||||||
info["name"] = token['name']
|
info["name"] = token['name']
|
||||||
|
|
||||||
|
@ -446,10 +426,13 @@ def getPhases(debug):
|
||||||
class Phase(with_metaclass(getMetaclass(debug, log))):
|
class Phase(with_metaclass(getMetaclass(debug, log))):
|
||||||
"""Base class for helper object that implements each phase of processing
|
"""Base class for helper object that implements each phase of processing
|
||||||
"""
|
"""
|
||||||
|
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
|
||||||
|
|
||||||
def __init__(self, parser, tree):
|
def __init__(self, parser, tree):
|
||||||
self.parser = parser
|
self.parser = parser
|
||||||
self.tree = tree
|
self.tree = tree
|
||||||
|
self.__startTagCache = {}
|
||||||
|
self.__endTagCache = {}
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
@ -469,7 +452,21 @@ def getPhases(debug):
|
||||||
self.tree.insertText(token["data"])
|
self.tree.insertText(token["data"])
|
||||||
|
|
||||||
def processStartTag(self, token):
|
def processStartTag(self, token):
|
||||||
return self.startTagHandler[token["name"]](token)
|
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
|
||||||
|
# requires a circular reference to the Phase, and this ends up with a significant
|
||||||
|
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
|
||||||
|
name = token["name"]
|
||||||
|
# In Py2, using `in` is quicker in general than try/except KeyError
|
||||||
|
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
|
||||||
|
if name in self.__startTagCache:
|
||||||
|
func = self.__startTagCache[name]
|
||||||
|
else:
|
||||||
|
func = self.__startTagCache[name] = self.startTagHandler[name]
|
||||||
|
# bound the cache size in case we get loads of unknown tags
|
||||||
|
while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
|
||||||
|
# this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
|
||||||
|
self.__startTagCache.pop(next(iter(self.__startTagCache)))
|
||||||
|
return func(token)
|
||||||
|
|
||||||
def startTagHtml(self, token):
|
def startTagHtml(self, token):
|
||||||
if not self.parser.firstStartTag and token["name"] == "html":
|
if not self.parser.firstStartTag and token["name"] == "html":
|
||||||
|
@ -482,9 +479,25 @@ def getPhases(debug):
|
||||||
self.parser.firstStartTag = False
|
self.parser.firstStartTag = False
|
||||||
|
|
||||||
def processEndTag(self, token):
|
def processEndTag(self, token):
|
||||||
return self.endTagHandler[token["name"]](token)
|
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
|
||||||
|
# requires a circular reference to the Phase, and this ends up with a significant
|
||||||
|
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
|
||||||
|
name = token["name"]
|
||||||
|
# In Py2, using `in` is quicker in general than try/except KeyError
|
||||||
|
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
|
||||||
|
if name in self.__endTagCache:
|
||||||
|
func = self.__endTagCache[name]
|
||||||
|
else:
|
||||||
|
func = self.__endTagCache[name] = self.endTagHandler[name]
|
||||||
|
# bound the cache size in case we get loads of unknown tags
|
||||||
|
while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
|
||||||
|
# this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
|
||||||
|
self.__endTagCache.pop(next(iter(self.__endTagCache)))
|
||||||
|
return func(token)
|
||||||
|
|
||||||
class InitialPhase(Phase):
|
class InitialPhase(Phase):
|
||||||
|
__slots__ = tuple()
|
||||||
|
|
||||||
def processSpaceCharacters(self, token):
|
def processSpaceCharacters(self, token):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -613,6 +626,8 @@ def getPhases(debug):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
class BeforeHtmlPhase(Phase):
|
class BeforeHtmlPhase(Phase):
|
||||||
|
__slots__ = tuple()
|
||||||
|
|
||||||
# helper methods
|
# helper methods
|
||||||
def insertHtmlElement(self):
|
def insertHtmlElement(self):
|
||||||
self.tree.insertRoot(impliedTagToken("html", "StartTag"))
|
self.tree.insertRoot(impliedTagToken("html", "StartTag"))
|
||||||
|
@ -648,19 +663,7 @@ def getPhases(debug):
|
||||||
return token
|
return token
|
||||||
|
|
||||||
class BeforeHeadPhase(Phase):
|
class BeforeHeadPhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("head", self.startTagHead)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
(("head", "body", "html", "br"), self.endTagImplyHead)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
self.startTagHead(impliedTagToken("head", "StartTag"))
|
self.startTagHead(impliedTagToken("head", "StartTag"))
|
||||||
|
@ -693,28 +696,19 @@ def getPhases(debug):
|
||||||
self.parser.parseError("end-tag-after-implied-root",
|
self.parser.parseError("end-tag-after-implied-root",
|
||||||
{"name": token["name"]})
|
{"name": token["name"]})
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", startTagHtml),
|
||||||
|
("head", startTagHead)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
(("head", "body", "html", "br"), endTagImplyHead)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InHeadPhase(Phase):
|
class InHeadPhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("title", self.startTagTitle),
|
|
||||||
(("noframes", "style"), self.startTagNoFramesStyle),
|
|
||||||
("noscript", self.startTagNoscript),
|
|
||||||
("script", self.startTagScript),
|
|
||||||
(("base", "basefont", "bgsound", "command", "link"),
|
|
||||||
self.startTagBaseLinkCommand),
|
|
||||||
("meta", self.startTagMeta),
|
|
||||||
("head", self.startTagHead)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("head", self.endTagHead),
|
|
||||||
(("br", "html", "body"), self.endTagHtmlBodyBr)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
# the real thing
|
# the real thing
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
|
@ -796,22 +790,27 @@ def getPhases(debug):
|
||||||
def anythingElse(self):
|
def anythingElse(self):
|
||||||
self.endTagHead(impliedTagToken("head"))
|
self.endTagHead(impliedTagToken("head"))
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", startTagHtml),
|
||||||
|
("title", startTagTitle),
|
||||||
|
(("noframes", "style"), startTagNoFramesStyle),
|
||||||
|
("noscript", startTagNoscript),
|
||||||
|
("script", startTagScript),
|
||||||
|
(("base", "basefont", "bgsound", "command", "link"),
|
||||||
|
startTagBaseLinkCommand),
|
||||||
|
("meta", startTagMeta),
|
||||||
|
("head", startTagHead)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("head", endTagHead),
|
||||||
|
(("br", "html", "body"), endTagHtmlBodyBr)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InHeadNoscriptPhase(Phase):
|
class InHeadNoscriptPhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
(("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
|
|
||||||
(("head", "noscript"), self.startTagHeadNoscript),
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("noscript", self.endTagNoscript),
|
|
||||||
("br", self.endTagBr),
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
self.parser.parseError("eof-in-head-noscript")
|
self.parser.parseError("eof-in-head-noscript")
|
||||||
|
@ -860,23 +859,21 @@ def getPhases(debug):
|
||||||
# Caller must raise parse error first!
|
# Caller must raise parse error first!
|
||||||
self.endTagNoscript(impliedTagToken("noscript"))
|
self.endTagNoscript(impliedTagToken("noscript"))
|
||||||
|
|
||||||
class AfterHeadPhase(Phase):
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
def __init__(self, parser, tree):
|
("html", startTagHtml),
|
||||||
Phase.__init__(self, parser, tree)
|
(("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
|
||||||
|
(("head", "noscript"), startTagHeadNoscript),
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
("html", self.startTagHtml),
|
("noscript", endTagNoscript),
|
||||||
("body", self.startTagBody),
|
("br", endTagBr),
|
||||||
("frameset", self.startTagFrameset),
|
])
|
||||||
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
|
endTagHandler.default = endTagOther
|
||||||
"style", "title"),
|
|
||||||
self.startTagFromHead),
|
class AfterHeadPhase(Phase):
|
||||||
("head", self.startTagHead)
|
__slots__ = tuple()
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
|
|
||||||
self.endTagHtmlBodyBr)])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
self.anythingElse()
|
self.anythingElse()
|
||||||
|
@ -927,80 +924,30 @@ def getPhases(debug):
|
||||||
self.parser.phase = self.parser.phases["inBody"]
|
self.parser.phase = self.parser.phases["inBody"]
|
||||||
self.parser.framesetOK = True
|
self.parser.framesetOK = True
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", startTagHtml),
|
||||||
|
("body", startTagBody),
|
||||||
|
("frameset", startTagFrameset),
|
||||||
|
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
|
||||||
|
"style", "title"),
|
||||||
|
startTagFromHead),
|
||||||
|
("head", startTagHead)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
|
||||||
|
endTagHtmlBodyBr)])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InBodyPhase(Phase):
|
class InBodyPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
|
# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
|
||||||
# the really-really-really-very crazy mode
|
# the really-really-really-very crazy mode
|
||||||
def __init__(self, parser, tree):
|
__slots__ = ("processSpaceCharacters",)
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(InBodyPhase, self).__init__(*args, **kwargs)
|
||||||
# Set this to the default handler
|
# Set this to the default handler
|
||||||
self.processSpaceCharacters = self.processSpaceCharactersNonPre
|
self.processSpaceCharacters = self.processSpaceCharactersNonPre
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
(("base", "basefont", "bgsound", "command", "link", "meta",
|
|
||||||
"script", "style", "title"),
|
|
||||||
self.startTagProcessInHead),
|
|
||||||
("body", self.startTagBody),
|
|
||||||
("frameset", self.startTagFrameset),
|
|
||||||
(("address", "article", "aside", "blockquote", "center", "details",
|
|
||||||
"dir", "div", "dl", "fieldset", "figcaption", "figure",
|
|
||||||
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
|
|
||||||
"section", "summary", "ul"),
|
|
||||||
self.startTagCloseP),
|
|
||||||
(headingElements, self.startTagHeading),
|
|
||||||
(("pre", "listing"), self.startTagPreListing),
|
|
||||||
("form", self.startTagForm),
|
|
||||||
(("li", "dd", "dt"), self.startTagListItem),
|
|
||||||
("plaintext", self.startTagPlaintext),
|
|
||||||
("a", self.startTagA),
|
|
||||||
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
|
|
||||||
"strong", "tt", "u"), self.startTagFormatting),
|
|
||||||
("nobr", self.startTagNobr),
|
|
||||||
("button", self.startTagButton),
|
|
||||||
(("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
|
|
||||||
("xmp", self.startTagXmp),
|
|
||||||
("table", self.startTagTable),
|
|
||||||
(("area", "br", "embed", "img", "keygen", "wbr"),
|
|
||||||
self.startTagVoidFormatting),
|
|
||||||
(("param", "source", "track"), self.startTagParamSource),
|
|
||||||
("input", self.startTagInput),
|
|
||||||
("hr", self.startTagHr),
|
|
||||||
("image", self.startTagImage),
|
|
||||||
("isindex", self.startTagIsIndex),
|
|
||||||
("textarea", self.startTagTextarea),
|
|
||||||
("iframe", self.startTagIFrame),
|
|
||||||
("noscript", self.startTagNoscript),
|
|
||||||
(("noembed", "noframes"), self.startTagRawtext),
|
|
||||||
("select", self.startTagSelect),
|
|
||||||
(("rp", "rt"), self.startTagRpRt),
|
|
||||||
(("option", "optgroup"), self.startTagOpt),
|
|
||||||
(("math"), self.startTagMath),
|
|
||||||
(("svg"), self.startTagSvg),
|
|
||||||
(("caption", "col", "colgroup", "frame", "head",
|
|
||||||
"tbody", "td", "tfoot", "th", "thead",
|
|
||||||
"tr"), self.startTagMisplaced)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("body", self.endTagBody),
|
|
||||||
("html", self.endTagHtml),
|
|
||||||
(("address", "article", "aside", "blockquote", "button", "center",
|
|
||||||
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
|
|
||||||
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
|
|
||||||
"section", "summary", "ul"), self.endTagBlock),
|
|
||||||
("form", self.endTagForm),
|
|
||||||
("p", self.endTagP),
|
|
||||||
(("dd", "dt", "li"), self.endTagListItem),
|
|
||||||
(headingElements, self.endTagHeading),
|
|
||||||
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
|
|
||||||
"strike", "strong", "tt", "u"), self.endTagFormatting),
|
|
||||||
(("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
|
|
||||||
("br", self.endTagBr),
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def isMatchingFormattingElement(self, node1, node2):
|
def isMatchingFormattingElement(self, node1, node2):
|
||||||
return (node1.name == node2.name and
|
return (node1.name == node2.name and
|
||||||
node1.namespace == node2.namespace and
|
node1.namespace == node2.namespace and
|
||||||
|
@ -1650,14 +1597,73 @@ def getPhases(debug):
|
||||||
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
||||||
break
|
break
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
(("base", "basefont", "bgsound", "command", "link", "meta",
|
||||||
|
"script", "style", "title"),
|
||||||
|
startTagProcessInHead),
|
||||||
|
("body", startTagBody),
|
||||||
|
("frameset", startTagFrameset),
|
||||||
|
(("address", "article", "aside", "blockquote", "center", "details",
|
||||||
|
"dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||||
|
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
|
||||||
|
"section", "summary", "ul"),
|
||||||
|
startTagCloseP),
|
||||||
|
(headingElements, startTagHeading),
|
||||||
|
(("pre", "listing"), startTagPreListing),
|
||||||
|
("form", startTagForm),
|
||||||
|
(("li", "dd", "dt"), startTagListItem),
|
||||||
|
("plaintext", startTagPlaintext),
|
||||||
|
("a", startTagA),
|
||||||
|
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
|
||||||
|
"strong", "tt", "u"), startTagFormatting),
|
||||||
|
("nobr", startTagNobr),
|
||||||
|
("button", startTagButton),
|
||||||
|
(("applet", "marquee", "object"), startTagAppletMarqueeObject),
|
||||||
|
("xmp", startTagXmp),
|
||||||
|
("table", startTagTable),
|
||||||
|
(("area", "br", "embed", "img", "keygen", "wbr"),
|
||||||
|
startTagVoidFormatting),
|
||||||
|
(("param", "source", "track"), startTagParamSource),
|
||||||
|
("input", startTagInput),
|
||||||
|
("hr", startTagHr),
|
||||||
|
("image", startTagImage),
|
||||||
|
("isindex", startTagIsIndex),
|
||||||
|
("textarea", startTagTextarea),
|
||||||
|
("iframe", startTagIFrame),
|
||||||
|
("noscript", startTagNoscript),
|
||||||
|
(("noembed", "noframes"), startTagRawtext),
|
||||||
|
("select", startTagSelect),
|
||||||
|
(("rp", "rt"), startTagRpRt),
|
||||||
|
(("option", "optgroup"), startTagOpt),
|
||||||
|
(("math"), startTagMath),
|
||||||
|
(("svg"), startTagSvg),
|
||||||
|
(("caption", "col", "colgroup", "frame", "head",
|
||||||
|
"tbody", "td", "tfoot", "th", "thead",
|
||||||
|
"tr"), startTagMisplaced)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("body", endTagBody),
|
||||||
|
("html", endTagHtml),
|
||||||
|
(("address", "article", "aside", "blockquote", "button", "center",
|
||||||
|
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||||
|
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
|
||||||
|
"section", "summary", "ul"), endTagBlock),
|
||||||
|
("form", endTagForm),
|
||||||
|
("p", endTagP),
|
||||||
|
(("dd", "dt", "li"), endTagListItem),
|
||||||
|
(headingElements, endTagHeading),
|
||||||
|
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
|
||||||
|
"strike", "strong", "tt", "u"), endTagFormatting),
|
||||||
|
(("applet", "marquee", "object"), endTagAppletMarqueeObject),
|
||||||
|
("br", endTagBr),
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class TextPhase(Phase):
|
class TextPhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("script", self.endTagScript)])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processCharacters(self, token):
|
def processCharacters(self, token):
|
||||||
self.tree.insertText(token["data"])
|
self.tree.insertText(token["data"])
|
||||||
|
@ -1683,30 +1689,15 @@ def getPhases(debug):
|
||||||
self.tree.openElements.pop()
|
self.tree.openElements.pop()
|
||||||
self.parser.phase = self.parser.originalPhase
|
self.parser.phase = self.parser.originalPhase
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("script", endTagScript)])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InTablePhase(Phase):
|
class InTablePhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("caption", self.startTagCaption),
|
|
||||||
("colgroup", self.startTagColgroup),
|
|
||||||
("col", self.startTagCol),
|
|
||||||
(("tbody", "tfoot", "thead"), self.startTagRowGroup),
|
|
||||||
(("td", "th", "tr"), self.startTagImplyTbody),
|
|
||||||
("table", self.startTagTable),
|
|
||||||
(("style", "script"), self.startTagStyleScript),
|
|
||||||
("input", self.startTagInput),
|
|
||||||
("form", self.startTagForm)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("table", self.endTagTable),
|
|
||||||
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
|
|
||||||
"tfoot", "th", "thead", "tr"), self.endTagIgnore)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
# helper methods
|
# helper methods
|
||||||
def clearStackToTableContext(self):
|
def clearStackToTableContext(self):
|
||||||
|
@ -1828,9 +1819,32 @@ def getPhases(debug):
|
||||||
self.parser.phases["inBody"].processEndTag(token)
|
self.parser.phases["inBody"].processEndTag(token)
|
||||||
self.tree.insertFromTable = False
|
self.tree.insertFromTable = False
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
("caption", startTagCaption),
|
||||||
|
("colgroup", startTagColgroup),
|
||||||
|
("col", startTagCol),
|
||||||
|
(("tbody", "tfoot", "thead"), startTagRowGroup),
|
||||||
|
(("td", "th", "tr"), startTagImplyTbody),
|
||||||
|
("table", startTagTable),
|
||||||
|
(("style", "script"), startTagStyleScript),
|
||||||
|
("input", startTagInput),
|
||||||
|
("form", startTagForm)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("table", endTagTable),
|
||||||
|
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
|
||||||
|
"tfoot", "th", "thead", "tr"), endTagIgnore)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InTableTextPhase(Phase):
|
class InTableTextPhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = ("originalPhase", "characterTokens")
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(InTableTextPhase, self).__init__(*args, **kwargs)
|
||||||
self.originalPhase = None
|
self.originalPhase = None
|
||||||
self.characterTokens = []
|
self.characterTokens = []
|
||||||
|
|
||||||
|
@ -1875,23 +1889,7 @@ def getPhases(debug):
|
||||||
|
|
||||||
class InCaptionPhase(Phase):
|
class InCaptionPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
|
||||||
"thead", "tr"), self.startTagTableElement)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("caption", self.endTagCaption),
|
|
||||||
("table", self.endTagTable),
|
|
||||||
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
|
|
||||||
"thead", "tr"), self.endTagIgnore)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def ignoreEndTagCaption(self):
|
def ignoreEndTagCaption(self):
|
||||||
return not self.tree.elementInScope("caption", variant="table")
|
return not self.tree.elementInScope("caption", variant="table")
|
||||||
|
@ -1944,23 +1942,24 @@ def getPhases(debug):
|
||||||
def endTagOther(self, token):
|
def endTagOther(self, token):
|
||||||
return self.parser.phases["inBody"].processEndTag(token)
|
return self.parser.phases["inBody"].processEndTag(token)
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||||
|
"thead", "tr"), startTagTableElement)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("caption", endTagCaption),
|
||||||
|
("table", endTagTable),
|
||||||
|
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
|
||||||
|
"thead", "tr"), endTagIgnore)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InColumnGroupPhase(Phase):
|
class InColumnGroupPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-column
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-column
|
||||||
|
__slots__ = tuple()
|
||||||
def __init__(self, parser, tree):
|
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("col", self.startTagCol)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("colgroup", self.endTagColgroup),
|
|
||||||
("col", self.endTagCol)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def ignoreEndTagColgroup(self):
|
def ignoreEndTagColgroup(self):
|
||||||
return self.tree.openElements[-1].name == "html"
|
return self.tree.openElements[-1].name == "html"
|
||||||
|
@ -2010,26 +2009,21 @@ def getPhases(debug):
|
||||||
if not ignoreEndTag:
|
if not ignoreEndTag:
|
||||||
return token
|
return token
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
("col", startTagCol)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("colgroup", endTagColgroup),
|
||||||
|
("col", endTagCol)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InTableBodyPhase(Phase):
|
class InTableBodyPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("tr", self.startTagTr),
|
|
||||||
(("td", "th"), self.startTagTableCell),
|
|
||||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
|
|
||||||
self.startTagTableOther)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
|
||||||
("table", self.endTagTable),
|
|
||||||
(("body", "caption", "col", "colgroup", "html", "td", "th",
|
|
||||||
"tr"), self.endTagIgnore)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
# helper methods
|
# helper methods
|
||||||
def clearStackToTableBodyContext(self):
|
def clearStackToTableBodyContext(self):
|
||||||
|
@ -2108,26 +2102,26 @@ def getPhases(debug):
|
||||||
def endTagOther(self, token):
|
def endTagOther(self, token):
|
||||||
return self.parser.phases["inTable"].processEndTag(token)
|
return self.parser.phases["inTable"].processEndTag(token)
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
("tr", startTagTr),
|
||||||
|
(("td", "th"), startTagTableCell),
|
||||||
|
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
|
||||||
|
startTagTableOther)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
|
||||||
|
("table", endTagTable),
|
||||||
|
(("body", "caption", "col", "colgroup", "html", "td", "th",
|
||||||
|
"tr"), endTagIgnore)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InRowPhase(Phase):
|
class InRowPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
(("td", "th"), self.startTagTableCell),
|
|
||||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
|
|
||||||
"tr"), self.startTagTableOther)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("tr", self.endTagTr),
|
|
||||||
("table", self.endTagTable),
|
|
||||||
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
|
||||||
(("body", "caption", "col", "colgroup", "html", "td", "th"),
|
|
||||||
self.endTagIgnore)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
# helper methods (XXX unify this with other table helper methods)
|
# helper methods (XXX unify this with other table helper methods)
|
||||||
def clearStackToTableRowContext(self):
|
def clearStackToTableRowContext(self):
|
||||||
|
@ -2197,23 +2191,26 @@ def getPhases(debug):
|
||||||
def endTagOther(self, token):
|
def endTagOther(self, token):
|
||||||
return self.parser.phases["inTable"].processEndTag(token)
|
return self.parser.phases["inTable"].processEndTag(token)
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
(("td", "th"), startTagTableCell),
|
||||||
|
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
|
||||||
|
"tr"), startTagTableOther)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("tr", endTagTr),
|
||||||
|
("table", endTagTable),
|
||||||
|
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
|
||||||
|
(("body", "caption", "col", "colgroup", "html", "td", "th"),
|
||||||
|
endTagIgnore)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InCellPhase(Phase):
|
class InCellPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
|
||||||
"thead", "tr"), self.startTagTableOther)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
(("td", "th"), self.endTagTableCell),
|
|
||||||
(("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
|
|
||||||
(("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
# helper
|
# helper
|
||||||
def closeCell(self):
|
def closeCell(self):
|
||||||
|
@ -2273,26 +2270,22 @@ def getPhases(debug):
|
||||||
def endTagOther(self, token):
|
def endTagOther(self, token):
|
||||||
return self.parser.phases["inBody"].processEndTag(token)
|
return self.parser.phases["inBody"].processEndTag(token)
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||||
|
"thead", "tr"), startTagTableOther)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
(("td", "th"), endTagTableCell),
|
||||||
|
(("body", "caption", "col", "colgroup", "html"), endTagIgnore),
|
||||||
|
(("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InSelectPhase(Phase):
|
class InSelectPhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("option", self.startTagOption),
|
|
||||||
("optgroup", self.startTagOptgroup),
|
|
||||||
("select", self.startTagSelect),
|
|
||||||
(("input", "keygen", "textarea"), self.startTagInput),
|
|
||||||
("script", self.startTagScript)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("option", self.endTagOption),
|
|
||||||
("optgroup", self.endTagOptgroup),
|
|
||||||
("select", self.endTagSelect)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-select
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-select
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
|
@ -2373,21 +2366,25 @@ def getPhases(debug):
|
||||||
self.parser.parseError("unexpected-end-tag-in-select",
|
self.parser.parseError("unexpected-end-tag-in-select",
|
||||||
{"name": token["name"]})
|
{"name": token["name"]})
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
("option", startTagOption),
|
||||||
|
("optgroup", startTagOptgroup),
|
||||||
|
("select", startTagSelect),
|
||||||
|
(("input", "keygen", "textarea"), startTagInput),
|
||||||
|
("script", startTagScript)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("option", endTagOption),
|
||||||
|
("optgroup", endTagOptgroup),
|
||||||
|
("select", endTagSelect)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InSelectInTablePhase(Phase):
|
class InSelectInTablePhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
|
||||||
self.startTagTable)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
|
||||||
self.endTagTable)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
self.parser.phases["inSelect"].processEOF()
|
self.parser.phases["inSelect"].processEOF()
|
||||||
|
@ -2412,7 +2409,21 @@ def getPhases(debug):
|
||||||
def endTagOther(self, token):
|
def endTagOther(self, token):
|
||||||
return self.parser.phases["inSelect"].processEndTag(token)
|
return self.parser.phases["inSelect"].processEndTag(token)
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||||
|
startTagTable)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||||
|
endTagTable)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InForeignContentPhase(Phase):
|
class InForeignContentPhase(Phase):
|
||||||
|
__slots__ = tuple()
|
||||||
|
|
||||||
breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
|
breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
|
||||||
"center", "code", "dd", "div", "dl", "dt",
|
"center", "code", "dd", "div", "dl", "dt",
|
||||||
"em", "embed", "h1", "h2", "h3",
|
"em", "embed", "h1", "h2", "h3",
|
||||||
|
@ -2422,9 +2433,6 @@ def getPhases(debug):
|
||||||
"span", "strong", "strike", "sub", "sup",
|
"span", "strong", "strike", "sub", "sup",
|
||||||
"table", "tt", "u", "ul", "var"])
|
"table", "tt", "u", "ul", "var"])
|
||||||
|
|
||||||
def __init__(self, parser, tree):
|
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
def adjustSVGTagNames(self, token):
|
def adjustSVGTagNames(self, token):
|
||||||
replacements = {"altglyph": "altGlyph",
|
replacements = {"altglyph": "altGlyph",
|
||||||
"altglyphdef": "altGlyphDef",
|
"altglyphdef": "altGlyphDef",
|
||||||
|
@ -2478,7 +2486,7 @@ def getPhases(debug):
|
||||||
currentNode = self.tree.openElements[-1]
|
currentNode = self.tree.openElements[-1]
|
||||||
if (token["name"] in self.breakoutElements or
|
if (token["name"] in self.breakoutElements or
|
||||||
(token["name"] == "font" and
|
(token["name"] == "font" and
|
||||||
set(token["data"].keys()) & set(["color", "face", "size"]))):
|
set(token["data"].keys()) & {"color", "face", "size"})):
|
||||||
self.parser.parseError("unexpected-html-element-in-foreign-content",
|
self.parser.parseError("unexpected-html-element-in-foreign-content",
|
||||||
{"name": token["name"]})
|
{"name": token["name"]})
|
||||||
while (self.tree.openElements[-1].namespace !=
|
while (self.tree.openElements[-1].namespace !=
|
||||||
|
@ -2528,16 +2536,7 @@ def getPhases(debug):
|
||||||
return new_token
|
return new_token
|
||||||
|
|
||||||
class AfterBodyPhase(Phase):
|
class AfterBodyPhase(Phase):
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
# Stop parsing
|
# Stop parsing
|
||||||
|
@ -2574,23 +2573,17 @@ def getPhases(debug):
|
||||||
self.parser.phase = self.parser.phases["inBody"]
|
self.parser.phase = self.parser.phases["inBody"]
|
||||||
return token
|
return token
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", startTagHtml)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class InFramesetPhase(Phase):
|
class InFramesetPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
|
# http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("frameset", self.startTagFrameset),
|
|
||||||
("frame", self.startTagFrame),
|
|
||||||
("noframes", self.startTagNoframes)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("frameset", self.endTagFrameset)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
if self.tree.openElements[-1].name != "html":
|
if self.tree.openElements[-1].name != "html":
|
||||||
|
@ -2631,21 +2624,22 @@ def getPhases(debug):
|
||||||
self.parser.parseError("unexpected-end-tag-in-frameset",
|
self.parser.parseError("unexpected-end-tag-in-frameset",
|
||||||
{"name": token["name"]})
|
{"name": token["name"]})
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", Phase.startTagHtml),
|
||||||
|
("frameset", startTagFrameset),
|
||||||
|
("frame", startTagFrame),
|
||||||
|
("noframes", startTagNoframes)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
|
("frameset", endTagFrameset)
|
||||||
|
])
|
||||||
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
class AfterFramesetPhase(Phase):
|
class AfterFramesetPhase(Phase):
|
||||||
# http://www.whatwg.org/specs/web-apps/current-work/#after3
|
# http://www.whatwg.org/specs/web-apps/current-work/#after3
|
||||||
def __init__(self, parser, tree):
|
__slots__ = tuple()
|
||||||
Phase.__init__(self, parser, tree)
|
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.startTagHtml),
|
|
||||||
("noframes", self.startTagNoframes)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
self.endTagHandler = _utils.MethodDispatcher([
|
|
||||||
("html", self.endTagHtml)
|
|
||||||
])
|
|
||||||
self.endTagHandler.default = self.endTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
# Stop parsing
|
# Stop parsing
|
||||||
|
@ -2668,14 +2662,19 @@ def getPhases(debug):
|
||||||
self.parser.parseError("unexpected-end-tag-after-frameset",
|
self.parser.parseError("unexpected-end-tag-after-frameset",
|
||||||
{"name": token["name"]})
|
{"name": token["name"]})
|
||||||
|
|
||||||
class AfterAfterBodyPhase(Phase):
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
def __init__(self, parser, tree):
|
("html", Phase.startTagHtml),
|
||||||
Phase.__init__(self, parser, tree)
|
("noframes", startTagNoframes)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
endTagHandler = _utils.MethodDispatcher([
|
||||||
("html", self.startTagHtml)
|
("html", endTagHtml)
|
||||||
])
|
])
|
||||||
self.startTagHandler.default = self.startTagOther
|
endTagHandler.default = endTagOther
|
||||||
|
|
||||||
|
class AfterAfterBodyPhase(Phase):
|
||||||
|
__slots__ = tuple()
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
pass
|
pass
|
||||||
|
@ -2706,15 +2705,13 @@ def getPhases(debug):
|
||||||
self.parser.phase = self.parser.phases["inBody"]
|
self.parser.phase = self.parser.phases["inBody"]
|
||||||
return token
|
return token
|
||||||
|
|
||||||
class AfterAfterFramesetPhase(Phase):
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
def __init__(self, parser, tree):
|
("html", startTagHtml)
|
||||||
Phase.__init__(self, parser, tree)
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
self.startTagHandler = _utils.MethodDispatcher([
|
class AfterAfterFramesetPhase(Phase):
|
||||||
("html", self.startTagHtml),
|
__slots__ = tuple()
|
||||||
("noframes", self.startTagNoFrames)
|
|
||||||
])
|
|
||||||
self.startTagHandler.default = self.startTagOther
|
|
||||||
|
|
||||||
def processEOF(self):
|
def processEOF(self):
|
||||||
pass
|
pass
|
||||||
|
@ -2741,6 +2738,13 @@ def getPhases(debug):
|
||||||
def processEndTag(self, token):
|
def processEndTag(self, token):
|
||||||
self.parser.parseError("expected-eof-but-got-end-tag",
|
self.parser.parseError("expected-eof-but-got-end-tag",
|
||||||
{"name": token["name"]})
|
{"name": token["name"]})
|
||||||
|
|
||||||
|
startTagHandler = _utils.MethodDispatcher([
|
||||||
|
("html", startTagHtml),
|
||||||
|
("noframes", startTagNoFrames)
|
||||||
|
])
|
||||||
|
startTagHandler.default = startTagOther
|
||||||
|
|
||||||
# pylint:enable=unused-argument
|
# pylint:enable=unused-argument
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -2774,8 +2778,8 @@ def getPhases(debug):
|
||||||
def adjust_attributes(token, replacements):
|
def adjust_attributes(token, replacements):
|
||||||
needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
|
needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
|
||||||
if needs_adjustment:
|
if needs_adjustment:
|
||||||
token['data'] = OrderedDict((replacements.get(k, k), v)
|
token['data'] = type(token['data'])((replacements.get(k, k), v)
|
||||||
for k, v in token['data'].items())
|
for k, v in token['data'].items())
|
||||||
|
|
||||||
|
|
||||||
def impliedTagToken(name, type="EndTag", attributes=None,
|
def impliedTagToken(name, type="EndTag", attributes=None,
|
||||||
|
|
|
@ -274,7 +274,7 @@ class HTMLSerializer(object):
|
||||||
if token["systemId"]:
|
if token["systemId"]:
|
||||||
if token["systemId"].find('"') >= 0:
|
if token["systemId"].find('"') >= 0:
|
||||||
if token["systemId"].find("'") >= 0:
|
if token["systemId"].find("'") >= 0:
|
||||||
self.serializeError("System identifer contains both single and double quote characters")
|
self.serializeError("System identifier contains both single and double quote characters")
|
||||||
quote_char = "'"
|
quote_char = "'"
|
||||||
else:
|
else:
|
||||||
quote_char = '"'
|
quote_char = '"'
|
||||||
|
|
|
@ -0,0 +1,433 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "IE_Comments",
|
||||||
|
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
|
||||||
|
"output": ""
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "IE_Comments_2",
|
||||||
|
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
|
||||||
|
"output": "<script>alert('XSS');</script>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "allow_colons_in_path_component",
|
||||||
|
"input": "<a href=\"./this:that\">foo</a>",
|
||||||
|
"output": "<a href='./this:that'>foo</a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "background_attribute",
|
||||||
|
"input": "<div background=\"javascript:alert('XSS')\"></div>",
|
||||||
|
"output": "<div></div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "bgsound",
|
||||||
|
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
|
||||||
|
"output": "<bgsound src=\"javascript:alert('XSS');\"></bgsound>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "div_background_image_unicode_encoded",
|
||||||
|
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
|
||||||
|
"output": "<div style=''>foo</div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "div_expression",
|
||||||
|
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
|
||||||
|
"output": "<div style=''>foo</div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "double_open_angle_brackets",
|
||||||
|
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
|
||||||
|
"output": ""
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "double_open_angle_brackets_2",
|
||||||
|
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
|
||||||
|
"output": ""
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "grave_accents",
|
||||||
|
"input": "<img src=`javascript:alert('XSS')` />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "img_dynsrc_lowsrc",
|
||||||
|
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "img_vbscript",
|
||||||
|
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "input_image",
|
||||||
|
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
|
||||||
|
"output": "<input type='image'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "link_stylesheets",
|
||||||
|
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
|
||||||
|
"output": "<link href=\"javascript:alert('XSS');\" rel=\"stylesheet\">"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "link_stylesheets_2",
|
||||||
|
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
|
||||||
|
"output": "<link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\">"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "list_style_image",
|
||||||
|
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
|
||||||
|
"output": "<li style=''>foo</li>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "no_closing_script_tags",
|
||||||
|
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||||
|
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\"></script>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "non_alpha_non_digit",
|
||||||
|
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
|
||||||
|
"output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "non_alpha_non_digit_2",
|
||||||
|
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
|
||||||
|
"output": "<a>foo</a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "non_alpha_non_digit_3",
|
||||||
|
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
|
||||||
|
"output": "<img src='http://ha.ckers.org/xss.js'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "non_alpha_non_digit_II",
|
||||||
|
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
|
||||||
|
"output": "<a>foo</a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "non_alpha_non_digit_III",
|
||||||
|
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
|
||||||
|
"output": "<a>foo</a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "platypus",
|
||||||
|
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
|
||||||
|
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "protocol_resolution_in_script_tag",
|
||||||
|
"input": "<script src=//ha.ckers.org/.j></script>",
|
||||||
|
"output": "<script src=\"//ha.ckers.org/.j\"></script>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_allow_anchors",
|
||||||
|
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
|
||||||
|
"output": "<a href='foo'><script>baz</script></a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_allow_image_alt_attribute",
|
||||||
|
"input": "<img alt='foo' onclick='bar' />",
|
||||||
|
"output": "<img alt='foo'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_allow_image_height_attribute",
|
||||||
|
"input": "<img height='foo' onclick='bar' />",
|
||||||
|
"output": "<img height='foo'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_allow_image_src_attribute",
|
||||||
|
"input": "<img src='foo' onclick='bar' />",
|
||||||
|
"output": "<img src='foo'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_allow_image_width_attribute",
|
||||||
|
"input": "<img width='foo' onclick='bar' />",
|
||||||
|
"output": "<img width='foo'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_handle_blank_text",
|
||||||
|
"input": "",
|
||||||
|
"output": ""
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_handle_malformed_image_tags",
|
||||||
|
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
|
||||||
|
"output": "<img/><script>alert(\"XSS\")</script>\">"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_handle_non_html",
|
||||||
|
"input": "abc",
|
||||||
|
"output": "abc"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_ridiculous_hack",
|
||||||
|
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_0",
|
||||||
|
"input": "<img src=\"javascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_1",
|
||||||
|
"input": "<img src=javascript:alert('XSS') />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_10",
|
||||||
|
"input": "<img src=\"jav
ascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_11",
|
||||||
|
"input": "<img src=\"jav
ascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_12",
|
||||||
|
"input": "<img src=\"  javascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_13",
|
||||||
|
"input": "<img src=\" javascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_14",
|
||||||
|
"input": "<img src=\" javascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_2",
|
||||||
|
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_3",
|
||||||
|
"input": "<img src='javascript:alert("XSS")' />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_4",
|
||||||
|
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_5",
|
||||||
|
"input": "<img src='javascript:alert('XSS')' />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_6",
|
||||||
|
"input": "<img src='javascript:alert('XSS')' />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_7",
|
||||||
|
"input": "<img src='javascript:alert('XSS')' />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_8",
|
||||||
|
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_not_fall_for_xss_image_hack_9",
|
||||||
|
"input": "<img src=\"jav	ascript:alert('XSS');\" />",
|
||||||
|
"output": "<img/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_sanitize_half_open_scripts",
|
||||||
|
"input": "<img src=\"javascript:alert('XSS')\"",
|
||||||
|
"output": ""
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_sanitize_invalid_script_tag",
|
||||||
|
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
|
||||||
|
"output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
|
||||||
|
"input": "<<script>alert(\"XSS\");//<</script>",
|
||||||
|
"output": "<<script>alert(\"XSS\");//<</script>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
|
||||||
|
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
|
||||||
|
"output": ""
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_sanitize_tag_broken_up_by_null",
|
||||||
|
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
|
||||||
|
"output": "<scr\ufffdipt>alert(\"XSS\")</scr\ufffdipt>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_sanitize_unclosed_script",
|
||||||
|
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||||
|
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\"></script>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
|
||||||
|
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
|
||||||
|
"output": "<a title='1'>boo</a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
|
||||||
|
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
|
||||||
|
"output": "<a title='1'>boo</a>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
|
||||||
|
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
|
||||||
|
"output": "<img title='1'/>boo"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
|
||||||
|
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
|
||||||
|
"output": "<img title='1'/>boo"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "xml_base",
|
||||||
|
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
|
||||||
|
"output": "<div>foo</div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "xul",
|
||||||
|
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
|
||||||
|
"output": "<p style=''>fubar</p>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "quotes_in_attributes",
|
||||||
|
"input": "<img src='foo' title='\"foo\" bar' />",
|
||||||
|
"output": "<img src='foo' title='\"foo\" bar'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "uri_refs_in_svg_attributes",
|
||||||
|
"input": "<svg><rect fill='url(#foo)' />",
|
||||||
|
"output": "<svg><rect fill='url(#foo)'></rect></svg>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "absolute_uri_refs_in_svg_attributes",
|
||||||
|
"input": "<svg><rect fill='url(http://bad.com/) #fff' />",
|
||||||
|
"output": "<svg><rect fill=' #fff'></rect></svg>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "uri_ref_with_space_in svg_attribute",
|
||||||
|
"input": "<svg><rect fill='url(\n#foo)' />",
|
||||||
|
"output": "<svg><rect fill='url(\n#foo)'></rect></svg>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "absolute_uri_ref_with_space_in svg_attribute",
|
||||||
|
"input": "<svg><rect fill=\"url(\nhttp://bad.com/)\" />",
|
||||||
|
"output": "<svg><rect fill=' '></rect></svg>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "allow_html5_image_tag",
|
||||||
|
"input": "<image src='foo' />",
|
||||||
|
"output": "<img src='foo'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "style_attr_end_with_nothing",
|
||||||
|
"input": "<div style=\"color: blue\" />",
|
||||||
|
"output": "<div style='color: blue;'></div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "style_attr_end_with_space",
|
||||||
|
"input": "<div style=\"color: blue \" />",
|
||||||
|
"output": "<div style='color: blue ;'></div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "style_attr_end_with_semicolon",
|
||||||
|
"input": "<div style=\"color: blue;\" />",
|
||||||
|
"output": "<div style='color: blue;'></div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "style_attr_end_with_semicolon_space",
|
||||||
|
"input": "<div style=\"color: blue; \" />",
|
||||||
|
"output": "<div style='color: blue;'></div>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "attributes_with_embedded_quotes",
|
||||||
|
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
|
||||||
|
"output": "<img src='doesntexist.jpg\"'onerror=\"alert(1)'/>"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "attributes_with_embedded_quotes_II",
|
||||||
|
"input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
|
||||||
|
"output": "<img src='notthere.jpg\"\"onerror=\"alert(2)'/>"
|
||||||
|
}
|
||||||
|
]
|
|
@ -27,14 +27,15 @@ class SanitizerTest(pytest.Item):
|
||||||
expected = self.test["output"]
|
expected = self.test["output"]
|
||||||
|
|
||||||
parsed = parseFragment(input)
|
parsed = parseFragment(input)
|
||||||
serialized = serialize(parsed,
|
with pytest.deprecated_call():
|
||||||
sanitize=True,
|
serialized = serialize(parsed,
|
||||||
omit_optional_tags=False,
|
sanitize=True,
|
||||||
use_trailing_solidus=True,
|
omit_optional_tags=False,
|
||||||
space_before_trailing_solidus=False,
|
use_trailing_solidus=True,
|
||||||
quote_attr_values="always",
|
space_before_trailing_solidus=False,
|
||||||
quote_char="'",
|
quote_attr_values="always",
|
||||||
alphabetical_attributes=True)
|
quote_char="'",
|
||||||
|
alphabetical_attributes=True)
|
||||||
errorMsg = "\n".join(["\n\nInput:", input,
|
errorMsg = "\n".join(["\n\nInput:", input,
|
||||||
"\nExpected:", expected,
|
"\nExpected:", expected,
|
||||||
"\nReceived:", serialized])
|
"\nReceived:", serialized])
|
||||||
|
|
|
@ -0,0 +1,395 @@
|
||||||
|
{
|
||||||
|
"tests": [
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title='test \"with\" &quot;'>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "test \"with\" ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value escaping"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=foo>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value non-quoting"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo<bar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo<bar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value non-quoting (with <)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo=bar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo=bar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with =)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo>bar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo>bar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with >)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title='foo\"bar'>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo\"bar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with \")"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo'bar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo'bar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with ')"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo'bar"baz\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo'bar\"baz"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with both \" and ')"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo bar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo bar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with space)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo\tbar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo\tbar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with tab)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo\nbar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo\nbar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with LF)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo\rbar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo\rbar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with CR)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo\u000bbar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo\u000bbar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value non-quoting (with linetab)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title=\"foo\fbar\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "foo\fbar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "proper attribute value quoting (with form feed)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<img>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"img",
|
||||||
|
{}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "void element (as EmptyTag token)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<!DOCTYPE foo>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Doctype",
|
||||||
|
"foo"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "doctype in error"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"a<b>c&d"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"a<b>c&d"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "character data",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<script>a<b>c&d"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"script",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"a<b>c&d"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "rcdata"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<!DOCTYPE HTML>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Doctype",
|
||||||
|
"HTML"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "doctype"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Doctype",
|
||||||
|
"HTML",
|
||||||
|
"-//W3C//DTD HTML 4.01//EN",
|
||||||
|
"http://www.w3.org/TR/html4/strict.dtd"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "HTML 4.01 DOCTYPE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Doctype",
|
||||||
|
"HTML",
|
||||||
|
"-//W3C//DTD HTML 4.01//EN"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "HTML 4.01 DOCTYPE without system identifier"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Doctype",
|
||||||
|
"html",
|
||||||
|
"",
|
||||||
|
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "IBM DOCTYPE without public identifier"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,350 @@
|
||||||
|
{
|
||||||
|
"tests": [
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "no encoding",
|
||||||
|
"options": {
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta charset=utf-8>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "empytag head",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta charset=utf-8><title>foo</title>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"title",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"foo"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"title"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "head w/title",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta charset=utf-8>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "charset",
|
||||||
|
"value": "ascii"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "head w/meta-charset",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta charset=utf-8><meta charset=utf-8>",
|
||||||
|
"<head><meta charset=utf-8><meta charset=ascii>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "charset",
|
||||||
|
"value": "ascii"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "charset",
|
||||||
|
"value": "ascii"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "head w/ two meta-charset",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta charset=utf-8><meta content=noindex name=robots>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "name",
|
||||||
|
"value": "robots"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "content",
|
||||||
|
"value": "noindex"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "head w/robots",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta content=noindex name=robots><meta charset=utf-8>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "name",
|
||||||
|
"value": "robots"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "content",
|
||||||
|
"value": "noindex"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "charset",
|
||||||
|
"value": "ascii"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "head w/robots & charset",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "http-equiv",
|
||||||
|
"value": "content-type"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "content",
|
||||||
|
"value": "text/html; charset=ascii"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "head w/ charset in http-equiv content-type",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "name",
|
||||||
|
"value": "robots"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "content",
|
||||||
|
"value": "noindex"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"meta",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "http-equiv",
|
||||||
|
"value": "content-type"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "content",
|
||||||
|
"value": "text/html; charset=ascii"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"head"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "head w/robots & charset in http-equiv content-type",
|
||||||
|
"options": {
|
||||||
|
"encoding": "utf-8",
|
||||||
|
"inject_meta_charset": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,334 @@
|
||||||
|
{
|
||||||
|
"tests": [
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<span title='test 'with' quote_char'>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "test 'with' quote_char"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "quote_char=\"'\"",
|
||||||
|
"options": {
|
||||||
|
"quote_char": "'"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<button disabled>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"button",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "disabled",
|
||||||
|
"value": "disabled"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "quote_attr_values='always'",
|
||||||
|
"options": {
|
||||||
|
"quote_attr_values": "always"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div itemscope>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "itemscope",
|
||||||
|
"value": "itemscope"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "quote_attr_values='always' with itemscope",
|
||||||
|
"options": {
|
||||||
|
"quote_attr_values": "always"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div irrelevant>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "irrelevant",
|
||||||
|
"value": "irrelevant"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "quote_attr_values='always' with irrelevant",
|
||||||
|
"options": {
|
||||||
|
"quote_attr_values": "always"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div class=\"foo\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "class",
|
||||||
|
"value": "foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "non-minimized quote_attr_values='always'",
|
||||||
|
"options": {
|
||||||
|
"quote_attr_values": "always"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div class=foo>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "class",
|
||||||
|
"value": "foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "non-minimized quote_attr_values='legacy'",
|
||||||
|
"options": {
|
||||||
|
"quote_attr_values": "legacy"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div class=foo>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "class",
|
||||||
|
"value": "foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "non-minimized quote_attr_values='spec'",
|
||||||
|
"options": {
|
||||||
|
"quote_attr_values": "spec"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<img />"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"EmptyTag",
|
||||||
|
"img",
|
||||||
|
{}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "use_trailing_solidus=true with void element",
|
||||||
|
"options": {
|
||||||
|
"use_trailing_solidus": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
{}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "use_trailing_solidus=true with non-void element",
|
||||||
|
"options": {
|
||||||
|
"use_trailing_solidus": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div itemscope=itemscope>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "itemscope",
|
||||||
|
"value": "itemscope"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "minimize_boolean_attributes=false",
|
||||||
|
"options": {
|
||||||
|
"minimize_boolean_attributes": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div irrelevant=irrelevant>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "irrelevant",
|
||||||
|
"value": "irrelevant"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "minimize_boolean_attributes=false",
|
||||||
|
"options": {
|
||||||
|
"minimize_boolean_attributes": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div itemscope=\"\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "itemscope",
|
||||||
|
"value": ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "minimize_boolean_attributes=false with empty value",
|
||||||
|
"options": {
|
||||||
|
"minimize_boolean_attributes": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<div irrelevant=\"\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"div",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "irrelevant",
|
||||||
|
"value": ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "minimize_boolean_attributes=false with empty value",
|
||||||
|
"options": {
|
||||||
|
"minimize_boolean_attributes": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<a title=\"a<b>c&d\">"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"a",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"namespace": null,
|
||||||
|
"name": "title",
|
||||||
|
"value": "a<b>c&d"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "escape less than signs in attribute values",
|
||||||
|
"options": {
|
||||||
|
"escape_lt_in_attrs": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<script>a<b>c&d"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"script",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"a<b>c&d"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "rcdata",
|
||||||
|
"options": {
|
||||||
|
"escape_rcdata": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,198 @@
|
||||||
|
{
|
||||||
|
"tests": [
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
" foo"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"\t\r\n\f foo"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "bare text with leading spaces",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"foo "
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"foo \t\r\n\f"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "bare text with trailing spaces",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"foo bar"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"foo \t\r\n\f bar"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "bare text with inner spaces",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<pre>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</pre>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"pre",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"pre"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "text within <pre>",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<pre>\t\r\n\f fo<span>o \t\r\n\f b</span>ar \t\r\n\f</pre>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"pre",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"\t\r\n\f fo"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"o \t\r\n\f b"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"span"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"ar \t\r\n\f"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"pre"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "text within <pre>, with inner markup",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<textarea>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</textarea>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"textarea",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"textarea"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "text within <textarea>",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<script>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</script>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"script",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"script"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "text within <script>",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expected": [
|
||||||
|
"<style>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</style>"
|
||||||
|
],
|
||||||
|
"input": [
|
||||||
|
[
|
||||||
|
"StartTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"style",
|
||||||
|
{}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"Characters",
|
||||||
|
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"EndTag",
|
||||||
|
"http://www.w3.org/1999/xhtml",
|
||||||
|
"style"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"description": "text within <style>",
|
||||||
|
"options": {
|
||||||
|
"strip_whitespace": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -143,11 +143,12 @@ def convert(stripChars):
|
||||||
return "\n".join(rv)
|
return "\n".join(rv)
|
||||||
return convertData
|
return convertData
|
||||||
|
|
||||||
|
|
||||||
convertExpected = convert(2)
|
convertExpected = convert(2)
|
||||||
|
|
||||||
|
|
||||||
def errorMessage(input, expected, actual):
|
def errorMessage(input, expected, actual):
|
||||||
msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
|
msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
|
||||||
(repr(input), repr(expected), repr(actual)))
|
(repr(input), repr(expected), repr(actual)))
|
||||||
if sys.version_info[0] == 2:
|
if sys.version_info[0] == 2:
|
||||||
msg = msg.encode("ascii", "backslashreplace")
|
msg = msg.encode("ascii", "backslashreplace")
|
||||||
|
|
|
@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
|
||||||
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
|
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
|
||||||
|
|
||||||
|
|
||||||
def runParserEncodingTest(data, encoding):
|
def param_encoding():
|
||||||
|
for filename in get_data_files("encoding"):
|
||||||
|
tests = _TestData(filename, b"data", encoding=None)
|
||||||
|
for test in tests:
|
||||||
|
yield test[b'data'], test[b'encoding']
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("data, encoding", param_encoding())
|
||||||
|
def test_parser_encoding(data, encoding):
|
||||||
p = HTMLParser()
|
p = HTMLParser()
|
||||||
assert p.documentEncoding is None
|
assert p.documentEncoding is None
|
||||||
p.parse(data, useChardet=False)
|
p.parse(data, useChardet=False)
|
||||||
|
@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
|
||||||
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
|
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
|
||||||
|
|
||||||
|
|
||||||
def runPreScanEncodingTest(data, encoding):
|
@pytest.mark.parametrize("data, encoding", param_encoding())
|
||||||
|
def test_prescan_encoding(data, encoding):
|
||||||
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
|
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
|
||||||
encoding = encoding.lower().decode("ascii")
|
encoding = encoding.lower().decode("ascii")
|
||||||
|
|
||||||
|
@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
|
||||||
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
|
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
|
||||||
|
|
||||||
|
|
||||||
def test_encoding():
|
|
||||||
for filename in get_data_files("encoding"):
|
|
||||||
tests = _TestData(filename, b"data", encoding=None)
|
|
||||||
for test in tests:
|
|
||||||
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
|
|
||||||
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
|
|
||||||
|
|
||||||
|
|
||||||
# pylint:disable=wrong-import-position
|
# pylint:disable=wrong-import-position
|
||||||
try:
|
try:
|
||||||
import chardet # noqa
|
import chardet # noqa
|
||||||
|
|
|
@ -28,10 +28,10 @@ def test_errorMessage():
|
||||||
|
|
||||||
# Assertions!
|
# Assertions!
|
||||||
if six.PY2:
|
if six.PY2:
|
||||||
assert b"Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
|
assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
|
||||||
else:
|
else:
|
||||||
assert six.PY3
|
assert six.PY3
|
||||||
assert "Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
|
assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
|
||||||
|
|
||||||
assert input.__repr__.call_count == 1
|
assert input.__repr__.call_count == 1
|
||||||
assert expected.__repr__.call_count == 1
|
assert expected.__repr__.call_count == 1
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
from six import PY2, text_type, unichr
|
from six import PY2, text_type
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
|
||||||
from . import support # noqa
|
from . import support # noqa
|
||||||
|
|
||||||
from html5lib.constants import namespaces, tokenTypes
|
from html5lib.constants import namespaces
|
||||||
from html5lib import parse, parseFragment, HTMLParser
|
from html5lib import parse, parseFragment, HTMLParser
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,42 +53,6 @@ def test_unicode_file():
|
||||||
assert parse(io.StringIO("a")) is not None
|
assert parse(io.StringIO("a")) is not None
|
||||||
|
|
||||||
|
|
||||||
def test_maintain_attribute_order():
|
|
||||||
# This is here because we impl it in parser and not tokenizer
|
|
||||||
p = HTMLParser()
|
|
||||||
# generate loads to maximize the chance a hash-based mutation will occur
|
|
||||||
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
|
|
||||||
token = {'name': 'html',
|
|
||||||
'selfClosing': False,
|
|
||||||
'selfClosingAcknowledged': False,
|
|
||||||
'type': tokenTypes["StartTag"],
|
|
||||||
'data': attrs}
|
|
||||||
out = p.normalizeToken(token)
|
|
||||||
attr_order = list(out["data"].keys())
|
|
||||||
assert attr_order == [x for x, i in attrs]
|
|
||||||
|
|
||||||
|
|
||||||
def test_duplicate_attribute():
|
|
||||||
# This is here because we impl it in parser and not tokenizer
|
|
||||||
doc = parse('<p class=a class=b>')
|
|
||||||
el = doc[1][0]
|
|
||||||
assert el.get("class") == "a"
|
|
||||||
|
|
||||||
|
|
||||||
def test_maintain_duplicate_attribute_order():
|
|
||||||
# This is here because we impl it in parser and not tokenizer
|
|
||||||
p = HTMLParser()
|
|
||||||
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
|
|
||||||
token = {'name': 'html',
|
|
||||||
'selfClosing': False,
|
|
||||||
'selfClosingAcknowledged': False,
|
|
||||||
'type': tokenTypes["StartTag"],
|
|
||||||
'data': attrs + [('a', len(attrs))]}
|
|
||||||
out = p.normalizeToken(token)
|
|
||||||
attr_order = list(out["data"].keys())
|
|
||||||
assert attr_order == [x for x, i in attrs]
|
|
||||||
|
|
||||||
|
|
||||||
def test_debug_log():
|
def test_debug_log():
|
||||||
parser = HTMLParser(debug=True)
|
parser = HTMLParser(debug=True)
|
||||||
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
|
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
|
||||||
|
|
|
@ -1,31 +1,22 @@
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from html5lib import constants, parseFragment, serialize
|
from html5lib import constants, parseFragment, serialize
|
||||||
from html5lib.filters import sanitizer
|
from html5lib.filters import sanitizer
|
||||||
|
|
||||||
|
|
||||||
def runSanitizerTest(_, expected, input):
|
|
||||||
parsed = parseFragment(expected)
|
|
||||||
expected = serialize(parsed,
|
|
||||||
omit_optional_tags=False,
|
|
||||||
use_trailing_solidus=True,
|
|
||||||
space_before_trailing_solidus=False,
|
|
||||||
quote_attr_values="always",
|
|
||||||
quote_char='"',
|
|
||||||
alphabetical_attributes=True)
|
|
||||||
assert expected == sanitize_html(input)
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_html(stream):
|
def sanitize_html(stream):
|
||||||
parsed = parseFragment(stream)
|
parsed = parseFragment(stream)
|
||||||
serialized = serialize(parsed,
|
with pytest.deprecated_call():
|
||||||
sanitize=True,
|
serialized = serialize(parsed,
|
||||||
omit_optional_tags=False,
|
sanitize=True,
|
||||||
use_trailing_solidus=True,
|
omit_optional_tags=False,
|
||||||
space_before_trailing_solidus=False,
|
use_trailing_solidus=True,
|
||||||
quote_attr_values="always",
|
space_before_trailing_solidus=False,
|
||||||
quote_char='"',
|
quote_attr_values="always",
|
||||||
alphabetical_attributes=True)
|
quote_char='"',
|
||||||
|
alphabetical_attributes=True)
|
||||||
return serialized
|
return serialized
|
||||||
|
|
||||||
|
|
||||||
|
@ -59,7 +50,7 @@ def test_data_uri_disallowed_type():
|
||||||
assert expected == sanitized
|
assert expected == sanitized
|
||||||
|
|
||||||
|
|
||||||
def test_sanitizer():
|
def param_sanitizer():
|
||||||
for ns, tag_name in sanitizer.allowed_elements:
|
for ns, tag_name in sanitizer.allowed_elements:
|
||||||
if ns != constants.namespaces["html"]:
|
if ns != constants.namespaces["html"]:
|
||||||
continue
|
continue
|
||||||
|
@ -67,19 +58,19 @@ def test_sanitizer():
|
||||||
'tfoot', 'th', 'thead', 'tr', 'select']:
|
'tfoot', 'th', 'thead', 'tr', 'select']:
|
||||||
continue # TODO
|
continue # TODO
|
||||||
if tag_name == 'image':
|
if tag_name == 'image':
|
||||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
yield ("test_should_allow_%s_tag" % tag_name,
|
||||||
"<img title=\"1\"/>foo <bad>bar</bad> baz",
|
"<img title=\"1\"/>foo <bad>bar</bad> baz",
|
||||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||||
elif tag_name == 'br':
|
elif tag_name == 'br':
|
||||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
yield ("test_should_allow_%s_tag" % tag_name,
|
||||||
"<br title=\"1\"/>foo <bad>bar</bad> baz<br/>",
|
"<br title=\"1\"/>foo <bad>bar</bad> baz<br/>",
|
||||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||||
elif tag_name in constants.voidElements:
|
elif tag_name in constants.voidElements:
|
||||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
yield ("test_should_allow_%s_tag" % tag_name,
|
||||||
"<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name,
|
"<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name,
|
||||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||||
else:
|
else:
|
||||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
yield ("test_should_allow_%s_tag" % tag_name,
|
||||||
"<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
|
"<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
|
||||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||||
|
|
||||||
|
@ -93,7 +84,7 @@ def test_sanitizer():
|
||||||
attribute_value = 'foo'
|
attribute_value = 'foo'
|
||||||
if attribute_name in sanitizer.attr_val_is_uri:
|
if attribute_name in sanitizer.attr_val_is_uri:
|
||||||
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
|
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
|
||||||
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
|
yield ("test_should_allow_%s_attribute" % attribute_name,
|
||||||
"<p %s=\"%s\">foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value),
|
"<p %s=\"%s\">foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value),
|
||||||
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
|
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
|
||||||
|
|
||||||
|
@ -101,7 +92,7 @@ def test_sanitizer():
|
||||||
rest_of_uri = '//sub.domain.tld/path/object.ext'
|
rest_of_uri = '//sub.domain.tld/path/object.ext'
|
||||||
if protocol == 'data':
|
if protocol == 'data':
|
||||||
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
|
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
|
||||||
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
|
yield ("test_should_allow_uppercase_%s_uris" % protocol,
|
||||||
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
|
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
|
||||||
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
|
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
|
||||||
|
|
||||||
|
@ -110,11 +101,26 @@ def test_sanitizer():
|
||||||
if protocol == 'data':
|
if protocol == 'data':
|
||||||
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
|
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
|
||||||
protocol = protocol.upper()
|
protocol = protocol.upper()
|
||||||
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
|
yield ("test_should_allow_uppercase_%s_uris" % protocol,
|
||||||
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
|
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
|
||||||
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
|
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("expected, input",
|
||||||
|
(pytest.param(expected, input, id=id)
|
||||||
|
for id, expected, input in param_sanitizer()))
|
||||||
|
def test_sanitizer(expected, input):
|
||||||
|
parsed = parseFragment(expected)
|
||||||
|
expected = serialize(parsed,
|
||||||
|
omit_optional_tags=False,
|
||||||
|
use_trailing_solidus=True,
|
||||||
|
space_before_trailing_solidus=False,
|
||||||
|
quote_attr_values="always",
|
||||||
|
quote_char='"',
|
||||||
|
alphabetical_attributes=True)
|
||||||
|
assert expected == sanitize_html(input)
|
||||||
|
|
||||||
|
|
||||||
def test_lowercase_color_codes_in_style():
|
def test_lowercase_color_codes_in_style():
|
||||||
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
|
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
|
||||||
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
|
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
|
||||||
|
|
|
@ -80,7 +80,7 @@ class JsonWalker(TreeWalker):
|
||||||
|
|
||||||
|
|
||||||
def serialize_html(input, options):
|
def serialize_html(input, options):
|
||||||
options = dict([(str(k), v) for k, v in options.items()])
|
options = {str(k): v for k, v in options.items()}
|
||||||
encoding = options.get("encoding", None)
|
encoding = options.get("encoding", None)
|
||||||
if "encoding" in options:
|
if "encoding" in options:
|
||||||
del options["encoding"]
|
del options["encoding"]
|
||||||
|
@ -89,19 +89,6 @@ def serialize_html(input, options):
|
||||||
return serializer.render(stream, encoding)
|
return serializer.render(stream, encoding)
|
||||||
|
|
||||||
|
|
||||||
def runSerializerTest(input, expected, options):
|
|
||||||
encoding = options.get("encoding", None)
|
|
||||||
|
|
||||||
if encoding:
|
|
||||||
expected = list(map(lambda x: x.encode(encoding), expected))
|
|
||||||
|
|
||||||
result = serialize_html(input, options)
|
|
||||||
if len(expected) == 1:
|
|
||||||
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
|
|
||||||
elif result not in expected:
|
|
||||||
assert False, "Expected: %s, Received: %s" % (expected, result)
|
|
||||||
|
|
||||||
|
|
||||||
def throwsWithLatin1(input):
|
def throwsWithLatin1(input):
|
||||||
with pytest.raises(UnicodeEncodeError):
|
with pytest.raises(UnicodeEncodeError):
|
||||||
serialize_html(input, {"encoding": "iso-8859-1"})
|
serialize_html(input, {"encoding": "iso-8859-1"})
|
||||||
|
@ -120,13 +107,13 @@ def testDoctypeSystemId():
|
||||||
|
|
||||||
|
|
||||||
def testCdataCharacters():
|
def testCdataCharacters():
|
||||||
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
|
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
|
||||||
["<style>ā"], {"encoding": "iso-8859-1"})
|
["<style>ā"], {"encoding": "iso-8859-1"})
|
||||||
|
|
||||||
|
|
||||||
def testCharacters():
|
def testCharacters():
|
||||||
runSerializerTest([["Characters", "\u0101"]],
|
test_serializer([["Characters", "\u0101"]],
|
||||||
["ā"], {"encoding": "iso-8859-1"})
|
["ā"], {"encoding": "iso-8859-1"})
|
||||||
|
|
||||||
|
|
||||||
def testStartTagName():
|
def testStartTagName():
|
||||||
|
@ -138,9 +125,9 @@ def testAttributeName():
|
||||||
|
|
||||||
|
|
||||||
def testAttributeValue():
|
def testAttributeValue():
|
||||||
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
|
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
|
||||||
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
|
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
|
||||||
["<span potato=ā>"], {"encoding": "iso-8859-1"})
|
["<span potato=ā>"], {"encoding": "iso-8859-1"})
|
||||||
|
|
||||||
|
|
||||||
def testEndTagName():
|
def testEndTagName():
|
||||||
|
@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
|
||||||
else:
|
else:
|
||||||
output_ = ['<span foo="%s">' % c]
|
output_ = ['<span foo="%s">' % c]
|
||||||
options_ = {"quote_attr_values": "spec"}
|
options_ = {"quote_attr_values": "spec"}
|
||||||
runSerializerTest(input_, output_, options_)
|
test_serializer(input_, output_, options_)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
|
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
|
||||||
|
@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
|
||||||
else:
|
else:
|
||||||
output_ = ['<span foo="%s">' % c]
|
output_ = ['<span foo="%s">' % c]
|
||||||
options_ = {"quote_attr_values": "legacy"}
|
options_ = {"quote_attr_values": "legacy"}
|
||||||
runSerializerTest(input_, output_, options_)
|
test_serializer(input_, output_, options_)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
|
||||||
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>'
|
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>'
|
||||||
|
|
||||||
|
|
||||||
def test_serializer():
|
def param_serializer():
|
||||||
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
|
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
|
||||||
with open(filename) as fp:
|
with open(filename) as fp:
|
||||||
tests = json.load(fp)
|
tests = json.load(fp)
|
||||||
for test in tests['tests']:
|
for test in tests['tests']:
|
||||||
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
|
yield test["input"], test["expected"], test.get("options", {})
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("input, expected, options", param_serializer())
|
||||||
|
def test_serializer(input, expected, options):
|
||||||
|
encoding = options.get("encoding", None)
|
||||||
|
|
||||||
|
if encoding:
|
||||||
|
expected = list(map(lambda x: x.encode(encoding), expected))
|
||||||
|
|
||||||
|
result = serialize_html(input, options)
|
||||||
|
if len(expected) == 1:
|
||||||
|
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
|
||||||
|
elif result not in expected:
|
||||||
|
assert False, "Expected: %s, Received: %s" % (expected, result)
|
||||||
|
|
|
@ -308,9 +308,11 @@ def test_invalid_codepoints(inp, num):
|
||||||
("'\\uD800\\uD800\\uD800'", 3),
|
("'\\uD800\\uD800\\uD800'", 3),
|
||||||
("'a\\uD800a\\uD800a\\uD800a'", 3),
|
("'a\\uD800a\\uD800a\\uD800a'", 3),
|
||||||
("'\\uDFFF\\uDBFF'", 2),
|
("'\\uDFFF\\uDBFF'", 2),
|
||||||
pytest.mark.skipif(sys.maxunicode == 0xFFFF,
|
pytest.param(
|
||||||
("'\\uDBFF\\uDFFF'", 2),
|
"'\\uDBFF\\uDFFF'", 2,
|
||||||
reason="narrow Python")])
|
marks=pytest.mark.skipif(
|
||||||
|
sys.maxunicode == 0xFFFF,
|
||||||
|
reason="narrow Python"))])
|
||||||
def test_invalid_codepoints_surrogates(inp, num):
|
def test_invalid_codepoints_surrogates(inp, num):
|
||||||
inp = eval(inp) # pylint:disable=eval-used
|
inp = eval(inp) # pylint:disable=eval-used
|
||||||
fp = StringIO(inp)
|
fp = StringIO(inp)
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
|
import io
|
||||||
|
|
||||||
|
from six import unichr, text_type
|
||||||
|
|
||||||
|
from html5lib._tokenizer import HTMLTokenizer
|
||||||
|
from html5lib.constants import tokenTypes
|
||||||
|
|
||||||
|
|
||||||
|
def ignore_parse_errors(toks):
|
||||||
|
for tok in toks:
|
||||||
|
if tok['type'] != tokenTypes['ParseError']:
|
||||||
|
yield tok
|
||||||
|
|
||||||
|
|
||||||
|
def test_maintain_attribute_order():
|
||||||
|
# generate loads to maximize the chance a hash-based mutation will occur
|
||||||
|
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||||
|
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
|
||||||
|
|
||||||
|
toks = HTMLTokenizer(stream)
|
||||||
|
out = list(ignore_parse_errors(toks))
|
||||||
|
|
||||||
|
assert len(out) == 1
|
||||||
|
assert out[0]['type'] == tokenTypes['StartTag']
|
||||||
|
|
||||||
|
attrs_tok = out[0]['data']
|
||||||
|
assert len(attrs_tok) == len(attrs)
|
||||||
|
|
||||||
|
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
|
||||||
|
assert in_name == out_name
|
||||||
|
assert in_value == out_value
|
||||||
|
|
||||||
|
|
||||||
|
def test_duplicate_attribute():
|
||||||
|
stream = io.StringIO("<span a=1 a=2 a=3>")
|
||||||
|
|
||||||
|
toks = HTMLTokenizer(stream)
|
||||||
|
out = list(ignore_parse_errors(toks))
|
||||||
|
|
||||||
|
assert len(out) == 1
|
||||||
|
assert out[0]['type'] == tokenTypes['StartTag']
|
||||||
|
|
||||||
|
attrs_tok = out[0]['data']
|
||||||
|
assert len(attrs_tok) == 1
|
||||||
|
assert list(attrs_tok.items()) == [('a', '1')]
|
||||||
|
|
||||||
|
|
||||||
|
def test_maintain_duplicate_attribute_order():
|
||||||
|
# generate loads to maximize the chance a hash-based mutation will occur
|
||||||
|
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||||
|
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
|
||||||
|
|
||||||
|
toks = HTMLTokenizer(stream)
|
||||||
|
out = list(ignore_parse_errors(toks))
|
||||||
|
|
||||||
|
assert len(out) == 1
|
||||||
|
assert out[0]['type'] == tokenTypes['StartTag']
|
||||||
|
|
||||||
|
attrs_tok = out[0]['data']
|
||||||
|
assert len(attrs_tok) == len(attrs)
|
||||||
|
|
||||||
|
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
|
||||||
|
assert in_name == out_name
|
||||||
|
assert in_value == out_value
|
|
@ -1,7 +1,9 @@
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
from __future__ import absolute_import, division, unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from six import unichr, text_type
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -61,24 +63,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
|
||||||
setter['ElementTree'](docfrag)(name, value)
|
setter['ElementTree'](docfrag)(name, value)
|
||||||
|
|
||||||
|
|
||||||
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
|
def param_treewalker_six_mix():
|
||||||
"""tests what happens when we add attributes to the intext"""
|
|
||||||
treeName, treeClass = tree
|
|
||||||
if treeClass is None:
|
|
||||||
pytest.skip("Treebuilder not loaded")
|
|
||||||
parser = html5parser.HTMLParser(tree=treeClass["builder"])
|
|
||||||
document = parser.parseFragment(intext)
|
|
||||||
for nom, val in attrs_to_add:
|
|
||||||
set_attribute_on_first_child(document, nom, val, treeName)
|
|
||||||
|
|
||||||
document = treeClass.get("adapter", lambda x: x)(document)
|
|
||||||
output = treewalkers.pprint(treeClass["walker"](document))
|
|
||||||
output = attrlist.sub(sortattrs, output)
|
|
||||||
if output not in expected:
|
|
||||||
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
|
|
||||||
|
|
||||||
|
|
||||||
def test_treewalker_six_mix():
|
|
||||||
"""Str/Unicode mix. If str attrs added to tree"""
|
"""Str/Unicode mix. If str attrs added to tree"""
|
||||||
|
|
||||||
# On Python 2.x string literals are of type str. Unless, like this
|
# On Python 2.x string literals are of type str. Unless, like this
|
||||||
|
@ -99,7 +84,25 @@ def test_treewalker_six_mix():
|
||||||
|
|
||||||
for tree in sorted(treeTypes.items()):
|
for tree in sorted(treeTypes.items()):
|
||||||
for intext, attrs, expected in sm_tests:
|
for intext, attrs, expected in sm_tests:
|
||||||
yield runTreewalkerEditTest, intext, expected, attrs, tree
|
yield intext, expected, attrs, tree
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
|
||||||
|
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
|
||||||
|
"""tests what happens when we add attributes to the intext"""
|
||||||
|
treeName, treeClass = tree
|
||||||
|
if treeClass is None:
|
||||||
|
pytest.skip("Treebuilder not loaded")
|
||||||
|
parser = html5parser.HTMLParser(tree=treeClass["builder"])
|
||||||
|
document = parser.parseFragment(intext)
|
||||||
|
for nom, val in attrs_to_add:
|
||||||
|
set_attribute_on_first_child(document, nom, val, treeName)
|
||||||
|
|
||||||
|
document = treeClass.get("adapter", lambda x: x)(document)
|
||||||
|
output = treewalkers.pprint(treeClass["walker"](document))
|
||||||
|
output = attrlist.sub(sortattrs, output)
|
||||||
|
if output not in expected:
|
||||||
|
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
|
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
|
||||||
|
@ -134,3 +137,69 @@ def test_lxml_xml():
|
||||||
output = Lint(walker(lxmltree))
|
output = Lint(walker(lxmltree))
|
||||||
|
|
||||||
assert list(output) == expected
|
assert list(output) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("treeName",
|
||||||
|
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
|
||||||
|
pytest.mark.skipif(
|
||||||
|
treeName != "lxml" or
|
||||||
|
sys.version_info < (3, 7), reason="dict order undef")])
|
||||||
|
for treeName in sorted(treeTypes.keys())])
|
||||||
|
def test_maintain_attribute_order(treeName):
|
||||||
|
treeAPIs = treeTypes[treeName]
|
||||||
|
if treeAPIs is None:
|
||||||
|
pytest.skip("Treebuilder not loaded")
|
||||||
|
|
||||||
|
# generate loads to maximize the chance a hash-based mutation will occur
|
||||||
|
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||||
|
data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
|
||||||
|
|
||||||
|
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
|
||||||
|
document = parser.parseFragment(data)
|
||||||
|
|
||||||
|
document = treeAPIs.get("adapter", lambda x: x)(document)
|
||||||
|
output = list(Lint(treeAPIs["walker"](document)))
|
||||||
|
|
||||||
|
assert len(output) == 2
|
||||||
|
assert output[0]['type'] == 'StartTag'
|
||||||
|
assert output[1]['type'] == "EndTag"
|
||||||
|
|
||||||
|
attrs_out = output[0]['data']
|
||||||
|
assert len(attrs) == len(attrs_out)
|
||||||
|
|
||||||
|
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()):
|
||||||
|
assert (None, in_name) == out_name
|
||||||
|
assert in_value == out_value
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("treeName",
|
||||||
|
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
|
||||||
|
pytest.mark.skipif(
|
||||||
|
treeName != "lxml" or
|
||||||
|
sys.version_info < (3, 7), reason="dict order undef")])
|
||||||
|
for treeName in sorted(treeTypes.keys())])
|
||||||
|
def test_maintain_attribute_order_adjusted(treeName):
|
||||||
|
treeAPIs = treeTypes[treeName]
|
||||||
|
if treeAPIs is None:
|
||||||
|
pytest.skip("Treebuilder not loaded")
|
||||||
|
|
||||||
|
# generate loads to maximize the chance a hash-based mutation will occur
|
||||||
|
data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
|
||||||
|
|
||||||
|
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
|
||||||
|
document = parser.parseFragment(data)
|
||||||
|
|
||||||
|
document = treeAPIs.get("adapter", lambda x: x)(document)
|
||||||
|
output = list(Lint(treeAPIs["walker"](document)))
|
||||||
|
|
||||||
|
assert len(output) == 2
|
||||||
|
assert output[0]['type'] == 'StartTag'
|
||||||
|
assert output[1]['type'] == "EndTag"
|
||||||
|
|
||||||
|
attrs_out = output[0]['data']
|
||||||
|
|
||||||
|
assert list(attrs_out.items()) == [((None, 'a'), '1'),
|
||||||
|
((None, 'refX'), '2'),
|
||||||
|
((None, 'b'), '3'),
|
||||||
|
(('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
|
||||||
|
((None, 'c'), '5')]
|
||||||
|
|
|
@ -1,34 +0,0 @@
|
||||||
Credits
|
|
||||||
=======
|
|
||||||
|
|
||||||
The ``html5lib`` test data is maintained by:
|
|
||||||
|
|
||||||
- James Graham
|
|
||||||
- Geoffrey Sneddon
|
|
||||||
|
|
||||||
|
|
||||||
Contributors
|
|
||||||
------------
|
|
||||||
|
|
||||||
- Adam Barth
|
|
||||||
- Andi Sidwell
|
|
||||||
- Anne van Kesteren
|
|
||||||
- David Flanagan
|
|
||||||
- Edward Z. Yang
|
|
||||||
- Geoffrey Sneddon
|
|
||||||
- Henri Sivonen
|
|
||||||
- Ian Hickson
|
|
||||||
- Jacques Distler
|
|
||||||
- James Graham
|
|
||||||
- Lachlan Hunt
|
|
||||||
- lantis63
|
|
||||||
- Mark Pilgrim
|
|
||||||
- Mats Palmgren
|
|
||||||
- Ms2ger
|
|
||||||
- Nolan Waite
|
|
||||||
- Philip Taylor
|
|
||||||
- Rafael Weinstein
|
|
||||||
- Ryan King
|
|
||||||
- Sam Ruby
|
|
||||||
- Simon Pieters
|
|
||||||
- Thomas Broyer
|
|
|
@ -1,21 +0,0 @@
|
||||||
Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
|
|
||||||
other contributors
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining
|
|
||||||
a copy of this software and associated documentation files (the
|
|
||||||
"Software"), to deal in the Software without restriction, including
|
|
||||||
without limitation the rights to use, copy, modify, merge, publish,
|
|
||||||
distribute, sublicense, and/or sell copies of the Software, and to
|
|
||||||
permit persons to whom the Software is furnished to do so, subject to
|
|
||||||
the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be
|
|
||||||
included in all copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
||||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
||||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
||||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
@ -1,51 +0,0 @@
|
||||||
老子《道德經》 第一~四十章
|
|
||||||
|
|
||||||
老子道經
|
|
||||||
|
|
||||||
第一章
|
|
||||||
|
|
||||||
道可道,非常道。名可名,非常名。無,名天地之始﹔有,名萬物之母。
|
|
||||||
故常無,欲以觀其妙;常有,欲以觀其徼。此兩者,同出而異名,同謂之
|
|
||||||
玄。玄之又玄,眾妙之門。
|
|
||||||
|
|
||||||
第二章
|
|
||||||
|
|
||||||
天下皆知美之為美,斯惡矣﹔皆知善之為善,斯不善矣。故有無相生,難
|
|
||||||
易相成,長短相形,高下相傾,音聲相和,前後相隨。是以聖人處「無為
|
|
||||||
」之事,行「不言」之教。萬物作焉而不辭,生而不有,為而不恃,功成
|
|
||||||
而弗居。夫唯弗居,是以不去。
|
|
||||||
|
|
||||||
第三章
|
|
||||||
|
|
||||||
不尚賢,使民不爭﹔不貴難得之貨,使民不為盜﹔不見可欲,使民心不亂
|
|
||||||
。是以「聖人」之治,虛其心,實其腹,弱其志,強其骨。常使民無知無
|
|
||||||
欲。使夫智者不敢為也。為「無為」,則無不治。
|
|
||||||
|
|
||||||
第四章
|
|
||||||
|
|
||||||
「道」沖,而用之或不盈。淵兮,似萬物之宗﹔挫其銳,解其紛,和其光
|
|
||||||
,同其塵﹔湛兮似或存。吾不知誰之子?象帝之先。
|
|
||||||
|
|
||||||
第五章
|
|
||||||
|
|
||||||
天地不仁,以萬物為芻狗﹔聖人不仁,以百姓為芻狗。天地之間,其猶橐
|
|
||||||
蘥乎?虛而不屈,動而愈出。多言數窮,不如守中。
|
|
||||||
|
|
||||||
第六章
|
|
||||||
|
|
||||||
谷神不死,是謂玄牝。玄牝之門,是謂天地根。綿綿若存,用之不勤。
|
|
||||||
|
|
||||||
第七章
|
|
||||||
|
|
||||||
天長地久。天地所以能長且久者,以其不自生,故能長久。是以聖人後其
|
|
||||||
身而身先,外其身而身存。非以其無私邪?故能成其私。
|
|
||||||
|
|
||||||
第八章
|
|
||||||
|
|
||||||
上善若水。水善利萬物而不爭。處眾人之所惡,故幾於道。居善地,心善
|
|
||||||
淵,與善仁,言善信,政善治,事善能,動善時。夫唯不爭,故無尤。
|
|
||||||
|
|
||||||
第九章
|
|
||||||
|
|
||||||
持而盈之,不如其已﹔揣而銳之,不可長保。金玉滿堂,莫之能守﹔富貴
|
|
||||||
而驕,自遺其咎。功遂身退,天之道。
|
|
|
@ -1,10 +0,0 @@
|
||||||
#data
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
|
|
||||||
<!--京-->
|
|
||||||
<title>Yahoo! JAPAN</title>
|
|
||||||
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
|
|
||||||
<style type="text/css" media="all">
|
|
||||||
#encoding
|
|
||||||
euc-jp
|
|
File diff suppressed because one or more lines are too long
|
@ -1,115 +0,0 @@
|
||||||
#data
|
|
||||||
<meta
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset = "
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset=euc-jp
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta <meta charset='euc-jp'>
|
|
||||||
#encoding
|
|
||||||
euc-jp
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset = 'euc-jp'>
|
|
||||||
#encoding
|
|
||||||
euc-jp
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!-- -->
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!-- -->
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta http-equiv="Content-Type<meta charset="utf-8">
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset =
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset= utf-8
|
|
||||||
>
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta content = "text/html;
|
|
||||||
#encoding
|
|
||||||
windows-1252
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset="UTF-16">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset="UTF-16LE">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<meta charset="UTF-16BE">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<html a=ñ>
|
|
||||||
<meta charset="utf-8">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<html ñ>
|
|
||||||
<meta charset="utf-8">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
||||||
|
|
||||||
#data
|
|
||||||
<html>ñ
|
|
||||||
<meta charset="utf-8">
|
|
||||||
#encoding
|
|
||||||
utf-8
|
|
|
@ -1,125 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description": "proper attribute value escaping",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" ""}]]],
|
|
||||||
"expected": ["<span title='test \"with\" &quot;'>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value non-quoting",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
|
|
||||||
"expected": ["<span title=foo>"],
|
|
||||||
"xhtml": ["<span title=\"foo\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value non-quoting (with <)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
|
|
||||||
"expected": ["<span title=foo<bar>"],
|
|
||||||
"xhtml": ["<span title=\"foo<bar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with =)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
|
|
||||||
"expected": ["<span title=\"foo=bar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with >)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
|
|
||||||
"expected": ["<span title=\"foo>bar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with \")",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
|
|
||||||
"expected": ["<span title='foo\"bar'>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with ')",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
|
|
||||||
"expected": ["<span title=\"foo'bar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with both \" and ')",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
|
|
||||||
"expected": ["<span title=\"foo'bar"baz\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with space)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
|
|
||||||
"expected": ["<span title=\"foo bar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with tab)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
|
|
||||||
"expected": ["<span title=\"foo\tbar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with LF)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
|
|
||||||
"expected": ["<span title=\"foo\nbar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with CR)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
|
|
||||||
"expected": ["<span title=\"foo\rbar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value non-quoting (with linetab)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
|
|
||||||
"expected": ["<span title=foo\u000Bbar>"],
|
|
||||||
"xhtml": ["<span title=\"foo\u000Bbar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "proper attribute value quoting (with form feed)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
|
|
||||||
"expected": ["<span title=\"foo\u000Cbar\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "void element (as EmptyTag token)",
|
|
||||||
"input": [["EmptyTag", "img", {}]],
|
|
||||||
"expected": ["<img>"],
|
|
||||||
"xhtml": ["<img />"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "void element (as StartTag token)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
|
|
||||||
"expected": ["<img>"],
|
|
||||||
"xhtml": ["<img />"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "doctype in error",
|
|
||||||
"input": [["Doctype", "foo"]],
|
|
||||||
"expected": ["<!DOCTYPE foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "character data",
|
|
||||||
"options": {"encoding":"utf-8"},
|
|
||||||
"input": [["Characters", "a<b>c&d"]],
|
|
||||||
"expected": ["a<b>c&d"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "rcdata",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
|
|
||||||
"expected": ["<script>a<b>c&d"],
|
|
||||||
"xhtml": ["<script>a<b>c&d"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "doctype",
|
|
||||||
"input": [["Doctype", "HTML"]],
|
|
||||||
"expected": ["<!DOCTYPE HTML>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "HTML 4.01 DOCTYPE",
|
|
||||||
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
|
|
||||||
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "HTML 4.01 DOCTYPE without system identifer",
|
|
||||||
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
|
|
||||||
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "IBM DOCTYPE without public identifer",
|
|
||||||
"input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
|
|
||||||
"expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
|
|
||||||
}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,66 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description": "no encoding",
|
|
||||||
"options": {"inject_meta_charset": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": [""],
|
|
||||||
"xhtml": ["<head></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "empytag head",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta charset=utf-8>"],
|
|
||||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head w/title",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta charset=utf-8><title>foo</title>"],
|
|
||||||
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head w/meta-charset",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta charset=utf-8>"],
|
|
||||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head w/ two meta-charset",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
|
|
||||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head w/robots",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
|
|
||||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head w/robots & charset",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
|
|
||||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head w/ charset in http-equiv content-type",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
|
||||||
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head w/robots & charset in http-equiv content-type",
|
|
||||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
|
||||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
|
||||||
}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,965 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description": "html start-tag followed by text, with attributes",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
|
|
||||||
"expected": ["<html lang=en>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "html start-tag followed by comment",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
|
|
||||||
"expected": ["<html><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html start-tag followed by space character",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
|
|
||||||
"expected": ["<html> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html start-tag followed by text",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
|
|
||||||
"expected": ["foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html start-tag followed by start-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html start-tag followed by end-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "html end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</html><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</html> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
|
|
||||||
"expected": ["foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "html end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "head start-tag followed by comment",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
|
|
||||||
"expected": ["<head><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head start-tag followed by space character",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
|
|
||||||
"expected": ["<head> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head start-tag followed by text",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
|
|
||||||
"expected": ["<head>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head start-tag followed by start-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["<head></foo>", "</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "empty head element",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head start-tag followed by empty-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
|
|
||||||
"expected": ["<head>", ""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "head end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</head><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</head> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
|
|
||||||
"expected": ["foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "head end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "body start-tag followed by comment",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
|
|
||||||
"expected": ["<body><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body start-tag followed by space character",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
|
|
||||||
"expected": ["<body> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body start-tag followed by text",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
|
|
||||||
"expected": ["foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body start-tag followed by start-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body start-tag followed by end-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "body end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</body><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</body> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
|
|
||||||
"expected": ["foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "body end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "li end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</li><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "li end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</li> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "li end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</li>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "li end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</li><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "li end-tag followed by li start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
|
|
||||||
"expected": ["<li>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "li end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "li end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "dt end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</dt><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dt end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</dt> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dt end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</dt>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dt end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</dt><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dt end-tag followed by dt start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
|
|
||||||
"expected": ["<dt>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dt end-tag followed by dd start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
|
|
||||||
"expected": ["<dd>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dt end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</dt></foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dt end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
|
|
||||||
"expected": ["</dt>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "dd end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</dd><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dd end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</dd> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dd end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</dd>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dd end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</dd><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dd end-tag followed by dd start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
|
|
||||||
"expected": ["<dd>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dd end-tag followed by dt start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
|
|
||||||
"expected": ["<dt>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dd end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "dd end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</p><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</p> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</p>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</p><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by address start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
|
|
||||||
"expected": ["<address>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by article start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
|
|
||||||
"expected": ["<article>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by aside start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
|
|
||||||
"expected": ["<aside>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by blockquote start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
|
|
||||||
"expected": ["<blockquote>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by datagrid start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
|
|
||||||
"expected": ["<datagrid>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by dialog start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
|
|
||||||
"expected": ["<dialog>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by dir start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
|
|
||||||
"expected": ["<dir>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by div start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
|
|
||||||
"expected": ["<div>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by dl start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
|
|
||||||
"expected": ["<dl>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by fieldset start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
|
|
||||||
"expected": ["<fieldset>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by footer start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
|
|
||||||
"expected": ["<footer>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by form start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
|
|
||||||
"expected": ["<form>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by h1 start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
|
|
||||||
"expected": ["<h1>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by h2 start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
|
|
||||||
"expected": ["<h2>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by h3 start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
|
|
||||||
"expected": ["<h3>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by h4 start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
|
|
||||||
"expected": ["<h4>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by h5 start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
|
|
||||||
"expected": ["<h5>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by h6 start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
|
|
||||||
"expected": ["<h6>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by header start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
|
|
||||||
"expected": ["<header>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by hr empty-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
|
|
||||||
"expected": ["<hr>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by menu start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
|
|
||||||
"expected": ["<menu>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by nav start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
|
|
||||||
"expected": ["<nav>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by ol start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
|
|
||||||
"expected": ["<ol>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by p start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
|
|
||||||
"expected": ["<p>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by pre start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
|
|
||||||
"expected": ["<pre>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by section start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
|
|
||||||
"expected": ["<section>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by table start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
|
|
||||||
"expected": ["<table>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by ul start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
|
|
||||||
"expected": ["<ul>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "p end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "optgroup end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</optgroup><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "optgroup end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</optgroup> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "optgroup end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</optgroup>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "optgroup end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</optgroup><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "optgroup end-tag followed by optgroup start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
|
|
||||||
"expected": ["<optgroup>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "optgroup end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "optgroup end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "option end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</option><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "option end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</option> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "option end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</option>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "option end-tag followed by optgroup start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
|
|
||||||
"expected": ["<optgroup>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "option end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</option><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "option end-tag followed by option start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
|
|
||||||
"expected": ["<option>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "option end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "option end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "colgroup start-tag followed by comment",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
|
|
||||||
"expected": ["<colgroup><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup start-tag followed by space character",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
|
|
||||||
"expected": ["<colgroup> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup start-tag followed by text",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
|
|
||||||
"expected": ["<colgroup>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup start-tag followed by start-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<colgroup><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "first colgroup in a table with a col child",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
|
|
||||||
"expected": ["<table><col>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup with a col child, following another colgroup",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
|
|
||||||
"expected": ["</colgroup><col>", "<colgroup><col>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup start-tag followed by end-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["<colgroup></foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup start-tag at EOF",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
|
|
||||||
"expected": ["<colgroup>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "colgroup end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</colgroup><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</colgroup> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
|
|
||||||
"expected": ["foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "colgroup end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "thead end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</thead><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "thead end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</thead> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "thead end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</thead>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "thead end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</thead><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "thead end-tag followed by tbody start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
|
||||||
"expected": ["<tbody>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "thead end-tag followed by tfoot start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
|
|
||||||
"expected": ["<tfoot>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "thead end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</thead></foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "thead end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
|
|
||||||
"expected": ["</thead>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "tbody start-tag followed by comment",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
|
|
||||||
"expected": ["<tbody><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody start-tag followed by space character",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
|
|
||||||
"expected": ["<tbody> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody start-tag followed by text",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
|
|
||||||
"expected": ["<tbody>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody start-tag followed by start-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["<tbody><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "first tbody in a table with a tr child",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
|
||||||
"expected": ["<table><tr>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody with a tr child, following another tbody",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
|
||||||
"expected": ["<tbody><tr>", "</tbody><tr>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody with a tr child, following a thead",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
|
||||||
"expected": ["<tbody><tr>", "</thead><tr>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody with a tr child, following a tfoot",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
|
||||||
"expected": ["<tbody><tr>", "</tfoot><tr>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody start-tag followed by end-tag",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["<tbody></foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody start-tag at EOF",
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
|
||||||
"expected": ["<tbody>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "tbody end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</tbody><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</tbody> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</tbody>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</tbody><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody end-tag followed by tbody start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
|
||||||
"expected": ["<tbody>", "</tbody>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody end-tag followed by tfoot start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
|
|
||||||
"expected": ["<tfoot>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tbody end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "tfoot end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</tfoot><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tfoot end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</tfoot> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tfoot end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</tfoot>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tfoot end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</tfoot><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tfoot end-tag followed by tbody start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
|
||||||
"expected": ["<tbody>", "</tfoot>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tfoot end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tfoot end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "tr end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</tr><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tr end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</tr> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tr end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</tr>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tr end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</tr><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tr end-tag followed by tr start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
|
||||||
"expected": ["<tr>", "</tr>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tr end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "tr end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "td end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</td><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "td end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</td> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "td end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</td>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "td end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</td><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "td end-tag followed by td start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
|
|
||||||
"expected": ["<td>", "</td>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "td end-tag followed by th start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
|
|
||||||
"expected": ["<th>", "</td>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "td end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "td end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
|
|
||||||
"expected": [""]
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{"description": "th end-tag followed by comment",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
|
|
||||||
"expected": ["</th><!--foo-->"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "th end-tag followed by space character",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
|
|
||||||
"expected": ["</th> foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "th end-tag followed by text",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
|
|
||||||
"expected": ["</th>foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "th end-tag followed by start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
|
||||||
"expected": ["</th><foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "th end-tag followed by th start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
|
|
||||||
"expected": ["<th>", "</th>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "th end-tag followed by td start-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
|
|
||||||
"expected": ["<td>", "</th>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "th end-tag followed by end-tag",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
|
||||||
"expected": ["</foo>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "th end-tag at EOF",
|
|
||||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml" , "th"]],
|
|
||||||
"expected": [""]
|
|
||||||
}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,60 +0,0 @@
|
||||||
{"tests":[
|
|
||||||
|
|
||||||
{"description": "quote_char=\"'\"",
|
|
||||||
"options": {"quote_char": "'"},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
|
|
||||||
"expected": ["<span title='test 'with' quote_char'>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "quote_attr_values=true",
|
|
||||||
"options": {"quote_attr_values": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
|
|
||||||
"expected": ["<button disabled>"],
|
|
||||||
"xhtml": ["<button disabled=\"disabled\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "quote_attr_values=true with irrelevant",
|
|
||||||
"options": {"quote_attr_values": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
|
|
||||||
"expected": ["<div irrelevant>"],
|
|
||||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "use_trailing_solidus=true with void element",
|
|
||||||
"options": {"use_trailing_solidus": true},
|
|
||||||
"input": [["EmptyTag", "img", {}]],
|
|
||||||
"expected": ["<img />"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "use_trailing_solidus=true with non-void element",
|
|
||||||
"options": {"use_trailing_solidus": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
|
|
||||||
"expected": ["<div>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "minimize_boolean_attributes=false",
|
|
||||||
"options": {"minimize_boolean_attributes": false},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
|
|
||||||
"expected": ["<div irrelevant=irrelevant>"],
|
|
||||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "minimize_boolean_attributes=false with empty value",
|
|
||||||
"options": {"minimize_boolean_attributes": false},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
|
|
||||||
"expected": ["<div irrelevant=\"\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "escape less than signs in attribute values",
|
|
||||||
"options": {"escape_lt_in_attrs": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
|
|
||||||
"expected": ["<a title=\"a<b>c&d\">"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "rcdata",
|
|
||||||
"options": {"escape_rcdata": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
|
|
||||||
"expected": ["<script>a<b>c&d"]
|
|
||||||
}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,51 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description": "bare text with leading spaces",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["Characters", "\t\r\n\u000C foo"]],
|
|
||||||
"expected": [" foo"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "bare text with trailing spaces",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["Characters", "foo \t\r\n\u000C"]],
|
|
||||||
"expected": ["foo "]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "bare text with inner spaces",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["Characters", "foo \t\r\n\u000C bar"]],
|
|
||||||
"expected": ["foo bar"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "text within <pre>",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
|
|
||||||
"expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "text within <pre>, with inner markup",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
|
|
||||||
"expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "text within <textarea>",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
|
|
||||||
"expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "text within <script>",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
|
|
||||||
"expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
|
|
||||||
},
|
|
||||||
|
|
||||||
{"description": "text within <style>",
|
|
||||||
"options": {"strip_whitespace": true},
|
|
||||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
|
|
||||||
"expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
|
|
||||||
}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,104 +0,0 @@
|
||||||
Tokenizer tests
|
|
||||||
===============
|
|
||||||
|
|
||||||
The test format is [JSON](http://www.json.org/). This has the advantage
|
|
||||||
that the syntax allows backward-compatible extensions to the tests and
|
|
||||||
the disadvantage that it is relatively verbose.
|
|
||||||
|
|
||||||
Basic Structure
|
|
||||||
---------------
|
|
||||||
|
|
||||||
{"tests": [
|
|
||||||
{"description": "Test description",
|
|
||||||
"input": "input_string",
|
|
||||||
"output": [expected_output_tokens],
|
|
||||||
"initialStates": [initial_states],
|
|
||||||
"lastStartTag": last_start_tag,
|
|
||||||
"ignoreErrorOrder": ignore_error_order
|
|
||||||
}
|
|
||||||
]}
|
|
||||||
|
|
||||||
Multiple tests per file are allowed simply by adding more objects to the
|
|
||||||
"tests" list.
|
|
||||||
|
|
||||||
`description`, `input` and `output` are always present. The other values
|
|
||||||
are optional.
|
|
||||||
|
|
||||||
### Test set-up
|
|
||||||
|
|
||||||
`test.input` is a string containing the characters to pass to the
|
|
||||||
tokenizer. Specifically, it represents the characters of the **input
|
|
||||||
stream**, and so implementations are expected to perform the processing
|
|
||||||
described in the spec's **Preprocessing the input stream** section
|
|
||||||
before feeding the result to the tokenizer.
|
|
||||||
|
|
||||||
If `test.doubleEscaped` is present and `true`, then `test.input` is not
|
|
||||||
quite as described above. Instead, it must first be subjected to another
|
|
||||||
round of unescaping (i.e., in addition to any unescaping involved in the
|
|
||||||
JSON import), and the result of *that* represents the characters of the
|
|
||||||
input stream. Currently, the only unescaping required by this option is
|
|
||||||
to convert each sequence of the form \\uHHHH (where H is a hex digit)
|
|
||||||
into the corresponding Unicode code point. (Note that this option also
|
|
||||||
affects the interpretation of `test.output`.)
|
|
||||||
|
|
||||||
`test.initialStates` is a list of strings, each being the name of a
|
|
||||||
tokenizer state. The test should be run once for each string, using it
|
|
||||||
to set the tokenizer's initial state for that run. If
|
|
||||||
`test.initialStates` is omitted, it defaults to `["data state"]`.
|
|
||||||
|
|
||||||
`test.lastStartTag` is a lowercase string that should be used as "the
|
|
||||||
tag name of the last start tag to have been emitted from this
|
|
||||||
tokenizer", referenced in the spec's definition of **appropriate end tag
|
|
||||||
token**. If it is omitted, it is treated as if "no start tag has been
|
|
||||||
emitted from this tokenizer".
|
|
||||||
|
|
||||||
### Test results
|
|
||||||
|
|
||||||
`test.output` is a list of tokens, ordered with the first produced by
|
|
||||||
the tokenizer the first (leftmost) in the list. The list must match the
|
|
||||||
**complete** list of tokens that the tokenizer should produce. Valid
|
|
||||||
tokens are:
|
|
||||||
|
|
||||||
["DOCTYPE", name, public_id, system_id, correctness]
|
|
||||||
["StartTag", name, {attributes}*, true*]
|
|
||||||
["StartTag", name, {attributes}]
|
|
||||||
["EndTag", name]
|
|
||||||
["Comment", data]
|
|
||||||
["Character", data]
|
|
||||||
"ParseError"
|
|
||||||
|
|
||||||
`public_id` and `system_id` are either strings or `null`. `correctness`
|
|
||||||
is either `true` or `false`; `true` corresponds to the force-quirks flag
|
|
||||||
being false, and vice-versa.
|
|
||||||
|
|
||||||
When the self-closing flag is set, the `StartTag` array has `true` as
|
|
||||||
its fourth entry. When the flag is not set, the array has only three
|
|
||||||
entries for backwards compatibility.
|
|
||||||
|
|
||||||
All adjacent character tokens are coalesced into a single
|
|
||||||
`["Character", data]` token.
|
|
||||||
|
|
||||||
If `test.doubleEscaped` is present and `true`, then every string within
|
|
||||||
`test.output` must be further unescaped (as described above) before
|
|
||||||
comparing with the tokenizer's output.
|
|
||||||
|
|
||||||
`test.ignoreErrorOrder` is a boolean value indicating that the order of
|
|
||||||
`ParseError` tokens relative to other tokens in the output stream is
|
|
||||||
unimportant, and implementations should ignore such differences between
|
|
||||||
their output and `expected_output_tokens`. (This is used for errors
|
|
||||||
emitted by the input stream preprocessing stage, since it is useful to
|
|
||||||
test that code but it is undefined when the errors occur). If it is
|
|
||||||
omitted, it defaults to `false`.
|
|
||||||
|
|
||||||
xmlViolation tests
|
|
||||||
------------------
|
|
||||||
|
|
||||||
`tokenizer/xmlViolation.test` differs from the above in a couple of
|
|
||||||
ways:
|
|
||||||
|
|
||||||
- The name of the single member of the top-level JSON object is
|
|
||||||
"xmlViolationTests" instead of "tests".
|
|
||||||
- Each test's expected output assumes that the implementation is applying
|
|
||||||
the tweaks given in the spec's "Coercing an HTML DOM into an
|
|
||||||
infoset" section.
|
|
||||||
|
|
|
@ -1,81 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description":"PLAINTEXT content model flag",
|
|
||||||
"initialStates":["PLAINTEXT state"],
|
|
||||||
"lastStartTag":"plaintext",
|
|
||||||
"input":"<head>&body;",
|
|
||||||
"output":[["Character", "<head>&body;"]]},
|
|
||||||
|
|
||||||
{"description":"End tag closing RCDATA or RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo</xmp>",
|
|
||||||
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo</xMp>",
|
|
||||||
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo</xmp ",
|
|
||||||
"output":[["Character", "foo"], "ParseError"]},
|
|
||||||
|
|
||||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo</xmp",
|
|
||||||
"output":[["Character", "foo</xmp"]]},
|
|
||||||
|
|
||||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo</xmp/",
|
|
||||||
"output":[["Character", "foo"], "ParseError"]},
|
|
||||||
|
|
||||||
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo</xmp<",
|
|
||||||
"output":[["Character", "foo</xmp<"]]},
|
|
||||||
|
|
||||||
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</foo>bar</xmp>",
|
|
||||||
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"Partial end tags leading straight into partial end tags",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</xmp</xmp</xmp>",
|
|
||||||
"output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</foo>bar</xmpaar>",
|
|
||||||
"output":[["Character", "</foo>bar</xmpaar>"]]},
|
|
||||||
|
|
||||||
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo</xmp></baz>",
|
|
||||||
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
|
|
||||||
|
|
||||||
{"description":"RAWTEXT w/ something looking like an entity",
|
|
||||||
"initialStates":["RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"&foo;",
|
|
||||||
"output":[["Character", "&foo;"]]},
|
|
||||||
|
|
||||||
{"description":"RCDATA w/ an entity",
|
|
||||||
"initialStates":["RCDATA state"],
|
|
||||||
"lastStartTag":"textarea",
|
|
||||||
"input":"<",
|
|
||||||
"output":[["Character", "<"]]}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,96 +0,0 @@
|
||||||
{
|
|
||||||
"tests": [
|
|
||||||
{
|
|
||||||
"description":"CR in bogus comment state",
|
|
||||||
"input":"<?\u000d",
|
|
||||||
"output":["ParseError", ["Comment", "?\u000a"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"CRLF in bogus comment state",
|
|
||||||
"input":"<?\u000d\u000a",
|
|
||||||
"output":["ParseError", ["Comment", "?\u000a"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"CRLFLF in bogus comment state",
|
|
||||||
"input":"<?\u000d\u000a\u000a",
|
|
||||||
"output":["ParseError", ["Comment", "?\u000a\u000a"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"NUL in RCDATA and RAWTEXT",
|
|
||||||
"doubleEscaped":true,
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"input":"\\u0000",
|
|
||||||
"output":["ParseError", ["Character", "\\uFFFD"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"leading U+FEFF must pass through",
|
|
||||||
"doubleEscaped":true,
|
|
||||||
"input":"\\uFEFFfoo\\uFEFFbar",
|
|
||||||
"output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"Non BMP-charref in in RCDATA",
|
|
||||||
"initialStates":["RCDATA state"],
|
|
||||||
"input":"≂̸",
|
|
||||||
"output":[["Character", "\u2242\u0338"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"Bad charref in in RCDATA",
|
|
||||||
"initialStates":["RCDATA state"],
|
|
||||||
"input":"&NotEqualTild;",
|
|
||||||
"output":["ParseError", ["Character", "&NotEqualTild;"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"lowercase endtags in RCDATA and RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</XMP>",
|
|
||||||
"output":[["EndTag","xmp"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</ XMP>",
|
|
||||||
"output":[["Character","</ XMP>"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</xm>",
|
|
||||||
"output":[["Character","</xm>"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</xm ",
|
|
||||||
"output":[["Character","</xm "]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"</xm/",
|
|
||||||
"output":[["Character","</xm/"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"Non BMP-charref in attribute",
|
|
||||||
"input":"<p id=\"≂̸\">",
|
|
||||||
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"--!NUL in comment ",
|
|
||||||
"doubleEscaped":true,
|
|
||||||
"input":"<!----!\\u0000-->",
|
|
||||||
"output":["ParseError", "ParseError", ["Comment", "--!\\uFFFD"]]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description":"space EOF after doctype ",
|
|
||||||
"input":"<!DOCTYPE html ",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null , false]]
|
|
||||||
}
|
|
||||||
|
|
||||||
]
|
|
||||||
}
|
|
|
@ -1,283 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
|
|
||||||
"input":"<h a='¬i;'>",
|
|
||||||
"output": [["StartTag", "h", {"a": "¬i;"}]]},
|
|
||||||
|
|
||||||
{"description": "Entity name followed by the equals sign in an attribute value.",
|
|
||||||
"input":"<h a='&lang='>",
|
|
||||||
"output": [["StartTag", "h", {"a": "&lang="}]]},
|
|
||||||
|
|
||||||
{"description": "CR as numeric entity",
|
|
||||||
"input":"
",
|
|
||||||
"output": ["ParseError", ["Character", "\r"]]},
|
|
||||||
|
|
||||||
{"description": "CR as hexadecimal numeric entity",
|
|
||||||
"input":"
",
|
|
||||||
"output": ["ParseError", ["Character", "\r"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 EURO SIGN numeric entity.",
|
|
||||||
"input":"€",
|
|
||||||
"output": ["ParseError", ["Character", "\u20AC"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u0081"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
|
|
||||||
"input":"‚",
|
|
||||||
"output": ["ParseError", ["Character", "\u201A"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
|
|
||||||
"input":"ƒ",
|
|
||||||
"output": ["ParseError", ["Character", "\u0192"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
|
|
||||||
"input":"„",
|
|
||||||
"output": ["ParseError", ["Character", "\u201E"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
|
|
||||||
"input":"…",
|
|
||||||
"output": ["ParseError", ["Character", "\u2026"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 DAGGER numeric entity.",
|
|
||||||
"input":"†",
|
|
||||||
"output": ["ParseError", ["Character", "\u2020"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
|
|
||||||
"input":"‡",
|
|
||||||
"output": ["ParseError", ["Character", "\u2021"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
|
|
||||||
"input":"ˆ",
|
|
||||||
"output": ["ParseError", ["Character", "\u02C6"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
|
|
||||||
"input":"‰",
|
|
||||||
"output": ["ParseError", ["Character", "\u2030"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
|
|
||||||
"input":"Š",
|
|
||||||
"output": ["ParseError", ["Character", "\u0160"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
|
||||||
"input":"‹",
|
|
||||||
"output": ["ParseError", ["Character", "\u2039"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
|
|
||||||
"input":"Œ",
|
|
||||||
"output": ["ParseError", ["Character", "\u0152"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u008D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
|
|
||||||
"input":"Ž",
|
|
||||||
"output": ["ParseError", ["Character", "\u017D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u008F"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u0090"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
|
|
||||||
"input":"‘",
|
|
||||||
"output": ["ParseError", ["Character", "\u2018"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
|
|
||||||
"input":"’",
|
|
||||||
"output": ["ParseError", ["Character", "\u2019"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
|
|
||||||
"input":"“",
|
|
||||||
"output": ["ParseError", ["Character", "\u201C"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
|
|
||||||
"input":"”",
|
|
||||||
"output": ["ParseError", ["Character", "\u201D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 BULLET numeric entity.",
|
|
||||||
"input":"•",
|
|
||||||
"output": ["ParseError", ["Character", "\u2022"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 EN DASH numeric entity.",
|
|
||||||
"input":"–",
|
|
||||||
"output": ["ParseError", ["Character", "\u2013"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 EM DASH numeric entity.",
|
|
||||||
"input":"—",
|
|
||||||
"output": ["ParseError", ["Character", "\u2014"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SMALL TILDE numeric entity.",
|
|
||||||
"input":"˜",
|
|
||||||
"output": ["ParseError", ["Character", "\u02DC"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
|
|
||||||
"input":"™",
|
|
||||||
"output": ["ParseError", ["Character", "\u2122"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
|
|
||||||
"input":"š",
|
|
||||||
"output": ["ParseError", ["Character", "\u0161"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
|
||||||
"input":"›",
|
|
||||||
"output": ["ParseError", ["Character", "\u203A"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
|
|
||||||
"input":"œ",
|
|
||||||
"output": ["ParseError", ["Character", "\u0153"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u009D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
|
|
||||||
"input":"€",
|
|
||||||
"output": ["ParseError", ["Character", "\u20AC"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u0081"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"‚",
|
|
||||||
"output": ["ParseError", ["Character", "\u201A"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
|
|
||||||
"input":"ƒ",
|
|
||||||
"output": ["ParseError", ["Character", "\u0192"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"„",
|
|
||||||
"output": ["ParseError", ["Character", "\u201E"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
|
|
||||||
"input":"…",
|
|
||||||
"output": ["ParseError", ["Character", "\u2026"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
|
|
||||||
"input":"†",
|
|
||||||
"output": ["ParseError", ["Character", "\u2020"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
|
|
||||||
"input":"‡",
|
|
||||||
"output": ["ParseError", ["Character", "\u2021"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
|
|
||||||
"input":"ˆ",
|
|
||||||
"output": ["ParseError", ["Character", "\u02C6"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
|
|
||||||
"input":"‰",
|
|
||||||
"output": ["ParseError", ["Character", "\u2030"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
|
|
||||||
"input":"Š",
|
|
||||||
"output": ["ParseError", ["Character", "\u0160"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"‹",
|
|
||||||
"output": ["ParseError", ["Character", "\u2039"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
|
|
||||||
"input":"Œ",
|
|
||||||
"output": ["ParseError", ["Character", "\u0152"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u008D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
|
|
||||||
"input":"Ž",
|
|
||||||
"output": ["ParseError", ["Character", "\u017D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u008F"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u0090"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"‘",
|
|
||||||
"output": ["ParseError", ["Character", "\u2018"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"’",
|
|
||||||
"output": ["ParseError", ["Character", "\u2019"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"“",
|
|
||||||
"output": ["ParseError", ["Character", "\u201C"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"”",
|
|
||||||
"output": ["ParseError", ["Character", "\u201D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
|
|
||||||
"input":"•",
|
|
||||||
"output": ["ParseError", ["Character", "\u2022"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
|
|
||||||
"input":"–",
|
|
||||||
"output": ["ParseError", ["Character", "\u2013"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
|
|
||||||
"input":"—",
|
|
||||||
"output": ["ParseError", ["Character", "\u2014"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
|
|
||||||
"input":"˜",
|
|
||||||
"output": ["ParseError", ["Character", "\u02DC"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
|
|
||||||
"input":"™",
|
|
||||||
"output": ["ParseError", ["Character", "\u2122"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
|
|
||||||
"input":"š",
|
|
||||||
"output": ["ParseError", ["Character", "\u0161"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
|
||||||
"input":"›",
|
|
||||||
"output": ["ParseError", ["Character", "\u203A"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
|
|
||||||
"input":"œ",
|
|
||||||
"output": ["ParseError", ["Character", "\u0153"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
|
||||||
"input":"",
|
|
||||||
"output": ["ParseError", ["Character", "\u009D"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
|
|
||||||
"input":"ž",
|
|
||||||
"output": ["ParseError", ["Character", "\u017E"]]},
|
|
||||||
|
|
||||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
|
|
||||||
"input":"Ÿ",
|
|
||||||
"output": ["ParseError", ["Character", "\u0178"]]},
|
|
||||||
|
|
||||||
{"description": "Decimal numeric entity followed by hex character a.",
|
|
||||||
"input":"aa",
|
|
||||||
"output": ["ParseError", ["Character", "aa"]]},
|
|
||||||
|
|
||||||
{"description": "Decimal numeric entity followed by hex character A.",
|
|
||||||
"input":"aA",
|
|
||||||
"output": ["ParseError", ["Character", "aA"]]},
|
|
||||||
|
|
||||||
{"description": "Decimal numeric entity followed by hex character f.",
|
|
||||||
"input":"af",
|
|
||||||
"output": ["ParseError", ["Character", "af"]]},
|
|
||||||
|
|
||||||
{"description": "Decimal numeric entity followed by hex character A.",
|
|
||||||
"input":"aF",
|
|
||||||
"output": ["ParseError", ["Character", "aF"]]}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,33 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description":"Commented close tag in RCDATA or RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo<!--</xmp>--></xmp>",
|
|
||||||
"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"Bogus comment in RCDATA or RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo<!-->baz</xmp>",
|
|
||||||
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo<!--></xmp><!-->baz</xmp>",
|
|
||||||
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"Commented entities in RCDATA",
|
|
||||||
"initialStates":["RCDATA state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":" & <!-- & --> & </xmp>",
|
|
||||||
"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
|
|
||||||
|
|
||||||
{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
|
|
||||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
|
||||||
"lastStartTag":"xmp",
|
|
||||||
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
|
|
||||||
"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
|
|
||||||
|
|
||||||
]}
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,7 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description":"<!---- >",
|
|
||||||
"input":"<!---- >",
|
|
||||||
"output":["ParseError", "ParseError", ["Comment","-- >"]]}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,196 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description":"Correct Doctype lowercase",
|
|
||||||
"input":"<!DOCTYPE html>",
|
|
||||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
|
||||||
|
|
||||||
{"description":"Correct Doctype uppercase",
|
|
||||||
"input":"<!DOCTYPE HTML>",
|
|
||||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
|
||||||
|
|
||||||
{"description":"Correct Doctype mixed case",
|
|
||||||
"input":"<!DOCTYPE HtMl>",
|
|
||||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
|
||||||
|
|
||||||
{"description":"Correct Doctype case with EOF",
|
|
||||||
"input":"<!DOCTYPE HtMl",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"Truncated doctype start",
|
|
||||||
"input":"<!DOC>",
|
|
||||||
"output":["ParseError", ["Comment", "DOC"]]},
|
|
||||||
|
|
||||||
{"description":"Doctype in error",
|
|
||||||
"input":"<!DOCTYPE foo>",
|
|
||||||
"output":[["DOCTYPE", "foo", null, null, true]]},
|
|
||||||
|
|
||||||
{"description":"Single Start Tag",
|
|
||||||
"input":"<h>",
|
|
||||||
"output":[["StartTag", "h", {}]]},
|
|
||||||
|
|
||||||
{"description":"Empty end tag",
|
|
||||||
"input":"</>",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"Empty start tag",
|
|
||||||
"input":"<>",
|
|
||||||
"output":["ParseError", ["Character", "<>"]]},
|
|
||||||
|
|
||||||
{"description":"Start Tag w/attribute",
|
|
||||||
"input":"<h a='b'>",
|
|
||||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
|
||||||
|
|
||||||
{"description":"Start Tag w/attribute no quotes",
|
|
||||||
"input":"<h a=b>",
|
|
||||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
|
||||||
|
|
||||||
{"description":"Start/End Tag",
|
|
||||||
"input":"<h></h>",
|
|
||||||
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
|
|
||||||
|
|
||||||
{"description":"Two unclosed start tags",
|
|
||||||
"input":"<p>One<p>Two",
|
|
||||||
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
|
|
||||||
|
|
||||||
{"description":"End Tag w/attribute",
|
|
||||||
"input":"<h></h a='b'>",
|
|
||||||
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
|
|
||||||
|
|
||||||
{"description":"Multiple atts",
|
|
||||||
"input":"<h a='b' c='d'>",
|
|
||||||
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
|
||||||
|
|
||||||
{"description":"Multiple atts no space",
|
|
||||||
"input":"<h a='b'c='d'>",
|
|
||||||
"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
|
|
||||||
|
|
||||||
{"description":"Repeated attr",
|
|
||||||
"input":"<h a='b' a='d'>",
|
|
||||||
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
|
|
||||||
|
|
||||||
{"description":"Simple comment",
|
|
||||||
"input":"<!--comment-->",
|
|
||||||
"output":[["Comment", "comment"]]},
|
|
||||||
|
|
||||||
{"description":"Comment, Central dash no space",
|
|
||||||
"input":"<!----->",
|
|
||||||
"output":["ParseError", ["Comment", "-"]]},
|
|
||||||
|
|
||||||
{"description":"Comment, two central dashes",
|
|
||||||
"input":"<!-- --comment -->",
|
|
||||||
"output":["ParseError", ["Comment", " --comment "]]},
|
|
||||||
|
|
||||||
{"description":"Unfinished comment",
|
|
||||||
"input":"<!--comment",
|
|
||||||
"output":["ParseError", ["Comment", "comment"]]},
|
|
||||||
|
|
||||||
{"description":"Start of a comment",
|
|
||||||
"input":"<!-",
|
|
||||||
"output":["ParseError", ["Comment", "-"]]},
|
|
||||||
|
|
||||||
{"description":"Short comment",
|
|
||||||
"input":"<!-->",
|
|
||||||
"output":["ParseError", ["Comment", ""]]},
|
|
||||||
|
|
||||||
{"description":"Short comment two",
|
|
||||||
"input":"<!--->",
|
|
||||||
"output":["ParseError", ["Comment", ""]]},
|
|
||||||
|
|
||||||
{"description":"Short comment three",
|
|
||||||
"input":"<!---->",
|
|
||||||
"output":[["Comment", ""]]},
|
|
||||||
|
|
||||||
|
|
||||||
{"description":"Ampersand EOF",
|
|
||||||
"input":"&",
|
|
||||||
"output":[["Character", "&"]]},
|
|
||||||
|
|
||||||
{"description":"Ampersand ampersand EOF",
|
|
||||||
"input":"&&",
|
|
||||||
"output":[["Character", "&&"]]},
|
|
||||||
|
|
||||||
{"description":"Ampersand space EOF",
|
|
||||||
"input":"& ",
|
|
||||||
"output":[["Character", "& "]]},
|
|
||||||
|
|
||||||
{"description":"Unfinished entity",
|
|
||||||
"input":"&f",
|
|
||||||
"output":[["Character", "&f"]]},
|
|
||||||
|
|
||||||
{"description":"Ampersand, number sign",
|
|
||||||
"input":"&#",
|
|
||||||
"output":["ParseError", ["Character", "&#"]]},
|
|
||||||
|
|
||||||
{"description":"Unfinished numeric entity",
|
|
||||||
"input":"&#x",
|
|
||||||
"output":["ParseError", ["Character", "&#x"]]},
|
|
||||||
|
|
||||||
{"description":"Entity with trailing semicolon (1)",
|
|
||||||
"input":"I'm ¬it",
|
|
||||||
"output":[["Character","I'm \u00ACit"]]},
|
|
||||||
|
|
||||||
{"description":"Entity with trailing semicolon (2)",
|
|
||||||
"input":"I'm ∉",
|
|
||||||
"output":[["Character","I'm \u2209"]]},
|
|
||||||
|
|
||||||
{"description":"Entity without trailing semicolon (1)",
|
|
||||||
"input":"I'm ¬it",
|
|
||||||
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
|
|
||||||
|
|
||||||
{"description":"Entity without trailing semicolon (2)",
|
|
||||||
"input":"I'm ¬in",
|
|
||||||
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
|
|
||||||
|
|
||||||
{"description":"Partial entity match at end of file",
|
|
||||||
"input":"I'm &no",
|
|
||||||
"output":[["Character","I'm &no"]]},
|
|
||||||
|
|
||||||
{"description":"Non-ASCII character reference name",
|
|
||||||
"input":"&\u00AC;",
|
|
||||||
"output":[["Character", "&\u00AC;"]]},
|
|
||||||
|
|
||||||
{"description":"ASCII decimal entity",
|
|
||||||
"input":"$",
|
|
||||||
"output":[["Character","$"]]},
|
|
||||||
|
|
||||||
{"description":"ASCII hexadecimal entity",
|
|
||||||
"input":"?",
|
|
||||||
"output":[["Character","?"]]},
|
|
||||||
|
|
||||||
{"description":"Hexadecimal entity in attribute",
|
|
||||||
"input":"<h a='?'></h>",
|
|
||||||
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
|
|
||||||
|
|
||||||
{"description":"Entity in attribute without semicolon ending in x",
|
|
||||||
"input":"<h a='¬x'>",
|
|
||||||
"output":[["StartTag", "h", {"a":"¬x"}]]},
|
|
||||||
|
|
||||||
{"description":"Entity in attribute without semicolon ending in 1",
|
|
||||||
"input":"<h a='¬1'>",
|
|
||||||
"output":[["StartTag", "h", {"a":"¬1"}]]},
|
|
||||||
|
|
||||||
{"description":"Entity in attribute without semicolon ending in i",
|
|
||||||
"input":"<h a='¬i'>",
|
|
||||||
"output":[["StartTag", "h", {"a":"¬i"}]]},
|
|
||||||
|
|
||||||
{"description":"Entity in attribute without semicolon",
|
|
||||||
"input":"<h a='©'>",
|
|
||||||
"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
|
|
||||||
|
|
||||||
{"description":"Unquoted attribute ending in ampersand",
|
|
||||||
"input":"<s o=& t>",
|
|
||||||
"output":[["StartTag","s",{"o":"&","t":""}]]},
|
|
||||||
|
|
||||||
{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
|
|
||||||
"input":"<a a=a&>foo",
|
|
||||||
"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
|
|
||||||
|
|
||||||
{"description":"plaintext element",
|
|
||||||
"input":"<plaintext>foobar",
|
|
||||||
"output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
|
|
||||||
|
|
||||||
{"description":"Open angled bracket in unquoted attribute value state",
|
|
||||||
"input":"<a a=f<>",
|
|
||||||
"output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
|
|
||||||
|
|
||||||
]}
|
|
|
@ -1,179 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description":"DOCTYPE without name",
|
|
||||||
"input":"<!DOCTYPE>",
|
|
||||||
"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE without space before name",
|
|
||||||
"input":"<!DOCTYPEhtml>",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
|
|
||||||
|
|
||||||
{"description":"Incorrect DOCTYPE without a space before name",
|
|
||||||
"input":"<!DOCTYPEfoo>",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with publicId",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
|
||||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with EOF after PUBLIC",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with EOF after PUBLIC '",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC '",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with EOF after PUBLIC 'x",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC 'x",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with systemId",
|
|
||||||
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
|
||||||
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with publicId and systemId",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
|
||||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with > in double-quoted publicId",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC \">x",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with > in single-quoted publicId",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC '>x",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with > in double-quoted systemId",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
|
|
||||||
|
|
||||||
{"description":"DOCTYPE with > in single-quoted systemId",
|
|
||||||
"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
|
|
||||||
|
|
||||||
{"description":"Incomplete doctype",
|
|
||||||
"input":"<!DOCTYPE html ",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"Numeric entity representing the NUL character",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Hexadecimal entity representing the NUL character",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Hexadecimal entity pair representing a surrogate pair",
|
|
||||||
"input":"��",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
|
||||||
"input":"ꯍ",
|
|
||||||
"output":[["Character", "\uABCD"]]},
|
|
||||||
|
|
||||||
{"description":"Entity without a name",
|
|
||||||
"input":"&;",
|
|
||||||
"output":[["Character", "&;"]]},
|
|
||||||
|
|
||||||
{"description":"Unescaped ampersand in attribute value",
|
|
||||||
"input":"<h a='&'>",
|
|
||||||
"output":[["StartTag", "h", { "a":"&" }]]},
|
|
||||||
|
|
||||||
{"description":"StartTag containing <",
|
|
||||||
"input":"<a<b>",
|
|
||||||
"output":[["StartTag", "a<b", { }]]},
|
|
||||||
|
|
||||||
{"description":"Non-void element containing trailing /",
|
|
||||||
"input":"<h/>",
|
|
||||||
"output":[["StartTag","h",{},true]]},
|
|
||||||
|
|
||||||
{"description":"Void element with permitted slash",
|
|
||||||
"input":"<br/>",
|
|
||||||
"output":[["StartTag","br",{},true]]},
|
|
||||||
|
|
||||||
{"description":"Void element with permitted slash (with attribute)",
|
|
||||||
"input":"<br foo='bar'/>",
|
|
||||||
"output":[["StartTag","br",{"foo":"bar"},true]]},
|
|
||||||
|
|
||||||
{"description":"StartTag containing /",
|
|
||||||
"input":"<h/a='b'>",
|
|
||||||
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
|
|
||||||
|
|
||||||
{"description":"Double-quoted attribute value",
|
|
||||||
"input":"<h a=\"b\">",
|
|
||||||
"output":[["StartTag", "h", { "a":"b" }]]},
|
|
||||||
|
|
||||||
{"description":"Unescaped </",
|
|
||||||
"input":"</",
|
|
||||||
"output":["ParseError", ["Character", "</"]]},
|
|
||||||
|
|
||||||
{"description":"Illegal end tag name",
|
|
||||||
"input":"</1>",
|
|
||||||
"output":["ParseError", ["Comment", "1"]]},
|
|
||||||
|
|
||||||
{"description":"Simili processing instruction",
|
|
||||||
"input":"<?namespace>",
|
|
||||||
"output":["ParseError", ["Comment", "?namespace"]]},
|
|
||||||
|
|
||||||
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
|
|
||||||
"input":"<?foo-->",
|
|
||||||
"output":["ParseError", ["Comment", "?foo--"]]},
|
|
||||||
|
|
||||||
{"description":"Unescaped <",
|
|
||||||
"input":"foo < bar",
|
|
||||||
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
|
|
||||||
|
|
||||||
{"description":"Null Byte Replacement",
|
|
||||||
"input":"\u0000",
|
|
||||||
"output":["ParseError", ["Character", "\u0000"]]},
|
|
||||||
|
|
||||||
{"description":"Comment with dash",
|
|
||||||
"input":"<!---x",
|
|
||||||
"output":["ParseError", ["Comment", "-x"]]},
|
|
||||||
|
|
||||||
{"description":"Entity + newline",
|
|
||||||
"input":"\nx\n>\n",
|
|
||||||
"output":[["Character","\nx\n>\n"]]},
|
|
||||||
|
|
||||||
{"description":"Start tag with no attributes but space before the greater-than sign",
|
|
||||||
"input":"<h >",
|
|
||||||
"output":[["StartTag", "h", {}]]},
|
|
||||||
|
|
||||||
{"description":"Empty attribute followed by uppercase attribute",
|
|
||||||
"input":"<h a B=''>",
|
|
||||||
"output":[["StartTag", "h", {"a":"", "b":""}]]},
|
|
||||||
|
|
||||||
{"description":"Double-quote after attribute name",
|
|
||||||
"input":"<h a \">",
|
|
||||||
"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]},
|
|
||||||
|
|
||||||
{"description":"Single-quote after attribute name",
|
|
||||||
"input":"<h a '>",
|
|
||||||
"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
|
|
||||||
|
|
||||||
{"description":"Empty end tag with following characters",
|
|
||||||
"input":"a</>bc",
|
|
||||||
"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
|
|
||||||
|
|
||||||
{"description":"Empty end tag with following tag",
|
|
||||||
"input":"a</><b>c",
|
|
||||||
"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
|
|
||||||
|
|
||||||
{"description":"Empty end tag with following comment",
|
|
||||||
"input":"a</><!--b-->c",
|
|
||||||
"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
|
|
||||||
|
|
||||||
{"description":"Empty end tag with following end tag",
|
|
||||||
"input":"a</></b>c",
|
|
||||||
"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
|
|
||||||
|
|
||||||
]}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,344 +0,0 @@
|
||||||
{"tests": [
|
|
||||||
|
|
||||||
{"description":"< in attribute name",
|
|
||||||
"input":"<z/0 <>",
|
|
||||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
|
|
||||||
|
|
||||||
{"description":"< in attribute value",
|
|
||||||
"input":"<z x=<>",
|
|
||||||
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
|
|
||||||
|
|
||||||
{"description":"= in unquoted attribute value",
|
|
||||||
"input":"<z z=z=z>",
|
|
||||||
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
|
|
||||||
|
|
||||||
{"description":"= attribute",
|
|
||||||
"input":"<z =>",
|
|
||||||
"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
|
|
||||||
|
|
||||||
{"description":"== attribute",
|
|
||||||
"input":"<z ==>",
|
|
||||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
|
|
||||||
|
|
||||||
{"description":"=== attribute",
|
|
||||||
"input":"<z ===>",
|
|
||||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
|
|
||||||
|
|
||||||
{"description":"==== attribute",
|
|
||||||
"input":"<z ====>",
|
|
||||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
|
|
||||||
|
|
||||||
{"description":"\" after ampersand in double-quoted attribute value",
|
|
||||||
"input":"<z z=\"&\">",
|
|
||||||
"output":[["StartTag", "z", {"z": "&"}]]},
|
|
||||||
|
|
||||||
{"description":"' after ampersand in double-quoted attribute value",
|
|
||||||
"input":"<z z=\"&'\">",
|
|
||||||
"output":[["StartTag", "z", {"z": "&'"}]]},
|
|
||||||
|
|
||||||
{"description":"' after ampersand in single-quoted attribute value",
|
|
||||||
"input":"<z z='&'>",
|
|
||||||
"output":[["StartTag", "z", {"z": "&"}]]},
|
|
||||||
|
|
||||||
{"description":"\" after ampersand in single-quoted attribute value",
|
|
||||||
"input":"<z z='&\"'>",
|
|
||||||
"output":[["StartTag", "z", {"z": "&\""}]]},
|
|
||||||
|
|
||||||
{"description":"Text after bogus character reference",
|
|
||||||
"input":"<z z='&xlink_xmlns;'>bar<z>",
|
|
||||||
"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
|
|
||||||
|
|
||||||
{"description":"Text after hex character reference",
|
|
||||||
"input":"<z z='  foo'>bar<z>",
|
|
||||||
"output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
|
|
||||||
|
|
||||||
{"description":"Attribute name starting with \"",
|
|
||||||
"input":"<foo \"='bar'>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
|
|
||||||
|
|
||||||
{"description":"Attribute name starting with '",
|
|
||||||
"input":"<foo '='bar'>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
|
|
||||||
|
|
||||||
{"description":"Attribute name containing \"",
|
|
||||||
"input":"<foo a\"b='bar'>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
|
|
||||||
|
|
||||||
{"description":"Attribute name containing '",
|
|
||||||
"input":"<foo a'b='bar'>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
|
|
||||||
|
|
||||||
{"description":"Unquoted attribute value containing '",
|
|
||||||
"input":"<foo a=b'c>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
|
|
||||||
|
|
||||||
{"description":"Unquoted attribute value containing \"",
|
|
||||||
"input":"<foo a=b\"c>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
|
|
||||||
|
|
||||||
{"description":"Double-quoted attribute value not followed by whitespace",
|
|
||||||
"input":"<foo a=\"b\"c>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
|
||||||
|
|
||||||
{"description":"Single-quoted attribute value not followed by whitespace",
|
|
||||||
"input":"<foo a='b'c>",
|
|
||||||
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
|
||||||
|
|
||||||
{"description":"Quoted attribute followed by permitted /",
|
|
||||||
"input":"<br a='b'/>",
|
|
||||||
"output":[["StartTag","br",{"a":"b"},true]]},
|
|
||||||
|
|
||||||
{"description":"Quoted attribute followed by non-permitted /",
|
|
||||||
"input":"<bar a='b'/>",
|
|
||||||
"output":[["StartTag","bar",{"a":"b"},true]]},
|
|
||||||
|
|
||||||
{"description":"CR EOF after doctype name",
|
|
||||||
"input":"<!doctype html \r",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"CR EOF in tag name",
|
|
||||||
"input":"<z\r",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"Slash EOF in tag name",
|
|
||||||
"input":"<z/",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"Zero hex numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Zero decimal numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Zero-prefixed hex numeric entity",
|
|
||||||
"input":"A",
|
|
||||||
"output":[["Character", "A"]]},
|
|
||||||
|
|
||||||
{"description":"Zero-prefixed decimal numeric entity",
|
|
||||||
"input":"A",
|
|
||||||
"output":[["Character", "A"]]},
|
|
||||||
|
|
||||||
{"description":"Empty hex numeric entities",
|
|
||||||
"input":"&#x &#X ",
|
|
||||||
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
|
|
||||||
|
|
||||||
{"description":"Empty decimal numeric entities",
|
|
||||||
"input":"&# &#; ",
|
|
||||||
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
|
|
||||||
|
|
||||||
{"description":"Non-BMP numeric entity",
|
|
||||||
"input":"𐀀",
|
|
||||||
"output":[["Character", "\uD800\uDC00"]]},
|
|
||||||
|
|
||||||
{"description":"Maximum non-BMP numeric entity",
|
|
||||||
"input":"",
|
|
||||||
"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
|
|
||||||
|
|
||||||
{"description":"Above maximum numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"32-bit hex numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"33-bit hex numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"33-bit decimal numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"65-bit hex numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"65-bit decimal numeric entity",
|
|
||||||
"input":"�",
|
|
||||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
||||||
|
|
||||||
{"description":"Surrogate code point edge cases",
|
|
||||||
"input":"퟿����",
|
|
||||||
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
|
|
||||||
|
|
||||||
{"description":"Uppercase start tag name",
|
|
||||||
"input":"<X>",
|
|
||||||
"output":[["StartTag", "x", {}]]},
|
|
||||||
|
|
||||||
{"description":"Uppercase end tag name",
|
|
||||||
"input":"</X>",
|
|
||||||
"output":[["EndTag", "x"]]},
|
|
||||||
|
|
||||||
{"description":"Uppercase attribute name",
|
|
||||||
"input":"<x X>",
|
|
||||||
"output":[["StartTag", "x", { "x":"" }]]},
|
|
||||||
|
|
||||||
{"description":"Tag/attribute name case edge values",
|
|
||||||
"input":"<x@AZ[`az{ @AZ[`az{>",
|
|
||||||
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
|
|
||||||
|
|
||||||
{"description":"Duplicate different-case attributes",
|
|
||||||
"input":"<x x=1 x=2 X=3>",
|
|
||||||
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
|
|
||||||
|
|
||||||
{"description":"Uppercase close tag attributes",
|
|
||||||
"input":"</x X>",
|
|
||||||
"output":["ParseError", ["EndTag", "x"]]},
|
|
||||||
|
|
||||||
{"description":"Duplicate close tag attributes",
|
|
||||||
"input":"</x x x>",
|
|
||||||
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
|
|
||||||
|
|
||||||
{"description":"Permitted slash",
|
|
||||||
"input":"<br/>",
|
|
||||||
"output":[["StartTag","br",{},true]]},
|
|
||||||
|
|
||||||
{"description":"Non-permitted slash",
|
|
||||||
"input":"<xr/>",
|
|
||||||
"output":[["StartTag","xr",{},true]]},
|
|
||||||
|
|
||||||
{"description":"Permitted slash but in close tag",
|
|
||||||
"input":"</br/>",
|
|
||||||
"output":["ParseError", ["EndTag", "br"]]},
|
|
||||||
|
|
||||||
{"description":"Doctype public case-sensitivity (1)",
|
|
||||||
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
|
|
||||||
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
|
|
||||||
|
|
||||||
{"description":"Doctype public case-sensitivity (2)",
|
|
||||||
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
|
|
||||||
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
|
|
||||||
|
|
||||||
{"description":"Doctype system case-sensitivity (1)",
|
|
||||||
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
|
|
||||||
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
|
|
||||||
|
|
||||||
{"description":"Doctype system case-sensitivity (2)",
|
|
||||||
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
|
|
||||||
"output":[["DOCTYPE", "html", null, "xYz", true]]},
|
|
||||||
|
|
||||||
{"description":"U+0000 in lookahead region after non-matching character",
|
|
||||||
"input":"<!doc>\u0000",
|
|
||||||
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description":"U+0000 in lookahead region",
|
|
||||||
"input":"<!doc\u0000",
|
|
||||||
"output":["ParseError", ["Comment", "doc\uFFFD"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description":"U+0080 in lookahead region",
|
|
||||||
"input":"<!doc\u0080",
|
|
||||||
"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description":"U+FDD1 in lookahead region",
|
|
||||||
"input":"<!doc\uFDD1",
|
|
||||||
"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description":"U+1FFFF in lookahead region",
|
|
||||||
"input":"<!doc\uD83F\uDFFF",
|
|
||||||
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description":"CR followed by non-LF",
|
|
||||||
"input":"\r?",
|
|
||||||
"output":[["Character", "\n?"]]},
|
|
||||||
|
|
||||||
{"description":"CR at EOF",
|
|
||||||
"input":"\r",
|
|
||||||
"output":[["Character", "\n"]]},
|
|
||||||
|
|
||||||
{"description":"LF at EOF",
|
|
||||||
"input":"\n",
|
|
||||||
"output":[["Character", "\n"]]},
|
|
||||||
|
|
||||||
{"description":"CR LF",
|
|
||||||
"input":"\r\n",
|
|
||||||
"output":[["Character", "\n"]]},
|
|
||||||
|
|
||||||
{"description":"CR CR",
|
|
||||||
"input":"\r\r",
|
|
||||||
"output":[["Character", "\n\n"]]},
|
|
||||||
|
|
||||||
{"description":"LF LF",
|
|
||||||
"input":"\n\n",
|
|
||||||
"output":[["Character", "\n\n"]]},
|
|
||||||
|
|
||||||
{"description":"LF CR",
|
|
||||||
"input":"\n\r",
|
|
||||||
"output":[["Character", "\n\n"]]},
|
|
||||||
|
|
||||||
{"description":"text CR CR CR text",
|
|
||||||
"input":"text\r\r\rtext",
|
|
||||||
"output":[["Character", "text\n\n\ntext"]]},
|
|
||||||
|
|
||||||
{"description":"Doctype publik",
|
|
||||||
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"Doctype publi",
|
|
||||||
"input":"<!DOCTYPE html PUBLI",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"Doctype sistem",
|
|
||||||
"input":"<!DOCTYPE html SISTEM \"AbC\">",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"Doctype sys",
|
|
||||||
"input":"<!DOCTYPE html SYS",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
||||||
|
|
||||||
{"description":"Doctype html x>text",
|
|
||||||
"input":"<!DOCTYPE html x>text",
|
|
||||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
|
|
||||||
|
|
||||||
{"description":"Grave accent in unquoted attribute",
|
|
||||||
"input":"<a a=aa`>",
|
|
||||||
"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
|
|
||||||
|
|
||||||
{"description":"EOF in tag name state ",
|
|
||||||
"input":"<a",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in tag name state",
|
|
||||||
"input":"<a",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in before attribute name state",
|
|
||||||
"input":"<a ",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in attribute name state",
|
|
||||||
"input":"<a a",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in after attribute name state",
|
|
||||||
"input":"<a a ",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in before attribute value state",
|
|
||||||
"input":"<a a =",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in attribute value (double quoted) state",
|
|
||||||
"input":"<a a =\"a",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in attribute value (single quoted) state",
|
|
||||||
"input":"<a a ='a",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in attribute value (unquoted) state",
|
|
||||||
"input":"<a a =a",
|
|
||||||
"output":["ParseError"]},
|
|
||||||
|
|
||||||
{"description":"EOF in after attribute value state",
|
|
||||||
"input":"<a a ='a'",
|
|
||||||
"output":["ParseError"]}
|
|
||||||
|
|
||||||
]}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,31 +0,0 @@
|
||||||
{"tests" : [
|
|
||||||
{"description": "Invalid Unicode character U+DFFF",
|
|
||||||
"doubleEscaped":true,
|
|
||||||
"input": "\\uDFFF",
|
|
||||||
"output":["ParseError", ["Character", "\\uDFFF"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description": "Invalid Unicode character U+D800",
|
|
||||||
"doubleEscaped":true,
|
|
||||||
"input": "\\uD800",
|
|
||||||
"output":["ParseError", ["Character", "\\uD800"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
|
|
||||||
"doubleEscaped":true,
|
|
||||||
"input": "a\\uDFFF",
|
|
||||||
"output":[["Character", "a"], "ParseError", ["Character", "\\uDFFF"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description": "Invalid Unicode character U+D800 with valid following character",
|
|
||||||
"doubleEscaped":true,
|
|
||||||
"input": "\\uD800a",
|
|
||||||
"output":["ParseError", ["Character", "\\uD800a"]],
|
|
||||||
"ignoreErrorOrder":true},
|
|
||||||
|
|
||||||
{"description":"CR followed by U+0000",
|
|
||||||
"input":"\r\u0000",
|
|
||||||
"output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
|
|
||||||
"ignoreErrorOrder":true}
|
|
||||||
]
|
|
||||||
}
|
|
|
@ -1,22 +0,0 @@
|
||||||
{"xmlViolationTests": [
|
|
||||||
|
|
||||||
{"description":"Non-XML character",
|
|
||||||
"input":"a\uFFFFb",
|
|
||||||
"ignoreErrorOrder":true,
|
|
||||||
"output":["ParseError",["Character","a\uFFFDb"]]},
|
|
||||||
|
|
||||||
{"description":"Non-XML space",
|
|
||||||
"input":"a\u000Cb",
|
|
||||||
"ignoreErrorOrder":true,
|
|
||||||
"output":[["Character","a b"]]},
|
|
||||||
|
|
||||||
{"description":"Double hyphen in comment",
|
|
||||||
"input":"<!-- foo -- bar -->",
|
|
||||||
"output":["ParseError",["Comment"," foo - - bar "]]},
|
|
||||||
|
|
||||||
{"description":"FF between attributes",
|
|
||||||
"input":"<a b=''\u000Cc=''>",
|
|
||||||
"output":[["StartTag","a",{"b":"","c":""}]]}
|
|
||||||
]}
|
|
||||||
|
|
||||||
|
|
|
@ -1,104 +0,0 @@
|
||||||
Tree Construction Tests
|
|
||||||
=======================
|
|
||||||
|
|
||||||
Each file containing tree construction tests consists of any number of
|
|
||||||
tests separated by two newlines (LF) and a single newline before the end
|
|
||||||
of the file. For instance:
|
|
||||||
|
|
||||||
[TEST]LF
|
|
||||||
LF
|
|
||||||
[TEST]LF
|
|
||||||
LF
|
|
||||||
[TEST]LF
|
|
||||||
|
|
||||||
Where [TEST] is the following format:
|
|
||||||
|
|
||||||
Each test must begin with a string "\#data" followed by a newline (LF).
|
|
||||||
All subsequent lines until a line that says "\#errors" are the test data
|
|
||||||
and must be passed to the system being tested unchanged, except with the
|
|
||||||
final newline (on the last line) removed.
|
|
||||||
|
|
||||||
Then there must be a line that says "\#errors". It must be followed by
|
|
||||||
one line per parse error that a conformant checker would return. It
|
|
||||||
doesn't matter what those lines are, although they can't be
|
|
||||||
"\#document-fragment", "\#document", "\#script-off", "\#script-on", or
|
|
||||||
empty; the only thing that matters is that there be the right number
|
|
||||||
of parse errors.
|
|
||||||
|
|
||||||
Then there \*may\* be a line that says "\#document-fragment", which must
|
|
||||||
be followed by a newline (LF), followed by a string of characters that
|
|
||||||
indicates the context element, followed by a newline (LF). If the string
|
|
||||||
of characters starts with "svg ", the context element is in the SVG
|
|
||||||
namespace and the substring after "svg " is the local name. If the
|
|
||||||
string of characters starts with "math ", the context element is in the
|
|
||||||
MathML namespace and the substring after "math " is the local name.
|
|
||||||
Otherwise, the context element is in the HTML namespace and the string
|
|
||||||
is the local name. If this line is present the "\#data" must be parsed
|
|
||||||
using the HTML fragment parsing algorithm with the context element as
|
|
||||||
context.
|
|
||||||
|
|
||||||
Then there \*may\* be a line that says "\#script-off" or
|
|
||||||
"\#script-on". If a line that says "\#script-off" is present, the
|
|
||||||
parser must set the scripting flag to disabled. If a line that says
|
|
||||||
"\#script-on" is present, it must set it to enabled. Otherwise, the
|
|
||||||
test should be run in both modes.
|
|
||||||
|
|
||||||
Then there must be a line that says "\#document", which must be followed
|
|
||||||
by a dump of the tree of the parsed DOM. Each node must be represented
|
|
||||||
by a single line. Each line must start with "| ", followed by two spaces
|
|
||||||
per parent node that the node has before the root document node.
|
|
||||||
|
|
||||||
- Element nodes must be represented by a "`<`" then the *tag name
|
|
||||||
string* "`>`", and all the attributes must be given, sorted
|
|
||||||
lexicographically by UTF-16 code unit according to their *attribute
|
|
||||||
name string*, on subsequent lines, as if they were children of the
|
|
||||||
element node.
|
|
||||||
- Attribute nodes must have the *attribute name string*, then an "="
|
|
||||||
sign, then the attribute value in double quotes (").
|
|
||||||
- Text nodes must be the string, in double quotes. Newlines aren't
|
|
||||||
escaped.
|
|
||||||
- Comments must be "`<`" then "`!-- `" then the data then "` -->`".
|
|
||||||
- DOCTYPEs must be "`<!DOCTYPE `" then the name then if either of the
|
|
||||||
system id or public id is non-empty a space, public id in
|
|
||||||
double-quotes, another space and the system id in double-quotes, and
|
|
||||||
then in any case "`>`".
|
|
||||||
- Processing instructions must be "`<?`", then the target, then a
|
|
||||||
space, then the data and then "`>`". (The HTML parser cannot emit
|
|
||||||
processing instructions, but scripts can, and the WebVTT to DOM
|
|
||||||
rules can emit them.)
|
|
||||||
- Template contents are represented by the string "content" with the
|
|
||||||
children below it.
|
|
||||||
|
|
||||||
The *tag name string* is the local name prefixed by a namespace
|
|
||||||
designator. For the HTML namespace, the namespace designator is the
|
|
||||||
empty string, i.e. there's no prefix. For the SVG namespace, the
|
|
||||||
namespace designator is "svg ". For the MathML namespace, the namespace
|
|
||||||
designator is "math ".
|
|
||||||
|
|
||||||
The *attribute name string* is the local name prefixed by a namespace
|
|
||||||
designator. For no namespace, the namespace designator is the empty
|
|
||||||
string, i.e. there's no prefix. For the XLink namespace, the namespace
|
|
||||||
designator is "xlink ". For the XML namespace, the namespace designator
|
|
||||||
is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
|
|
||||||
". Note the difference between "xlink:href" which is an attribute in no
|
|
||||||
namespace with the local name "xlink:href" and "xlink href" which is an
|
|
||||||
attribute in the xlink namespace with the local name "href".
|
|
||||||
|
|
||||||
If there is also a "\#document-fragment" the bit following "\#document"
|
|
||||||
must be a representation of the HTML fragment serialization for the
|
|
||||||
context element given by "\#document-fragment".
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
#data
|
|
||||||
<p>One<p>Two
|
|
||||||
#errors
|
|
||||||
3: Missing document type declaration
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <p>
|
|
||||||
| "One"
|
|
||||||
| <p>
|
|
||||||
| "Two"
|
|
|
@ -1,354 +0,0 @@
|
||||||
#data
|
|
||||||
<a><p></a></p>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,10): adoption-agency-1.3
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| <p>
|
|
||||||
| <a>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<a>1<p>2</a>3</p>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,12): adoption-agency-1.3
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| "1"
|
|
||||||
| <p>
|
|
||||||
| <a>
|
|
||||||
| "2"
|
|
||||||
| "3"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<a>1<button>2</a>3</button>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,17): adoption-agency-1.3
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| "1"
|
|
||||||
| <button>
|
|
||||||
| <a>
|
|
||||||
| "2"
|
|
||||||
| "3"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<a>1<b>2</a>3</b>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,12): adoption-agency-1.3
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| "1"
|
|
||||||
| <b>
|
|
||||||
| "2"
|
|
||||||
| <b>
|
|
||||||
| "3"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<a>1<div>2<div>3</a>4</div>5</div>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,20): adoption-agency-1.3
|
|
||||||
(1,20): adoption-agency-1.3
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| "1"
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| "2"
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| "3"
|
|
||||||
| "4"
|
|
||||||
| "5"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<table><a>1<p>2</a>3</p>
|
|
||||||
#errors
|
|
||||||
(1,7): expected-doctype-but-got-start-tag
|
|
||||||
(1,10): unexpected-start-tag-implies-table-voodoo
|
|
||||||
(1,11): unexpected-character-implies-table-voodoo
|
|
||||||
(1,14): unexpected-start-tag-implies-table-voodoo
|
|
||||||
(1,15): unexpected-character-implies-table-voodoo
|
|
||||||
(1,19): unexpected-end-tag-implies-table-voodoo
|
|
||||||
(1,19): adoption-agency-1.3
|
|
||||||
(1,20): unexpected-character-implies-table-voodoo
|
|
||||||
(1,24): unexpected-end-tag-implies-table-voodoo
|
|
||||||
(1,24): eof-in-table
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| "1"
|
|
||||||
| <p>
|
|
||||||
| <a>
|
|
||||||
| "2"
|
|
||||||
| "3"
|
|
||||||
| <table>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<b><b><a><p></a>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,16): adoption-agency-1.3
|
|
||||||
(1,16): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <a>
|
|
||||||
| <p>
|
|
||||||
| <a>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<b><a><b><p></a>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,16): adoption-agency-1.3
|
|
||||||
(1,16): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <b>
|
|
||||||
| <a>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <p>
|
|
||||||
| <a>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<a><b><b><p></a>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,16): adoption-agency-1.3
|
|
||||||
(1,16): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <p>
|
|
||||||
| <a>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,30): unexpected-end-tag
|
|
||||||
(1,35): adoption-agency-1.3
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <p>
|
|
||||||
| "1"
|
|
||||||
| <s>
|
|
||||||
| id="A"
|
|
||||||
| "2"
|
|
||||||
| <b>
|
|
||||||
| id="B"
|
|
||||||
| "3"
|
|
||||||
| <s>
|
|
||||||
| id="A"
|
|
||||||
| <b>
|
|
||||||
| id="B"
|
|
||||||
| "4"
|
|
||||||
| <b>
|
|
||||||
| id="B"
|
|
||||||
| "5"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<table><a>1<td>2</td>3</table>
|
|
||||||
#errors
|
|
||||||
(1,7): expected-doctype-but-got-start-tag
|
|
||||||
(1,10): unexpected-start-tag-implies-table-voodoo
|
|
||||||
(1,11): unexpected-character-implies-table-voodoo
|
|
||||||
(1,15): unexpected-cell-in-table-body
|
|
||||||
(1,30): unexpected-implied-end-tag-in-table-view
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| "1"
|
|
||||||
| <a>
|
|
||||||
| "3"
|
|
||||||
| <table>
|
|
||||||
| <tbody>
|
|
||||||
| <tr>
|
|
||||||
| <td>
|
|
||||||
| "2"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<table>A<td>B</td>C</table>
|
|
||||||
#errors
|
|
||||||
(1,7): expected-doctype-but-got-start-tag
|
|
||||||
(1,8): unexpected-character-implies-table-voodoo
|
|
||||||
(1,12): unexpected-cell-in-table-body
|
|
||||||
(1,22): unexpected-character-implies-table-voodoo
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "AC"
|
|
||||||
| <table>
|
|
||||||
| <tbody>
|
|
||||||
| <tr>
|
|
||||||
| <td>
|
|
||||||
| "B"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<a><svg><tr><input></a>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,23): unexpected-end-tag
|
|
||||||
(1,23): adoption-agency-1.3
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| <svg svg>
|
|
||||||
| <svg tr>
|
|
||||||
| <svg input>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
|
|
||||||
#errors
|
|
||||||
(1,5): expected-doctype-but-got-start-tag
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): adoption-agency-1.3
|
|
||||||
(1,65): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <div>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<div><a><b><u><i><code><div></a>
|
|
||||||
#errors
|
|
||||||
(1,5): expected-doctype-but-got-start-tag
|
|
||||||
(1,32): adoption-agency-1.3
|
|
||||||
(1,32): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <b>
|
|
||||||
| <u>
|
|
||||||
| <i>
|
|
||||||
| <code>
|
|
||||||
| <u>
|
|
||||||
| <i>
|
|
||||||
| <code>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<b><b><b><b>x</b></b></b></b>y
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| "x"
|
|
||||||
| "y"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<p><b><b><b><b><p>x
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,18): unexpected-end-tag
|
|
||||||
(1,19): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <p>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <p>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| <b>
|
|
||||||
| "x"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<b><em><foo><foob><fooc><aside></b></em>
|
|
||||||
#errors
|
|
||||||
(1,35): adoption-agency-1.3
|
|
||||||
(1,40): adoption-agency-1.3
|
|
||||||
(1,40): expected-closing-tag-but-got-eof
|
|
||||||
#document-fragment
|
|
||||||
div
|
|
||||||
#document
|
|
||||||
| <b>
|
|
||||||
| <em>
|
|
||||||
| <foo>
|
|
||||||
| <foob>
|
|
||||||
| <fooc>
|
|
||||||
| <aside>
|
|
||||||
| <b>
|
|
|
@ -1,39 +0,0 @@
|
||||||
#data
|
|
||||||
<b>1<i>2<p>3</b>4
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,16): adoption-agency-1.3
|
|
||||||
(1,17): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <b>
|
|
||||||
| "1"
|
|
||||||
| <i>
|
|
||||||
| "2"
|
|
||||||
| <i>
|
|
||||||
| <p>
|
|
||||||
| <b>
|
|
||||||
| "3"
|
|
||||||
| "4"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<a><div><style></style><address><a>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-start-tag
|
|
||||||
(1,35): unexpected-start-tag-implies-end-tag
|
|
||||||
(1,35): adoption-agency-1.3
|
|
||||||
(1,35): adoption-agency-1.3
|
|
||||||
(1,35): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <a>
|
|
||||||
| <div>
|
|
||||||
| <a>
|
|
||||||
| <style>
|
|
||||||
| <address>
|
|
||||||
| <a>
|
|
||||||
| <a>
|
|
|
@ -1,178 +0,0 @@
|
||||||
#data
|
|
||||||
FOO<!-- BAR -->BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- BAR -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!-- BAR --!>BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,15): unexpected-bang-after-double-dash-in-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- BAR -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!-- BAR -- >BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,15): unexpected-char-in-comment
|
|
||||||
(1,21): eof-in-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- BAR -- >BAZ -->
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,15): unexpected-char-in-comment
|
|
||||||
(1,24): unexpected-char-in-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- BAR -- <QUX> -- MUX -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,15): unexpected-char-in-comment
|
|
||||||
(1,24): unexpected-char-in-comment
|
|
||||||
(1,31): unexpected-bang-after-double-dash-in-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- BAR -- <QUX> -- MUX -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,15): unexpected-char-in-comment
|
|
||||||
(1,24): unexpected-char-in-comment
|
|
||||||
(1,31): unexpected-char-in-comment
|
|
||||||
(1,35): eof-in-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!---->BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!--->BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,9): incorrect-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!-->BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,8): incorrect-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<?xml version="1.0">Hi
|
|
||||||
#errors
|
|
||||||
(1,1): expected-tag-name-but-got-question-mark
|
|
||||||
(1,22): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <!-- ?xml version="1.0" -->
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hi"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<?xml version="1.0">
|
|
||||||
#errors
|
|
||||||
(1,1): expected-tag-name-but-got-question-mark
|
|
||||||
(1,20): expected-doctype-but-got-eof
|
|
||||||
#document
|
|
||||||
| <!-- ?xml version="1.0" -->
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<?xml version
|
|
||||||
#errors
|
|
||||||
(1,1): expected-tag-name-but-got-question-mark
|
|
||||||
(1,13): expected-doctype-but-got-eof
|
|
||||||
#document
|
|
||||||
| <!-- ?xml version -->
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO<!----->BAZ
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,10): unexpected-dash-after-double-dash-in-comment
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO"
|
|
||||||
| <!-- - -->
|
|
||||||
| "BAZ"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<html><!-- comment --><title>Comment before head</title>
|
|
||||||
#errors
|
|
||||||
(1,6): expected-doctype-but-got-start-tag
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <!-- comment -->
|
|
||||||
| <head>
|
|
||||||
| <title>
|
|
||||||
| "Comment before head"
|
|
||||||
| <body>
|
|
|
@ -1,424 +0,0 @@
|
||||||
#data
|
|
||||||
<!DOCTYPE html>Hello
|
|
||||||
#errors
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!dOctYpE HtMl>Hello
|
|
||||||
#errors
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPEhtml>Hello
|
|
||||||
#errors
|
|
||||||
(1,9): need-space-after-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE>Hello
|
|
||||||
#errors
|
|
||||||
(1,9): need-space-after-doctype
|
|
||||||
(1,10): expected-doctype-name-but-got-right-bracket
|
|
||||||
(1,10): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE >
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE >Hello
|
|
||||||
#errors
|
|
||||||
(1,11): expected-doctype-name-but-got-right-bracket
|
|
||||||
(1,11): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE >
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato>Hello
|
|
||||||
#errors
|
|
||||||
(1,17): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato >Hello
|
|
||||||
#errors
|
|
||||||
(1,18): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato taco>Hello
|
|
||||||
#errors
|
|
||||||
(1,17): expected-space-or-right-bracket-in-doctype
|
|
||||||
(1,22): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato taco "ddd>Hello
|
|
||||||
#errors
|
|
||||||
(1,17): expected-space-or-right-bracket-in-doctype
|
|
||||||
(1,27): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato sYstEM>Hello
|
|
||||||
#errors
|
|
||||||
(1,24): unexpected-char-in-doctype
|
|
||||||
(1,24): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato sYstEM >Hello
|
|
||||||
#errors
|
|
||||||
(1,28): unexpected-char-in-doctype
|
|
||||||
(1,28): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato sYstEM ggg>Hello
|
|
||||||
#errors
|
|
||||||
(1,34): unexpected-char-in-doctype
|
|
||||||
(1,37): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato SYSTEM taco >Hello
|
|
||||||
#errors
|
|
||||||
(1,25): unexpected-char-in-doctype
|
|
||||||
(1,31): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato SYSTEM 'taco"'>Hello
|
|
||||||
#errors
|
|
||||||
(1,32): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato "" "taco"">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato SYSTEM "taco">Hello
|
|
||||||
#errors
|
|
||||||
(1,31): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato "" "taco">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato SYSTEM "tai'co">Hello
|
|
||||||
#errors
|
|
||||||
(1,33): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato "" "tai'co">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
|
|
||||||
#errors
|
|
||||||
(1,24): unexpected-char-in-doctype
|
|
||||||
(1,34): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato grass SYSTEM taco>Hello
|
|
||||||
#errors
|
|
||||||
(1,17): expected-space-or-right-bracket-in-doctype
|
|
||||||
(1,35): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato pUbLIc>Hello
|
|
||||||
#errors
|
|
||||||
(1,24): unexpected-end-of-doctype
|
|
||||||
(1,24): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato pUbLIc >Hello
|
|
||||||
#errors
|
|
||||||
(1,25): unexpected-end-of-doctype
|
|
||||||
(1,25): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato pUbLIcgoof>Hello
|
|
||||||
#errors
|
|
||||||
(1,24): unexpected-char-in-doctype
|
|
||||||
(1,28): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato PUBLIC goof>Hello
|
|
||||||
#errors
|
|
||||||
(1,25): unexpected-char-in-doctype
|
|
||||||
(1,29): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato PUBLIC "go'of">Hello
|
|
||||||
#errors
|
|
||||||
(1,32): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato "go'of" "">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato PUBLIC 'go'of'>Hello
|
|
||||||
#errors
|
|
||||||
(1,29): unexpected-char-in-doctype
|
|
||||||
(1,32): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato "go" "">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
|
|
||||||
#errors
|
|
||||||
(1,38): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato "go:hh of" "">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
|
|
||||||
#errors
|
|
||||||
(1,38): unexpected-char-in-doctype
|
|
||||||
(1,48): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE potato "W3C-//dfdf" "">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
|
||||||
"http://www.w3.org/TR/html4/strict.dtd">Hello
|
|
||||||
#errors
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE ...>Hello
|
|
||||||
#errors
|
|
||||||
(1,14): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE ...>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "Hello"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
||||||
#errors
|
|
||||||
(2,58): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
|
|
||||||
#errors
|
|
||||||
(2,54): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
|
|
||||||
<!-- internal declarations -->
|
|
||||||
]>
|
|
||||||
#errors
|
|
||||||
(1,23): expected-space-or-right-bracket-in-doctype
|
|
||||||
(2,30): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE root-element>
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "]>"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE html PUBLIC
|
|
||||||
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
|
|
||||||
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
|
|
||||||
#errors
|
|
||||||
(3,53): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
|
|
||||||
#errors
|
|
||||||
(1,63): unknown-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| <b>
|
|
||||||
| "Mine!"
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
|
|
||||||
#errors
|
|
||||||
(1,50): unexpected-char-in-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
|
|
||||||
#errors
|
|
||||||
(1,50): unexpected-char-in-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
|
|
||||||
#errors
|
|
||||||
(1,21): unexpected-char-in-doctype
|
|
||||||
(1,49): unexpected-char-in-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
|
|
||||||
#data
|
|
||||||
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
|
|
||||||
#errors
|
|
||||||
(1,21): unexpected-char-in-doctype
|
|
||||||
(1,49): unexpected-char-in-doctype
|
|
||||||
#document
|
|
||||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
Binary file not shown.
|
@ -1,795 +0,0 @@
|
||||||
#data
|
|
||||||
FOO>BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO>BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO>BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,6): named-entity-without-semicolon
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO>BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO> BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,6): named-entity-without-semicolon
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO> BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO>;;BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO>;;BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
I'm ¬it; I tell you
|
|
||||||
#errors
|
|
||||||
(1,4): expected-doctype-but-got-chars
|
|
||||||
(1,9): named-entity-without-semicolon
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "I'm ¬it; I tell you"
|
|
||||||
|
|
||||||
#data
|
|
||||||
I'm ∉ I tell you
|
|
||||||
#errors
|
|
||||||
(1,4): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "I'm ∉ I tell you"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO& BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO& BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO&<BAR>
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,9): expected-closing-tag-but-got-eof
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO&"
|
|
||||||
| <bar>
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO&&&>BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO&&&>BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO)BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO)BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOABAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOABAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOABAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOABAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO&#BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,5): expected-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO&#BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO&#ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,5): expected-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO&#ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOºR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,7): expected-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOºR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO&#xZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,6): expected-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO&#xZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO&#XZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,6): expected-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO&#XZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO)BAR
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,7): numeric-entity-without-semicolon
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO)BAR"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO䆺R
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,10): numeric-entity-without-semicolon
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO䆺R"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOAZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,8): numeric-entity-without-semicolon
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOAZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOxZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOxZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOyZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOyZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO€ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO€ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO‚ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO‚ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOƒZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOƒZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO„ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO„ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO…ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO…ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO†ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO†ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO‡ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO‡ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOˆZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOˆZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO‰ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO‰ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOŠZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOŠZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO‹ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO‹ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOŒZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOŒZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOŽZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOŽZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO‘ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO‘ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO’ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO’ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO“ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO“ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO”ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO”ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO•ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO•ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO–ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO–ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO—ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO—ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO˜ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO˜ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO™ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO™ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOšZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOšZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO›ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO›ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOœZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOœZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOžZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOžZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOŸZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOŸZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO퟿ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,11): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,13): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO􈟔ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOOZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,13): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOOZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,13): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,13): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,13): illegal-codepoint-for-numeric-entity
|
|
||||||
(1,13): eof-in-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,13): illegal-codepoint-for-numeric-entity
|
|
||||||
(1,13): eof-in-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,13): illegal-codepoint-for-numeric-entity
|
|
||||||
(1,13): eof-in-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,16): numeric-entity-without-semicolon
|
|
||||||
(1,16): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,15): numeric-entity-without-semicolon
|
|
||||||
(1,15): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
||||||
|
|
||||||
#data
|
|
||||||
FOO�ZOO
|
|
||||||
#errors
|
|
||||||
(1,3): expected-doctype-but-got-chars
|
|
||||||
(1,17): numeric-entity-without-semicolon
|
|
||||||
(1,17): illegal-codepoint-for-numeric-entity
|
|
||||||
#document
|
|
||||||
| <html>
|
|
||||||
| <head>
|
|
||||||
| <body>
|
|
||||||
| "FOO<4F>ZOO"
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue