from __future__ import absolute_import, division, unicode_literals import itertools import re import warnings from difflib import unified_diff import pytest from .support import TestData, convert, convertExpected, treeTypes from html5lib import html5parser, constants, treewalkers from html5lib.filters.lint import Filter as Lint _attrlist_re = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M) def sortattrs(s): def replace(m): lines = m.group(0).split("\n") lines.sort() return "\n".join(lines) return _attrlist_re.sub(replace, s) class TreeConstructionFile(pytest.File): def collect(self): tests = TestData(str(self.fspath), "data") for i, test in enumerate(tests): yield TreeConstructionTest(str(i), self, testdata=test) class TreeConstructionTest(pytest.Collector): def __init__(self, name, parent=None, config=None, session=None, testdata=None): super(TreeConstructionTest, self).__init__(name, parent, config, session) self.testdata = testdata def collect(self): for treeName, treeAPIs in sorted(treeTypes.items()): for x in itertools.chain(self._getParserTests(treeName, treeAPIs), self._getTreeWalkerTests(treeName, treeAPIs)): yield x def _getParserTests(self, treeName, treeAPIs): if treeAPIs is not None and "adapter" in treeAPIs: return for namespaceHTMLElements in (True, False): if namespaceHTMLElements: nodeid = "%s::parser::namespaced" % treeName else: nodeid = "%s::parser::void-namespace" % treeName item = ParserTest(nodeid, self, self.testdata, treeAPIs["builder"] if treeAPIs is not None else None, namespaceHTMLElements) item.add_marker(getattr(pytest.mark, treeName)) item.add_marker(pytest.mark.parser) if namespaceHTMLElements: item.add_marker(pytest.mark.namespaced) yield item def _getTreeWalkerTests(self, treeName, treeAPIs): nodeid = "%s::treewalker" % treeName item = TreeWalkerTest(nodeid, self, self.testdata, treeAPIs) item.add_marker(getattr(pytest.mark, treeName)) item.add_marker(pytest.mark.treewalker) yield item def convertTreeDump(data): return "\n".join(convert(3)(data).split("\n")[1:]) namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub class ParserTest(pytest.Item): def __init__(self, name, parent, test, treeClass, namespaceHTMLElements): super(ParserTest, self).__init__(name, parent) self.test = test self.treeClass = treeClass self.namespaceHTMLElements = namespaceHTMLElements def runtest(self): if self.treeClass is None: pytest.skip("Treebuilder not loaded") p = html5parser.HTMLParser(tree=self.treeClass, namespaceHTMLElements=self.namespaceHTMLElements) input = self.test['data'] fragmentContainer = self.test['document-fragment'] expected = convertExpected(self.test['document']) expectedErrors = self.test['errors'].split("\n") if self.test['errors'] else [] scripting = False if 'script-on' in self.test: scripting = True with warnings.catch_warnings(): warnings.simplefilter("error") try: if fragmentContainer: document = p.parseFragment(input, fragmentContainer, scripting=scripting) else: document = p.parse(input, scripting=scripting) except constants.DataLossWarning: pytest.skip("data loss warning") output = convertTreeDump(p.tree.testSerializer(document)) expected = expected if self.namespaceHTMLElements: expected = namespaceExpected(r"\1", expected) errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, "\nReceived:", output]) assert expected == output, errorMsg errStr = [] for (line, col), errorcode, datavars in p.errors: assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) errStr.append("Line: %i Col: %i %s" % (line, col, constants.E[errorcode] % datavars)) errorMsg2 = "\n".join(["\n\nInput:", input, "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors), "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) if False: # we're currently not testing parse errors assert len(p.errors) == len(expectedErrors), errorMsg2 def repr_failure(self, excinfo): traceback = excinfo.traceback ntraceback = traceback.cut(path=__file__) excinfo.traceback = ntraceback.filter() return excinfo.getrepr(funcargs=True, showlocals=False, style="short", tbfilter=False) class TreeWalkerTest(pytest.Item): def __init__(self, name, parent, test, treeAPIs): super(TreeWalkerTest, self).__init__(name, parent) self.test = test self.treeAPIs = treeAPIs def runtest(self): if self.treeAPIs is None: pytest.skip("Treebuilder not loaded") p = html5parser.HTMLParser(tree=self.treeAPIs["builder"]) input = self.test['data'] fragmentContainer = self.test['document-fragment'] expected = convertExpected(self.test['document']) scripting = False if 'script-on' in self.test: scripting = True with warnings.catch_warnings(): warnings.simplefilter("error") try: if fragmentContainer: document = p.parseFragment(input, fragmentContainer, scripting=scripting) else: document = p.parse(input, scripting=scripting) except constants.DataLossWarning: pytest.skip("data loss warning") poutput = convertTreeDump(p.tree.testSerializer(document)) namespace_expected = namespaceExpected(r"\1", expected) if poutput != namespace_expected: pytest.skip("parser output incorrect") document = self.treeAPIs.get("adapter", lambda x: x)(document) try: output = treewalkers.pprint(Lint(self.treeAPIs["walker"](document))) output = sortattrs(output) expected = sortattrs(expected) diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()], [line + "\n" for line in output.splitlines()], "Expected", "Received")) assert expected == output, "\n".join([ "", "Input:", input, "", "Expected:", expected, "", "Received:", output, "", "Diff:", diff, ]) except NotImplementedError: pytest.skip("tree walker NotImplementedError") def repr_failure(self, excinfo): traceback = excinfo.traceback ntraceback = traceback.cut(path=__file__) excinfo.traceback = ntraceback.filter() return excinfo.getrepr(funcargs=True, showlocals=False, style="short", tbfilter=False)