Skip to content

Commit 9dc49f6

Browse files
committed
Merge pull request #240 from gsnedders/tree_types
Sort out the tokenizer and tree-construction tests; r=nobody!
2 parents b5b91cc + 68d6f34 commit 9dc49f6

File tree

6 files changed

+711
-924
lines changed

6 files changed

+711
-924
lines changed

.pytest.expect

+459-729
Large diffs are not rendered by default.

html5lib/tests/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import os.path
22

33
from .tree_construction import TreeConstructionFile
4+
from .tokenizer import TokenizerFile
45

56
_dir = os.path.abspath(os.path.dirname(__file__))
67
_testdata = os.path.join(_dir, "testdata")
78
_tree_construction = os.path.join(_testdata, "tree-construction")
9+
_tokenizer = os.path.join(_testdata, "tokenizer")
810

911

1012
def pytest_collectstart():
@@ -19,3 +21,6 @@ def pytest_collect_file(path, parent):
1921
return
2022
if path.ext == ".dat":
2123
return TreeConstructionFile(path, parent)
24+
elif dir == _tokenizer:
25+
if path.ext == ".test":
26+
return TokenizerFile(path, parent)

html5lib/tests/support.py

+33-7
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,24 @@
1313
os.path.pardir,
1414
os.path.pardir)))
1515

16-
from html5lib import treebuilders
16+
from html5lib import treebuilders, treewalkers, treeadapters
1717
del base_path
1818

1919
# Build a dict of available trees
20-
treeTypes = {"DOM": treebuilders.getTreeBuilder("dom")}
20+
treeTypes = {}
2121

22-
# Try whatever etree implementations are available from a list that are
23-
#"supposed" to work
22+
# DOM impls
23+
treeTypes["DOM"] = {
24+
"builder": treebuilders.getTreeBuilder("dom"),
25+
"walker": treewalkers.getTreeWalker("dom")
26+
}
27+
28+
# ElementTree impls
2429
import xml.etree.ElementTree as ElementTree
25-
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
30+
treeTypes['ElementTree'] = {
31+
"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
32+
"walker": treewalkers.getTreeWalker("etree", ElementTree)
33+
}
2634

2735
try:
2836
import xml.etree.cElementTree as cElementTree
@@ -33,14 +41,32 @@
3341
if cElementTree.Element is ElementTree.Element:
3442
treeTypes['cElementTree'] = None
3543
else:
36-
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
44+
treeTypes['cElementTree'] = {
45+
"builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
46+
"walker": treewalkers.getTreeWalker("etree", cElementTree)
47+
}
3748

3849
try:
3950
import lxml.etree as lxml # flake8: noqa
4051
except ImportError:
4152
treeTypes['lxml'] = None
4253
else:
43-
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
54+
treeTypes['lxml'] = {
55+
"builder": treebuilders.getTreeBuilder("lxml"),
56+
"walker": treewalkers.getTreeWalker("lxml")
57+
}
58+
59+
# Genshi impls
60+
try:
61+
import genshi # flake8: noqa
62+
except ImportError:
63+
pass
64+
else:
65+
treeTypes["genshi"] = {
66+
"builder": treebuilders.getTreeBuilder("dom"),
67+
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
68+
"walker": treewalkers.getTreeWalker("genshi")
69+
}
4470

4571

4672
def get_data_files(subdirectory, files='*.dat'):

html5lib/tests/test_treewalkers.py

+28-133
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,12 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
import os
4-
import sys
5-
import unittest
6-
import warnings
7-
from difflib import unified_diff
3+
import pytest
84

9-
try:
10-
unittest.TestCase.assertEqual
11-
except AttributeError:
12-
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
5+
from .support import treeTypes
136

14-
from .support import get_data_files, TestData, convertExpected
15-
16-
from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants
7+
from html5lib import html5parser, treewalkers
178
from html5lib.filters.lint import Filter as Lint
189

19-
20-
treeTypes = {
21-
"DOM": {"builder": treebuilders.getTreeBuilder("dom"),
22-
"walker": treewalkers.getTreeWalker("dom")},
23-
}
24-
25-
# Try whatever etree implementations are available from a list that are
26-
#"supposed" to work
27-
try:
28-
import xml.etree.ElementTree as ElementTree
29-
except ImportError:
30-
pass
31-
else:
32-
treeTypes['ElementTree'] = \
33-
{"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
34-
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
35-
36-
try:
37-
import xml.etree.cElementTree as ElementTree
38-
except ImportError:
39-
pass
40-
else:
41-
treeTypes['cElementTree'] = \
42-
{"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
43-
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
44-
45-
46-
try:
47-
import lxml.etree as ElementTree # flake8: noqa
48-
except ImportError:
49-
pass
50-
else:
51-
treeTypes['lxml_native'] = \
52-
{"builder": treebuilders.getTreeBuilder("lxml"),
53-
"walker": treewalkers.getTreeWalker("lxml")}
54-
55-
56-
try:
57-
import genshi # flake8: noqa
58-
except ImportError:
59-
pass
60-
else:
61-
treeTypes["genshi"] = \
62-
{"builder": treebuilders.getTreeBuilder("dom"),
63-
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
64-
"walker": treewalkers.getTreeWalker("genshi")}
65-
6610
import re
6711
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
6812

@@ -73,80 +17,29 @@ def sortattrs(x):
7317
return "\n".join(lines)
7418

7519

76-
class TokenTestCase(unittest.TestCase):
77-
def test_all_tokens(self):
78-
expected = [
79-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
80-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
81-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
82-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
83-
{'data': 'a', 'type': 'Characters'},
84-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
85-
{'data': 'b', 'type': 'Characters'},
86-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
87-
{'data': 'c', 'type': 'Characters'},
88-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
89-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
90-
]
91-
for treeName, treeCls in sorted(treeTypes.items()):
92-
p = html5parser.HTMLParser(tree=treeCls["builder"])
93-
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
94-
document = treeCls.get("adapter", lambda x: x)(document)
95-
output = Lint(treeCls["walker"](document))
96-
for expectedToken, outputToken in zip(expected, output):
97-
self.assertEqual(expectedToken, outputToken)
98-
99-
100-
def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
101-
warnings.resetwarnings()
102-
warnings.simplefilter("error")
103-
try:
104-
p = html5parser.HTMLParser(tree=treeClass["builder"])
105-
if innerHTML:
106-
document = p.parseFragment(input, innerHTML)
107-
else:
108-
document = p.parse(input)
109-
except constants.DataLossWarning:
110-
# Ignore testcases we know we don't pass
111-
return
112-
113-
document = treeClass.get("adapter", lambda x: x)(document)
114-
try:
115-
output = treewalkers.pprint(Lint(treeClass["walker"](document)))
116-
output = attrlist.sub(sortattrs, output)
117-
expected = attrlist.sub(sortattrs, convertExpected(expected))
118-
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
119-
[line + "\n" for line in output.splitlines()],
120-
"Expected", "Received"))
121-
assert expected == output, "\n".join([
122-
"", "Input:", input,
123-
"", "Expected:", expected,
124-
"", "Received:", output,
125-
"", "Diff:", diff,
126-
])
127-
except NotImplementedError:
128-
pass # Amnesty for those that confess...
129-
130-
131-
def test_treewalker():
132-
sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")
133-
20+
def test_all_tokens():
21+
expected = [
22+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
23+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
24+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
25+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
26+
{'data': 'a', 'type': 'Characters'},
27+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
28+
{'data': 'b', 'type': 'Characters'},
29+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
30+
{'data': 'c', 'type': 'Characters'},
31+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
32+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
33+
]
13434
for treeName, treeCls in sorted(treeTypes.items()):
135-
files = get_data_files('tree-construction')
136-
for filename in files:
137-
testName = os.path.basename(filename).replace(".dat", "")
138-
if testName in ("template",):
139-
continue
140-
141-
tests = TestData(filename, "data")
142-
143-
for index, test in enumerate(tests):
144-
(input, errors,
145-
innerHTML, expected) = [test[key] for key in ("data", "errors",
146-
"document-fragment",
147-
"document")]
148-
errors = errors.split("\n")
149-
yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
35+
if treeCls is None:
36+
continue
37+
p = html5parser.HTMLParser(tree=treeCls["builder"])
38+
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
39+
document = treeCls.get("adapter", lambda x: x)(document)
40+
output = Lint(treeCls["walker"](document))
41+
for expectedToken, outputToken in zip(expected, output):
42+
assert expectedToken == outputToken
15043

15144

15245
def set_attribute_on_first_child(docfrag, name, value, treeName):
@@ -164,6 +57,8 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
16457
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
16558
"""tests what happens when we add attributes to the intext"""
16659
treeName, treeClass = tree
60+
if treeClass is None:
61+
pytest.skip("Treebuilder not loaded")
16762
parser = html5parser.HTMLParser(tree=treeClass["builder"])
16863
document = parser.parseFragment(intext)
16964
for nom, val in attrs_to_add:
@@ -172,7 +67,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
17267
document = treeClass.get("adapter", lambda x: x)(document)
17368
output = treewalkers.pprint(treeClass["walker"](document))
17469
output = attrlist.sub(sortattrs, output)
175-
if not output in expected:
70+
if output not in expected:
17671
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
17772

17873

0 commit comments

Comments
 (0)