Commit 8ddd3b9

Generate tokenizer tests from testdata files
1 parent 7556f22 commit 8ddd3b9

3 files changed, +85 -51 lines changed

.pytest.expect

+14 -14

@@ -2,20 +2,20 @@ pytest-expect file v1
 (2, 7, 11, 'final', 0)
 b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
 b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4718]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4990]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4993]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4994]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4996]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4997]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4999]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5002]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5003]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5005]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5006]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5008]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5020]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5418]': FAIL
+u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::232::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::234::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::235::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::237::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::240::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::241::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::243::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::244::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::246::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::258::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::656::dataState': FAIL
 u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::DOM::parser::namespaced': FAIL
 u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::DOM::parser::void-namespace': FAIL
 u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::ElementTree::parser::namespaced': FAIL
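
The expected-failure entries above now use the node ids produced by the new collection code: testdata file, test index within that file, and initial tokenizer state, e.g. html5lib/tests/testdata/tokenizer/test3.test::228::dataState, replacing the opaque running index of the old generator-based testTokenizer. A trivial sketch of how such an id is put together (the names path, index and state are illustrative, not from the commit):

    path, index, state = "html5lib/tests/testdata/tokenizer/test3.test", 228, "dataState"
    node_id = "::".join([path, str(index), state])
    assert node_id == "html5lib/tests/testdata/tokenizer/test3.test::228::dataState"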

html5lib/tests/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import os.path
22

33
from .tree_construction import TreeConstructionFile
4+
from .tokenizer import TokenizerFile
45

56
_dir = os.path.abspath(os.path.dirname(__file__))
67
_testdata = os.path.join(_dir, "testdata")
78
_tree_construction = os.path.join(_testdata, "tree-construction")
9+
_tokenizer = os.path.join(_testdata, "tokenizer")
810

911

1012
def pytest_collectstart():
@@ -19,3 +21,6 @@ def pytest_collect_file(path, parent):
1921
return
2022
if path.ext == ".dat":
2123
return TreeConstructionFile(path, parent)
24+
elif dir == _tokenizer:
25+
if path.ext == ".test":
26+
return TokenizerFile(path, parent)
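
Put together with the surrounding code, the collection hook after this commit reads roughly as follows. This is a sketch reconstructed from the hunk context: how dir is derived, and the early return in the tree-construction branch (omitted here), are assumptions rather than something the diff shows.

    import os.path

    from .tree_construction import TreeConstructionFile
    from .tokenizer import TokenizerFile

    _dir = os.path.abspath(os.path.dirname(__file__))
    _testdata = os.path.join(_dir, "testdata")
    _tree_construction = os.path.join(_testdata, "tree-construction")
    _tokenizer = os.path.join(_testdata, "tokenizer")


    def pytest_collect_file(path, parent):
        # `path` is a py.path.local; deriving `dir` this way is an assumption.
        dir = os.path.abspath(path.dirname)
        if dir == _tree_construction:
            if path.ext == ".dat":
                return TreeConstructionFile(path, parent)
        elif dir == _tokenizer:
            if path.ext == ".test":
                return TokenizerFile(path, parent)

Paths the hook does not recognise fall through (return None) and are collected by pytest's default rules.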

html5lib/tests/test_tokenizer.py renamed to html5lib/tests/tokenizer.py

+66 -37

@@ -1,13 +1,13 @@
 from __future__ import absolute_import, division, unicode_literals
 
+import codecs
 import json
 import warnings
 import re
 
+import pytest
 from six import unichr
 
-from .support import get_data_files
-
 from html5lib.tokenizer import HTMLTokenizer
 from html5lib import constants, utils
 
@@ -172,27 +172,6 @@ def repl(m):
     return test
 
 
-def runTokenizerTest(test):
-    warnings.resetwarnings()
-    warnings.simplefilter("error")
-
-    expected = test['output']
-    if 'lastStartTag' not in test:
-        test['lastStartTag'] = None
-    parser = TokenizerTestParser(test['initialState'],
-                                 test['lastStartTag'])
-    tokens = parser.parse(test['input'])
-    received = normalizeTokens(tokens)
-    errorMsg = "\n".join(["\n\nInitial state:",
-                          test['initialState'],
-                          "\nInput:", test['input'],
-                          "\nExpected:", repr(expected),
-                          "\nreceived:", repr(tokens)])
-    errorMsg = errorMsg
-    ignoreErrorOrder = test.get('ignoreErrorOrder', False)
-    assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
-
-
 def _doCapitalize(match):
     return match.group(1).upper()
 
@@ -205,18 +184,68 @@ def capitalize(s):
     return s
 
 
-def testTokenizer():
-    for filename in get_data_files('tokenizer', '*.test'):
-        with open(filename) as fp:
+class TokenizerFile(pytest.File):
+    def collect(self):
+        with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
             tests = json.load(fp)
-            if 'tests' in tests:
-                for index, test in enumerate(tests['tests']):
-                    if 'initialStates' not in test:
-                        test["initialStates"] = ["Data state"]
-                    if 'doubleEscaped' in test:
-                        test = unescape(test)
-                    if test["input"] is None:
-                        continue  # Not valid input for this platform
-                    for initialState in test["initialStates"]:
-                        test["initialState"] = capitalize(initialState)
-                        yield runTokenizerTest, test
+        if 'tests' in tests:
+            for i, test in enumerate(tests['tests']):
+                yield TokenizerTestCollector(str(i), self, testdata=test)
+
+
+class TokenizerTestCollector(pytest.Collector):
+    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
+        super(TokenizerTestCollector, self).__init__(name, parent, config, session)
+        if 'initialStates' not in testdata:
+            testdata["initialStates"] = ["Data state"]
+        if 'doubleEscaped' in testdata:
+            testdata = unescape(testdata)
+        self.testdata = testdata
+
+    def collect(self):
+        for initialState in self.testdata["initialStates"]:
+            initialState = capitalize(initialState)
+            item = TokenizerTest(initialState,
+                                 self,
+                                 self.testdata,
+                                 initialState)
+            if self.testdata["input"] is None:
+                item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates"))
+            yield item
+
+
+class TokenizerTest(pytest.Item):
+    def __init__(self, name, parent, test, initialState):
+        super(TokenizerTest, self).__init__(name, parent)
+        self.obj = lambda: 1  # this is to hack around skipif needing a function!
+        self.test = test
+        self.initialState = initialState
+
+    def runtest(self):
+        warnings.resetwarnings()
+        warnings.simplefilter("error")
+
+        expected = self.test['output']
+        if 'lastStartTag' not in self.test:
+            self.test['lastStartTag'] = None
+        parser = TokenizerTestParser(self.initialState,
+                                     self.test['lastStartTag'])
+        tokens = parser.parse(self.test['input'])
+        received = normalizeTokens(tokens)
+        errorMsg = "\n".join(["\n\nInitial state:",
+                              self.initialState,
+                              "\nInput:", self.test['input'],
+                              "\nExpected:", repr(expected),
+                              "\nreceived:", repr(tokens)])
+        errorMsg = errorMsg
+        ignoreErrorOrder = self.test.get('ignoreErrorOrder', False)
+        assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
+
+    def repr_failure(self, excinfo):
+        traceback = excinfo.traceback
+        ntraceback = traceback.cut(path=__file__)
+        excinfo.traceback = ntraceback.filter()
+
+        return excinfo.getrepr(funcargs=True,
+                               showlocals=False,
+                               style="short", tbfilter=False)
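
For orientation, each .test file under testdata/tokenizer is JSON whose tests entries carry at least input and output, plus the optional initialStates, lastStartTag, doubleEscaped and ignoreErrorOrder keys read by the classes above. A hypothetical minimal entry (not taken from html5lib-tests), shown as the Python structure json.load would return:

    # Hypothetical parsed contents of, say, testdata/tokenizer/example.test
    tests = {
        "tests": [
            {
                "description": "plain character data",
                "input": "hello",
                "output": [["Character", "hello"]],
                "initialStates": ["Data state"],
            }
        ]
    }

TokenizerFile yields one TokenizerTestCollector per entry, named by its index ("0" here), and the collector yields one TokenizerTest per initial state, with the state name normalised by capitalize() ("Data state" becomes "dataState"). That is where node ids such as example.test::0::dataState come from, matching the entries now listed in .pytest.expect.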
