from __future__ import absolute_import, division, unicode_literals

+import codecs
import json
import warnings
import re

+import pytest
from six import unichr

-from .support import get_data_files
-
from html5lib.tokenizer import HTMLTokenizer
from html5lib import constants, utils

@@ -172,27 +172,6 @@ def repl(m):
    return test


-def runTokenizerTest(test):
-    warnings.resetwarnings()
-    warnings.simplefilter("error")
-
-    expected = test['output']
-    if 'lastStartTag' not in test:
-        test['lastStartTag'] = None
-    parser = TokenizerTestParser(test['initialState'],
-                                 test['lastStartTag'])
-    tokens = parser.parse(test['input'])
-    received = normalizeTokens(tokens)
-    errorMsg = "\n".join(["\n\nInitial state:",
-                          test['initialState'],
-                          "\nInput:", test['input'],
-                          "\nExpected:", repr(expected),
-                          "\nreceived:", repr(tokens)])
-    errorMsg = errorMsg
-    ignoreErrorOrder = test.get('ignoreErrorOrder', False)
-    assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
-
-
def _doCapitalize(match):
    return match.group(1).upper()
@@ -205,18 +184,68 @@ def capitalize(s):
    return s


-def testTokenizer():
-    for filename in get_data_files('tokenizer', '*.test'):
-        with open(filename) as fp:
+class TokenizerFile(pytest.File):
+    def collect(self):
+        with codecs.open(str(self.fspath), "r", encoding="utf-8") as fp:
            tests = json.load(fp)
-            if 'tests' in tests:
-                for index, test in enumerate(tests['tests']):
-                    if 'initialStates' not in test:
-                        test["initialStates"] = ["Data state"]
-                    if 'doubleEscaped' in test:
-                        test = unescape(test)
-                    if test["input"] is None:
-                        continue  # Not valid input for this platform
-                    for initialState in test["initialStates"]:
-                        test["initialState"] = capitalize(initialState)
-                        yield runTokenizerTest, test
+        if 'tests' in tests:
+            for i, test in enumerate(tests['tests']):
+                yield TokenizerTestCollector(str(i), self, testdata=test)
+
+
+class TokenizerTestCollector(pytest.Collector):
+    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
+        super(TokenizerTestCollector, self).__init__(name, parent, config, session)
+        if 'initialStates' not in testdata:
+            testdata["initialStates"] = ["Data state"]
+        if 'doubleEscaped' in testdata:
+            testdata = unescape(testdata)
+        self.testdata = testdata
+
+    def collect(self):
+        for initialState in self.testdata["initialStates"]:
+            initialState = capitalize(initialState)
+            item = TokenizerTest(initialState,
+                                 self,
+                                 self.testdata,
+                                 initialState)
+            if self.testdata["input"] is None:
+                item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates"))
+            yield item
+
+
+class TokenizerTest(pytest.Item):
+    def __init__(self, name, parent, test, initialState):
+        super(TokenizerTest, self).__init__(name, parent)
+        self.obj = lambda: 1  # this is to hack around skipif needing a function!
+        self.test = test
+        self.initialState = initialState
+
+    def runtest(self):
+        warnings.resetwarnings()
+        warnings.simplefilter("error")
+
+        expected = self.test['output']
+        if 'lastStartTag' not in self.test:
+            self.test['lastStartTag'] = None
+        parser = TokenizerTestParser(self.initialState,
+                                     self.test['lastStartTag'])
+        tokens = parser.parse(self.test['input'])
+        received = normalizeTokens(tokens)
+        errorMsg = "\n".join(["\n\nInitial state:",
+                              self.initialState,
+                              "\nInput:", self.test['input'],
+                              "\nExpected:", repr(expected),
+                              "\nreceived:", repr(tokens)])
+        errorMsg = errorMsg
+        ignoreErrorOrder = self.test.get('ignoreErrorOrder', False)
+        assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
+
+    def repr_failure(self, excinfo):
+        traceback = excinfo.traceback
+        ntraceback = traceback.cut(path=__file__)
+        excinfo.traceback = ntraceback.filter()
+
+        return excinfo.getrepr(funcargs=True,
+                               showlocals=False,
+                               style="short", tbfilter=False)
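For reference, the tokenizer `.test` files that `TokenizerFile.collect` parses are JSON in the html5lib-tests format. A minimal illustration of the shape involved, written as the equivalent Python structure (the field names match that format; the concrete test entry is made up):

# Shape of a minimal *.test file (illustrative example, not from the suite).
# 'output' is the expected token stream; 'initialStates' is optional and,
# as in TokenizerTestCollector above, defaults to ["Data state"].
sample = {
    "tests": [
        {
            "description": "Simple start tag",
            "input": "<h1>",
            "output": [["StartTag", "h1", {}]],
        }
    ]
}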