1
1
from __future__ import absolute_import , division , unicode_literals
2
2
3
- import os
4
- import sys
5
- import unittest
6
- import warnings
7
- from difflib import unified_diff
3
+ import pytest
8
4
9
- try :
10
- unittest .TestCase .assertEqual
11
- except AttributeError :
12
- unittest .TestCase .assertEqual = unittest .TestCase .assertEquals
5
+ from .support import treeTypes
13
6
14
- from .support import get_data_files , TestData , convertExpected
15
-
16
- from html5lib import html5parser , treewalkers , treebuilders , treeadapters , constants
7
+ from html5lib import html5parser , treewalkers
17
8
from html5lib .filters .lint import Filter as Lint
18
9
19
-
20
- treeTypes = {
21
- "DOM" : {"builder" : treebuilders .getTreeBuilder ("dom" ),
22
- "walker" : treewalkers .getTreeWalker ("dom" )},
23
- }
24
-
25
- # Try whatever etree implementations are available from a list that are
26
- #"supposed" to work
27
- try :
28
- import xml .etree .ElementTree as ElementTree
29
- except ImportError :
30
- pass
31
- else :
32
- treeTypes ['ElementTree' ] = \
33
- {"builder" : treebuilders .getTreeBuilder ("etree" , ElementTree , fullTree = True ),
34
- "walker" : treewalkers .getTreeWalker ("etree" , ElementTree )}
35
-
36
- try :
37
- import xml .etree .cElementTree as ElementTree
38
- except ImportError :
39
- pass
40
- else :
41
- treeTypes ['cElementTree' ] = \
42
- {"builder" : treebuilders .getTreeBuilder ("etree" , ElementTree , fullTree = True ),
43
- "walker" : treewalkers .getTreeWalker ("etree" , ElementTree )}
44
-
45
-
46
- try :
47
- import lxml .etree as ElementTree # flake8: noqa
48
- except ImportError :
49
- pass
50
- else :
51
- treeTypes ['lxml_native' ] = \
52
- {"builder" : treebuilders .getTreeBuilder ("lxml" ),
53
- "walker" : treewalkers .getTreeWalker ("lxml" )}
54
-
55
-
56
- try :
57
- import genshi # flake8: noqa
58
- except ImportError :
59
- pass
60
- else :
61
- treeTypes ["genshi" ] = \
62
- {"builder" : treebuilders .getTreeBuilder ("dom" ),
63
- "adapter" : lambda tree : treeadapters .genshi .to_genshi (treewalkers .getTreeWalker ("dom" )(tree )),
64
- "walker" : treewalkers .getTreeWalker ("genshi" )}
65
-
66
10
import re
67
11
attrlist = re .compile (r"^(\s+)\w+=.*(\n\1\w+=.*)+" , re .M )
68
12
@@ -73,80 +17,29 @@ def sortattrs(x):
73
17
return "\n " .join (lines )
74
18
75
19
76
- class TokenTestCase (unittest .TestCase ):
77
- def test_all_tokens (self ):
78
- expected = [
79
- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'html' },
80
- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'head' },
81
- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'head' },
82
- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'body' },
83
- {'data' : 'a' , 'type' : 'Characters' },
84
- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'div' },
85
- {'data' : 'b' , 'type' : 'Characters' },
86
- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'div' },
87
- {'data' : 'c' , 'type' : 'Characters' },
88
- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'body' },
89
- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'html' }
90
- ]
91
- for treeName , treeCls in sorted (treeTypes .items ()):
92
- p = html5parser .HTMLParser (tree = treeCls ["builder" ])
93
- document = p .parse ("<html><head></head><body>a<div>b</div>c</body></html>" )
94
- document = treeCls .get ("adapter" , lambda x : x )(document )
95
- output = Lint (treeCls ["walker" ](document ))
96
- for expectedToken , outputToken in zip (expected , output ):
97
- self .assertEqual (expectedToken , outputToken )
98
-
99
-
100
- def runTreewalkerTest (innerHTML , input , expected , errors , treeClass ):
101
- warnings .resetwarnings ()
102
- warnings .simplefilter ("error" )
103
- try :
104
- p = html5parser .HTMLParser (tree = treeClass ["builder" ])
105
- if innerHTML :
106
- document = p .parseFragment (input , innerHTML )
107
- else :
108
- document = p .parse (input )
109
- except constants .DataLossWarning :
110
- # Ignore testcases we know we don't pass
111
- return
112
-
113
- document = treeClass .get ("adapter" , lambda x : x )(document )
114
- try :
115
- output = treewalkers .pprint (Lint (treeClass ["walker" ](document )))
116
- output = attrlist .sub (sortattrs , output )
117
- expected = attrlist .sub (sortattrs , convertExpected (expected ))
118
- diff = "" .join (unified_diff ([line + "\n " for line in expected .splitlines ()],
119
- [line + "\n " for line in output .splitlines ()],
120
- "Expected" , "Received" ))
121
- assert expected == output , "\n " .join ([
122
- "" , "Input:" , input ,
123
- "" , "Expected:" , expected ,
124
- "" , "Received:" , output ,
125
- "" , "Diff:" , diff ,
126
- ])
127
- except NotImplementedError :
128
- pass # Amnesty for those that confess...
129
-
130
-
131
- def test_treewalker ():
132
- sys .stdout .write ('Testing tree walkers ' + " " .join (list (treeTypes .keys ())) + "\n " )
133
-
20
+ def test_all_tokens ():
21
+ expected = [
22
+ {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'html' },
23
+ {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'head' },
24
+ {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'head' },
25
+ {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'body' },
26
+ {'data' : 'a' , 'type' : 'Characters' },
27
+ {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'div' },
28
+ {'data' : 'b' , 'type' : 'Characters' },
29
+ {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'div' },
30
+ {'data' : 'c' , 'type' : 'Characters' },
31
+ {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'body' },
32
+ {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'html' }
33
+ ]
134
34
for treeName , treeCls in sorted (treeTypes .items ()):
135
- files = get_data_files ('tree-construction' )
136
- for filename in files :
137
- testName = os .path .basename (filename ).replace (".dat" , "" )
138
- if testName in ("template" ,):
139
- continue
140
-
141
- tests = TestData (filename , "data" )
142
-
143
- for index , test in enumerate (tests ):
144
- (input , errors ,
145
- innerHTML , expected ) = [test [key ] for key in ("data" , "errors" ,
146
- "document-fragment" ,
147
- "document" )]
148
- errors = errors .split ("\n " )
149
- yield runTreewalkerTest , innerHTML , input , expected , errors , treeCls
35
+ if treeCls is None :
36
+ continue
37
+ p = html5parser .HTMLParser (tree = treeCls ["builder" ])
38
+ document = p .parse ("<html><head></head><body>a<div>b</div>c</body></html>" )
39
+ document = treeCls .get ("adapter" , lambda x : x )(document )
40
+ output = Lint (treeCls ["walker" ](document ))
41
+ for expectedToken , outputToken in zip (expected , output ):
42
+ assert expectedToken == outputToken
150
43
151
44
152
45
def set_attribute_on_first_child (docfrag , name , value , treeName ):
@@ -164,6 +57,8 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
164
57
def runTreewalkerEditTest (intext , expected , attrs_to_add , tree ):
165
58
"""tests what happens when we add attributes to the intext"""
166
59
treeName , treeClass = tree
60
+ if treeClass is None :
61
+ pytest .skip ("Treebuilder not loaded" )
167
62
parser = html5parser .HTMLParser (tree = treeClass ["builder" ])
168
63
document = parser .parseFragment (intext )
169
64
for nom , val in attrs_to_add :
@@ -172,7 +67,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
172
67
document = treeClass .get ("adapter" , lambda x : x )(document )
173
68
output = treewalkers .pprint (treeClass ["walker" ](document ))
174
69
output = attrlist .sub (sortattrs , output )
175
- if not output in expected :
70
+ if output not in expected :
176
71
raise AssertionError ("TreewalkerEditTest: %s\n Expected:\n %s\n Received:\n %s" % (treeName , expected , output ))
177
72
178
73
0 commit comments