From 3065630b2c43eeaf8d5d12fea930847e3e508350 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 16:19:02 +0100 Subject: [PATCH 1/2] Get rid of last remnants of our usage of the stdlib unittest --- html5lib/tests/test_encoding.py | 6 - html5lib/tests/test_parser2.py | 71 +++-- html5lib/tests/test_stream.py | 344 +++++++++++------------ html5lib/tests/test_whitespace_filter.py | 246 ++++++++-------- 4 files changed, 315 insertions(+), 352 deletions(-) diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 41b888c4..09504654 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -1,12 +1,6 @@ from __future__ import absolute_import, division, unicode_literals import os -import unittest - -try: - unittest.TestCase.assertEqual -except AttributeError: - unittest.TestCase.assertEqual = unittest.TestCase.assertEquals from .support import get_data_files, test_dir, errorMessage, TestData as _TestData from html5lib import HTMLParser, inputstream diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 01f16eea..26eff241 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -2,63 +2,56 @@ import io +import pytest + from . 
import support # flake8: noqa from html5lib import html5parser from html5lib.constants import namespaces from html5lib import treebuilders -import unittest # tests that aren't autogenerated from text files +@pytest.fixture +def dom_tree(): + return treebuilders.getTreeBuilder("dom") -class MoreParserTests(unittest.TestCase): - - def setUp(self): - self.dom_tree = treebuilders.getTreeBuilder("dom") +def test_assertDoctypeCloneable(dom_tree): + parser = html5parser.HTMLParser(tree=dom_tree) + doc = parser.parse('') + assert doc.cloneNode(True) is not None - def test_assertDoctypeCloneable(self): - parser = html5parser.HTMLParser(tree=self.dom_tree) - doc = parser.parse('') - self.assertTrue(doc.cloneNode(True)) - def test_line_counter(self): - # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0 - parser = html5parser.HTMLParser(tree=self.dom_tree) - parser.parse("
\nx\n>\n
") +def test_line_counter(dom_tree): + # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0 + parser = html5parser.HTMLParser(tree=dom_tree) + parser.parse("
\nx\n>\n
") - def test_namespace_html_elements_0_dom(self): - parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=True) - doc = parser.parse("") - self.assertTrue(doc.childNodes[0].namespaceURI == namespaces["html"]) - def test_namespace_html_elements_1_dom(self): - parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=False) - doc = parser.parse("") - self.assertTrue(doc.childNodes[0].namespaceURI is None) +def test_namespace_html_elements_0_dom(dom_tree): + parser = html5parser.HTMLParser(tree=dom_tree, namespaceHTMLElements=True) + doc = parser.parse("") + assert doc.childNodes[0].namespaceURI == namespaces["html"] - def test_namespace_html_elements_0_etree(self): - parser = html5parser.HTMLParser(namespaceHTMLElements=True) - doc = parser.parse("") - self.assertTrue(doc.tag == "{%s}html" % (namespaces["html"],)) - def test_namespace_html_elements_1_etree(self): - parser = html5parser.HTMLParser(namespaceHTMLElements=False) - doc = parser.parse("") - self.assertTrue(doc.tag == "html") +def test_namespace_html_elements_1_dom(dom_tree): + parser = html5parser.HTMLParser(tree=dom_tree, namespaceHTMLElements=False) + doc = parser.parse("") + assert doc.childNodes[0].namespaceURI is None - def test_unicode_file(self): - parser = html5parser.HTMLParser() - parser.parse(io.StringIO("a")) +def test_namespace_html_elements_0_etree(): + parser = html5parser.HTMLParser(namespaceHTMLElements=True) + doc = parser.parse("") + assert doc.tag == "{%s}html" % (namespaces["html"],) -def buildTestSuite(): - return unittest.defaultTestLoader.loadTestsFromName(__name__) +def test_namespace_html_elements_1_etree(): + parser = html5parser.HTMLParser(namespaceHTMLElements=False) + doc = parser.parse("") + assert doc.tag == "html" -def main(): - buildTestSuite() - unittest.main() -if __name__ == '__main__': - main() +def test_unicode_file(): + parser = html5parser.HTMLParser() + parser.parse(io.StringIO("a")) diff --git a/html5lib/tests/test_stream.py 
b/html5lib/tests/test_stream.py index ed203766..3b659fbb 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, unicode_literals from . import support # flake8: noqa -import unittest import codecs from io import BytesIO import socket @@ -12,66 +11,65 @@ from html5lib.inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) -class BufferedStreamTest(unittest.TestCase): - def test_basic(self): - s = b"abc" - fp = BufferedStream(BytesIO(s)) - read = fp.read(10) - assert read == s - - def test_read_length(self): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert read1 == b"a" - read2 = fp.read(2) - assert read2 == b"bc" - read3 = fp.read(3) - assert read3 == b"def" - read4 = fp.read(4) - assert read4 == b"" - - def test_tell(self): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert fp.tell() == 1 - read2 = fp.read(2) - assert fp.tell() == 3 - read3 = fp.read(3) - assert fp.tell() == 6 - read4 = fp.read(4) - assert fp.tell() == 6 - - def test_seek(self): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert read1 == b"a" - fp.seek(0) - read2 = fp.read(1) - assert read2 == b"a" - read3 = fp.read(2) - assert read3 == b"bc" - fp.seek(2) - read4 = fp.read(2) - assert read4 == b"cd" - fp.seek(4) - read5 = fp.read(2) - assert read5 == b"ef" - - def test_seek_tell(self): - fp = BufferedStream(BytesIO(b"abcdef")) - read1 = fp.read(1) - assert fp.tell() == 1 - fp.seek(0) - read2 = fp.read(1) - assert fp.tell() == 1 - read3 = fp.read(2) - assert fp.tell() == 3 - fp.seek(2) - read4 = fp.read(2) - assert fp.tell() == 4 - fp.seek(4) - read5 = fp.read(2) - assert fp.tell() == 6 +def test_basic(): + s = b"abc" + fp = BufferedStream(BytesIO(s)) + read = fp.read(10) + assert read == s + +def test_read_length(): + fp = BufferedStream(BytesIO(b"abcdef")) + read1 = fp.read(1) + assert read1 == b"a" + 
read2 = fp.read(2) + assert read2 == b"bc" + read3 = fp.read(3) + assert read3 == b"def" + read4 = fp.read(4) + assert read4 == b"" + +def test_tell(): + fp = BufferedStream(BytesIO(b"abcdef")) + read1 = fp.read(1) + assert fp.tell() == 1 + read2 = fp.read(2) + assert fp.tell() == 3 + read3 = fp.read(3) + assert fp.tell() == 6 + read4 = fp.read(4) + assert fp.tell() == 6 + +def test_seek(): + fp = BufferedStream(BytesIO(b"abcdef")) + read1 = fp.read(1) + assert read1 == b"a" + fp.seek(0) + read2 = fp.read(1) + assert read2 == b"a" + read3 = fp.read(2) + assert read3 == b"bc" + fp.seek(2) + read4 = fp.read(2) + assert read4 == b"cd" + fp.seek(4) + read5 = fp.read(2) + assert read5 == b"ef" + +def test_seek_tell(): + fp = BufferedStream(BytesIO(b"abcdef")) + read1 = fp.read(1) + assert fp.tell() == 1 + fp.seek(0) + read2 = fp.read(1) + assert fp.tell() == 1 + read3 = fp.read(2) + assert fp.tell() == 3 + fp.seek(2) + read4 = fp.read(2) + assert fp.tell() == 4 + fp.seek(4) + read5 = fp.read(2) + assert fp.tell() == 6 class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream): @@ -82,122 +80,108 @@ class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream): _defaultChunkSize = 2 -class HTMLInputStreamTest(unittest.TestCase): - - def test_char_ascii(self): - stream = HTMLInputStream(b"'", encoding='ascii') - self.assertEqual(stream.charEncoding[0].name, 'windows-1252') - self.assertEqual(stream.char(), "'") - - def test_char_utf8(self): - stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8') - self.assertEqual(stream.charEncoding[0].name, 'utf-8') - self.assertEqual(stream.char(), '\u2018') - - def test_char_win1252(self): - stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252')) - self.assertEqual(stream.charEncoding[0].name, 'windows-1252') - self.assertEqual(stream.char(), "\xa9") - self.assertEqual(stream.char(), "\xf1") - self.assertEqual(stream.char(), "\u2019") - - def test_bom(self): - stream = HTMLInputStream(codecs.BOM_UTF8 + 
b"'") - self.assertEqual(stream.charEncoding[0].name, 'utf-8') - self.assertEqual(stream.char(), "'") - - def test_utf_16(self): - stream = HTMLInputStream((' ' * 1025).encode('utf-16')) - self.assertTrue(stream.charEncoding[0].name in ['utf-16le', 'utf-16be'], stream.charEncoding) - self.assertEqual(len(stream.charsUntil(' ', True)), 1025) - - def test_newlines(self): - stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe") - self.assertEqual(stream.position(), (1, 0)) - self.assertEqual(stream.charsUntil('c'), "a\nbb\n") - self.assertEqual(stream.position(), (3, 0)) - self.assertEqual(stream.charsUntil('x'), "ccc\ndddd") - self.assertEqual(stream.position(), (4, 4)) - self.assertEqual(stream.charsUntil('e'), "x") - self.assertEqual(stream.position(), (4, 5)) - - def test_newlines2(self): - size = HTMLUnicodeInputStream._defaultChunkSize - stream = HTMLInputStream("\r" * size + "\n") - self.assertEqual(stream.charsUntil('x'), "\n" * size) - - def test_position(self): - stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh") - self.assertEqual(stream.position(), (1, 0)) - self.assertEqual(stream.charsUntil('c'), "a\nbb\n") - self.assertEqual(stream.position(), (3, 0)) - stream.unget("\n") - self.assertEqual(stream.position(), (2, 2)) - self.assertEqual(stream.charsUntil('c'), "\n") - self.assertEqual(stream.position(), (3, 0)) - stream.unget("\n") - self.assertEqual(stream.position(), (2, 2)) - self.assertEqual(stream.char(), "\n") - self.assertEqual(stream.position(), (3, 0)) - self.assertEqual(stream.charsUntil('e'), "ccc\nddd") - self.assertEqual(stream.position(), (4, 3)) - self.assertEqual(stream.charsUntil('h'), "e\nf\ng") - self.assertEqual(stream.position(), (6, 1)) - - def test_position2(self): - stream = HTMLUnicodeInputStreamShortChunk("abc\nd") - self.assertEqual(stream.position(), (1, 0)) - self.assertEqual(stream.char(), "a") - self.assertEqual(stream.position(), (1, 1)) - 
self.assertEqual(stream.char(), "b") - self.assertEqual(stream.position(), (1, 2)) - self.assertEqual(stream.char(), "c") - self.assertEqual(stream.position(), (1, 3)) - self.assertEqual(stream.char(), "\n") - self.assertEqual(stream.position(), (2, 0)) - self.assertEqual(stream.char(), "d") - self.assertEqual(stream.position(), (2, 1)) - - def test_python_issue_20007(self): - """ - Make sure we have a work-around for Python bug #20007 - http://bugs.python.org/issue20007 - """ - class FakeSocket(object): - def makefile(self, _mode, _bufsize=None): - return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - - source = http_client.HTTPResponse(FakeSocket()) - source.begin() - stream = HTMLInputStream(source) - self.assertEqual(stream.charsUntil(" "), "Text") - - def test_python_issue_20007_b(self): - """ - Make sure we have a work-around for Python bug #20007 - http://bugs.python.org/issue20007 - """ - if six.PY2: - return - - class FakeSocket(object): - def makefile(self, _mode, _bufsize=None): - return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - - source = http_client.HTTPResponse(FakeSocket()) - source.begin() - wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") - stream = HTMLInputStream(wrapped) - self.assertEqual(stream.charsUntil(" "), "Text") - - -def buildTestSuite(): - return unittest.defaultTestLoader.loadTestsFromName(__name__) - - -def main(): - buildTestSuite() - unittest.main() - -if __name__ == '__main__': - main() +def test_char_ascii(): + stream = HTMLInputStream(b"'", encoding='ascii') + assert stream.charEncoding[0].name == 'windows-1252' + assert stream.char() == "'" + +def test_char_utf8(): + stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8') + assert stream.charEncoding[0].name == 'utf-8' + assert stream.char() == '\u2018' + +def test_char_win1252(): + stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252')) + assert stream.charEncoding[0].name == 'windows-1252' + assert stream.char() == "\xa9" + 
assert stream.char() == "\xf1" + assert stream.char() == "\u2019" + +def test_bom(): + stream = HTMLInputStream(codecs.BOM_UTF8 + b"'") + assert stream.charEncoding[0].name == 'utf-8' + assert stream.char() == "'" + +def test_utf_16(): + stream = HTMLInputStream((' ' * 1025).encode('utf-16')) + assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be'] + assert len(stream.charsUntil(' ', True)) == 1025 + +def test_newlines(): + stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe") + assert stream.position() == (1, 0) + assert stream.charsUntil('c') == "a\nbb\n" + assert stream.position() == (3, 0) + assert stream.charsUntil('x') == "ccc\ndddd" + assert stream.position() == (4, 4) + assert stream.charsUntil('e') == "x" + assert stream.position() == (4, 5) + +def test_newlines2(): + size = HTMLUnicodeInputStream._defaultChunkSize + stream = HTMLInputStream("\r" * size + "\n") + assert stream.charsUntil('x') == "\n" * size + +def test_position(): + stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh") + assert stream.position() == (1, 0) + assert stream.charsUntil('c') == "a\nbb\n" + assert stream.position() == (3, 0) + stream.unget("\n") + assert stream.position() == (2, 2) + assert stream.charsUntil('c') == "\n" + assert stream.position() == (3, 0) + stream.unget("\n") + assert stream.position() == (2, 2) + assert stream.char() == "\n" + assert stream.position() == (3, 0) + assert stream.charsUntil('e') == "ccc\nddd" + assert stream.position() == (4, 3) + assert stream.charsUntil('h') == "e\nf\ng" + assert stream.position() == (6, 1) + +def test_position2(): + stream = HTMLUnicodeInputStreamShortChunk("abc\nd") + assert stream.position() == (1, 0) + assert stream.char() == "a" + assert stream.position() == (1, 1) + assert stream.char() == "b" + assert stream.position() == (1, 2) + assert stream.char() == "c" + assert stream.position() == (1, 3) + assert stream.char() == "\n" + assert 
stream.position() == (2, 0) + assert stream.char() == "d" + assert stream.position() == (2, 1) + +def test_python_issue_20007(): + """ + Make sure we have a work-around for Python bug #20007 + http://bugs.python.org/issue20007 + """ + class FakeSocket(object): + def makefile(self, _mode, _bufsize=None): + return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") + + source = http_client.HTTPResponse(FakeSocket()) + source.begin() + stream = HTMLInputStream(source) + assert stream.charsUntil(" ") == "Text" + +def test_python_issue_20007_b(): + """ + Make sure we have a work-around for Python bug #20007 + http://bugs.python.org/issue20007 + """ + if six.PY2: + return + + class FakeSocket(object): + def makefile(self, _mode, _bufsize=None): + return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") + + source = http_client.HTTPResponse(FakeSocket()) + source.begin() + wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") + stream = HTMLInputStream(wrapped) + assert stream.charsUntil(" ") == "Text" diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py index 9ed27fd6..e9da6140 100644 --- a/html5lib/tests/test_whitespace_filter.py +++ b/html5lib/tests/test_whitespace_filter.py @@ -1,133 +1,125 @@ from __future__ import absolute_import, division, unicode_literals -import unittest - from html5lib.filters.whitespace import Filter from html5lib.constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) -try: - unittest.TestCase.assertEqual -except AttributeError: - unittest.TestCase.assertEqual = unittest.TestCase.assertEquals - - -class TestCase(unittest.TestCase): - def runTest(self, input, expected): - output = list(Filter(input)) - errorMsg = "\n".join(["\n\nInput:", str(input), - "\nExpected:", str(expected), - "\nReceived:", str(output)]) - self.assertEqual(output, expected, errorMsg) - - def runTestUnmodifiedOutput(self, input): - self.runTest(input, input) - - def testPhrasingElements(self): - 
self.runTestUnmodifiedOutput( - [{"type": "Characters", "data": "This is a "}, - {"type": "StartTag", "name": "span", "data": []}, - {"type": "Characters", "data": "phrase"}, - {"type": "EndTag", "name": "span", "data": []}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "Characters", "data": "with"}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "StartTag", "name": "em", "data": []}, - {"type": "Characters", "data": "emphasised text"}, - {"type": "EndTag", "name": "em", "data": []}, - {"type": "Characters", "data": " and an "}, - {"type": "StartTag", "name": "img", "data": [["alt", "image"]]}, - {"type": "Characters", "data": "."}]) - - def testLeadingWhitespace(self): - self.runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "Characters", "data": "foo"}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "Characters", "data": "foo"}, - {"type": "EndTag", "name": "p", "data": []}]) - - def testLeadingWhitespaceAsCharacters(self): - self.runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": spaceCharacters + "foo"}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": " foo"}, - {"type": "EndTag", "name": "p", "data": []}]) - - def testTrailingWhitespace(self): - self.runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo"}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo"}, - {"type": "SpaceCharacters", "data": " "}, - {"type": "EndTag", "name": "p", "data": []}]) - - def testTrailingWhitespaceAsCharacters(self): - self.runTest( - [{"type": "StartTag", "name": "p", "data": []}, 
- {"type": "Characters", "data": "foo" + spaceCharacters}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo "}, - {"type": "EndTag", "name": "p", "data": []}]) - - def testWhitespace(self): - self.runTest( - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo" + spaceCharacters + "bar"}, - {"type": "EndTag", "name": "p", "data": []}], - [{"type": "StartTag", "name": "p", "data": []}, - {"type": "Characters", "data": "foo bar"}, - {"type": "EndTag", "name": "p", "data": []}]) - - def testLeadingWhitespaceInPre(self): - self.runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "Characters", "data": "foo"}, - {"type": "EndTag", "name": "pre", "data": []}]) - - def testLeadingWhitespaceAsCharactersInPre(self): - self.runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": spaceCharacters + "foo"}, - {"type": "EndTag", "name": "pre", "data": []}]) - - def testTrailingWhitespaceInPre(self): - self.runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": "foo"}, - {"type": "SpaceCharacters", "data": spaceCharacters}, - {"type": "EndTag", "name": "pre", "data": []}]) - - def testTrailingWhitespaceAsCharactersInPre(self): - self.runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": "foo" + spaceCharacters}, - {"type": "EndTag", "name": "pre", "data": []}]) - - def testWhitespaceInPre(self): - self.runTestUnmodifiedOutput( - [{"type": "StartTag", "name": "pre", "data": []}, - {"type": "Characters", "data": "foo" + spaceCharacters + "bar"}, - {"type": "EndTag", "name": "pre", "data": []}]) - - -def buildTestSuite(): - return unittest.defaultTestLoader.loadTestsFromName(__name__) - - -def main(): - 
buildTestSuite() - unittest.main() - -if __name__ == "__main__": - main() + +def runTest(input, expected): + output = list(Filter(input)) + errorMsg = "\n".join(["\n\nInput:", str(input), + "\nExpected:", str(expected), + "\nReceived:", str(output)]) + assert expected == output, errorMsg + + +def runTestUnmodifiedOutput(input): + runTest(input, input) + + +def testPhrasingElements(): + runTestUnmodifiedOutput( + [{"type": "Characters", "data": "This is a "}, + {"type": "StartTag", "name": "span", "data": []}, + {"type": "Characters", "data": "phrase"}, + {"type": "EndTag", "name": "span", "data": []}, + {"type": "SpaceCharacters", "data": " "}, + {"type": "Characters", "data": "with"}, + {"type": "SpaceCharacters", "data": " "}, + {"type": "StartTag", "name": "em", "data": []}, + {"type": "Characters", "data": "emphasised text"}, + {"type": "EndTag", "name": "em", "data": []}, + {"type": "Characters", "data": " and an "}, + {"type": "StartTag", "name": "img", "data": [["alt", "image"]]}, + {"type": "Characters", "data": "."}]) + + +def testLeadingWhitespace(): + runTest( + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "SpaceCharacters", "data": spaceCharacters}, + {"type": "Characters", "data": "foo"}, + {"type": "EndTag", "name": "p", "data": []}], + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "SpaceCharacters", "data": " "}, + {"type": "Characters", "data": "foo"}, + {"type": "EndTag", "name": "p", "data": []}]) + + +def testLeadingWhitespaceAsCharacters(): + runTest( + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": spaceCharacters + "foo"}, + {"type": "EndTag", "name": "p", "data": []}], + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": " foo"}, + {"type": "EndTag", "name": "p", "data": []}]) + + +def testTrailingWhitespace(): + runTest( + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": "foo"}, + {"type": "SpaceCharacters", "data": 
spaceCharacters}, + {"type": "EndTag", "name": "p", "data": []}], + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": "foo"}, + {"type": "SpaceCharacters", "data": " "}, + {"type": "EndTag", "name": "p", "data": []}]) + + +def testTrailingWhitespaceAsCharacters(): + runTest( + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": "foo" + spaceCharacters}, + {"type": "EndTag", "name": "p", "data": []}], + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": "foo "}, + {"type": "EndTag", "name": "p", "data": []}]) + + +def testWhitespace(): + runTest( + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": "foo" + spaceCharacters + "bar"}, + {"type": "EndTag", "name": "p", "data": []}], + [{"type": "StartTag", "name": "p", "data": []}, + {"type": "Characters", "data": "foo bar"}, + {"type": "EndTag", "name": "p", "data": []}]) + + +def testLeadingWhitespaceInPre(): + runTestUnmodifiedOutput( + [{"type": "StartTag", "name": "pre", "data": []}, + {"type": "SpaceCharacters", "data": spaceCharacters}, + {"type": "Characters", "data": "foo"}, + {"type": "EndTag", "name": "pre", "data": []}]) + + +def testLeadingWhitespaceAsCharactersInPre(): + runTestUnmodifiedOutput( + [{"type": "StartTag", "name": "pre", "data": []}, + {"type": "Characters", "data": spaceCharacters + "foo"}, + {"type": "EndTag", "name": "pre", "data": []}]) + + +def testTrailingWhitespaceInPre(): + runTestUnmodifiedOutput( + [{"type": "StartTag", "name": "pre", "data": []}, + {"type": "Characters", "data": "foo"}, + {"type": "SpaceCharacters", "data": spaceCharacters}, + {"type": "EndTag", "name": "pre", "data": []}]) + + +def testTrailingWhitespaceAsCharactersInPre(): + runTestUnmodifiedOutput( + [{"type": "StartTag", "name": "pre", "data": []}, + {"type": "Characters", "data": "foo" + spaceCharacters}, + {"type": "EndTag", "name": "pre", "data": []}]) + + +def testWhitespaceInPre(): + 
runTestUnmodifiedOutput( + [{"type": "StartTag", "name": "pre", "data": []}, + {"type": "Characters", "data": "foo" + spaceCharacters + "bar"}, + {"type": "EndTag", "name": "pre", "data": []}]) From 1df7e5f6ec91584f43b5067d18732673f842f587 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 16:24:56 +0100 Subject: [PATCH 2/2] Cleanup test_parser2.py a bit --- html5lib/tests/test_parser2.py | 44 ++++++++++++++++------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 26eff241..2f3ba2c8 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -7,51 +7,47 @@ from . import support # flake8: noqa from html5lib import html5parser from html5lib.constants import namespaces -from html5lib import treebuilders +from html5lib import parse # tests that aren't autogenerated from text files -@pytest.fixture -def dom_tree(): - return treebuilders.getTreeBuilder("dom") - - -def test_assertDoctypeCloneable(dom_tree): - parser = html5parser.HTMLParser(tree=dom_tree) - doc = parser.parse('') +def test_assertDoctypeCloneable(): + doc = parse('', treebuilder="dom") assert doc.cloneNode(True) is not None -def test_line_counter(dom_tree): +def test_line_counter(): # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0 - parser = html5parser.HTMLParser(tree=dom_tree) - parser.parse("
\nx\n>\n
") + assert parse("
\nx\n>\n
") is not None -def test_namespace_html_elements_0_dom(dom_tree): - parser = html5parser.HTMLParser(tree=dom_tree, namespaceHTMLElements=True) - doc = parser.parse("") +def test_namespace_html_elements_0_dom(): + doc = parse("", + treebuilder="dom", + namespaceHTMLElements=True) assert doc.childNodes[0].namespaceURI == namespaces["html"] -def test_namespace_html_elements_1_dom(dom_tree): - parser = html5parser.HTMLParser(tree=dom_tree, namespaceHTMLElements=False) - doc = parser.parse("") +def test_namespace_html_elements_1_dom(): + doc = parse("", + treebuilder="dom", + namespaceHTMLElements=False) assert doc.childNodes[0].namespaceURI is None def test_namespace_html_elements_0_etree(): - parser = html5parser.HTMLParser(namespaceHTMLElements=True) - doc = parser.parse("") + doc = parse("", + treebuilder="etree", + namespaceHTMLElements=True) assert doc.tag == "{%s}html" % (namespaces["html"],) def test_namespace_html_elements_1_etree(): - parser = html5parser.HTMLParser(namespaceHTMLElements=False) - doc = parser.parse("") + doc = parse("", + treebuilder="etree", + namespaceHTMLElements=False) assert doc.tag == "html" def test_unicode_file(): - parser = html5parser.HTMLParser() - parser.parse(io.StringIO("a")) + assert parse(io.StringIO("a")) is not None