From 226247f7f0c2c08c5682af876e62e1452a039f3b Mon Sep 17 00:00:00 2001 From: Gabi Davar Date: Sat, 12 Dec 2015 11:23:36 +0200 Subject: [PATCH 1/2] setup refactor v1.5: * drop usage of charade - no longer maintained * refactor setup for simplicity --- README.rst | 5 ++--- debug-info.py | 2 +- html5lib/inputstream.py | 7 ++---- html5lib/tests/test_encoding.py | 7 ++---- requirements-optional.txt | 4 ++-- setup.py | 40 +++++++++++++-------------------- 6 files changed, 24 insertions(+), 41 deletions(-) diff --git a/README.rst b/README.rst index 879dabad..e73b1639 100644 --- a/README.rst +++ b/README.rst @@ -113,9 +113,8 @@ functionality: - ``genshi`` has a treewalker (but not builder); and -- ``charade`` can be used as a fallback when character encoding cannot - be determined; ``chardet``, from which it was forked, can also be used - on Python 2. +- ``chardet`` can be used as a fallback when character encoding cannot + be determined. - ``ordereddict`` can be used under Python 2.6 (``collections.OrderedDict`` is used instead on later versions) to diff --git a/debug-info.py b/debug-info.py index b5d2bb6a..f93fbdbe 100644 --- a/debug-info.py +++ b/debug-info.py @@ -12,7 +12,7 @@ "maxsize": sys.maxsize } -search_modules = ["charade", "chardet", "datrie", "genshi", "html5lib", "lxml", "six"] +search_modules = ["chardet", "datrie", "genshi", "html5lib", "lxml", "six"] found_modules = [] for m in search_modules: diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 5694efe3..6e8dbc54 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -471,10 +471,7 @@ def detectEncoding(self, parseMeta=True, chardet=True): if encoding is None and chardet: confidence = "tentative" try: - try: - from charade.universaldetector import UniversalDetector - except ImportError: - from chardet.universaldetector import UniversalDetector + from chardet.universaldetector import UniversalDetector buffers = [] detector = UniversalDetector() while not detector.done: @@ -490,7 +487,7 @@ def detectEncoding(self, parseMeta=True, chardet=True): except ImportError: pass # If all else fails use the default encoding - if encoding is None: + if not encoding: confidence = "tentative" encoding = lookupEncoding(self.defaultEncoding) diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 837e989f..0a6131f9 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -40,12 +40,9 @@ def test_encoding(): yield (runPreScanEncodingTest, test[b'data'], test[b'encoding']) try: - try: - import charade # flake8: noqa - except ImportError: - import chardet # flake8: noqa + import chardet # flake8: noqa except ImportError: - print("charade/chardet not found, skipping chardet tests") + print("chardet not found, skipping chardet tests") else: def test_chardet(): with open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb") as fp: diff --git a/requirements-optional.txt b/requirements-optional.txt index ac6539cb..f93805d9 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,9 +4,9 @@ # streams. genshi -# charade can be used as a fallback in case we are unable to determine +# chardet can be used as a fallback in case we are unable to determine # the encoding of a document. -charade +chardet>2.2 # lxml is supported with its own treebuilder ("lxml") and otherwise # uses the standard ElementTree support diff --git a/setup.py b/setup.py index 034bafbc..76495ccb 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,6 @@ -import ast -import os import codecs - -from setuptools import setup +from os.path import join, dirname +from setuptools import setup, find_packages classifiers=[ @@ -22,27 +20,19 @@ 'Topic :: Text Processing :: Markup :: HTML' ] -packages = ['html5lib'] + ['html5lib.'+name - for name in os.listdir(os.path.join('html5lib')) - if os.path.isdir(os.path.join('html5lib', name)) and - not name.startswith('.') and name != 'tests'] - -current_dir = os.path.dirname(__file__) -with codecs.open(os.path.join(current_dir, 'README.rst'), 'r', 'utf8') as readme_file: - with codecs.open(os.path.join(current_dir, 'CHANGES.rst'), 'r', 'utf8') as changes_file: +here = dirname(__file__) +with codecs.open(join(here, 'README.rst'), 'r', 'utf8') as readme_file: + with codecs.open(join(here, 'CHANGES.rst'), 'r', 'utf8') as changes_file: long_description = readme_file.read() + '\n' + changes_file.read() version = None -with open(os.path.join("html5lib", "__init__.py"), "rb") as init_file: - t = ast.parse(init_file.read(), filename="__init__.py", mode="exec") - assert isinstance(t, ast.Module) - assignments = filter(lambda x: isinstance(x, ast.Assign), t.body) - for a in assignments: - if (len(a.targets) == 1 and - isinstance(a.targets[0], ast.Name) and - a.targets[0].id == "__version__" and - isinstance(a.value, ast.Str)): - version = a.value.s +with open(join(here, 'html5lib', '__init__.py')) as fp: + for line in fp: + _locals = {} + if line.startswith('__version__'): + exec(line, None, _locals) + version = _locals['__version__'] + break setup(name='html5lib', version=version, @@ -53,7 +43,7 @@ classifiers=classifiers, maintainer='James Graham', maintainer_email='james@hoppipolla.co.uk', - packages=packages, + packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=[ 'six', 'webencodings', @@ -70,13 +60,13 @@ # Standard extras, will be installed when the extra is requested. "genshi": ["genshi"], - "charade": ["charade"], + "chardet": ["chardet>=2.2"], # The all extra combines a standard extra which will be used anytime # the all extra is requested, and it extends it with a conditional # extra that will be installed whenever the condition matches and the # all extra is requested. - "all": ["genshi", "charade"], + "all": ["genshi", "chardet>=2.2"], "all:python_implementation == 'CPython'": ["datrie", "lxml"], }, ) From cfdb7b47ae43682dc17d6ec193cc9a193a699361 Mon Sep 17 00:00:00 2001 From: Gabi Davar Date: Sat, 19 Mar 2016 11:28:07 +0200 Subject: [PATCH 2/2] fix env markers to match official setuptools ones --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 76495ccb..bb4f6f99 100644 --- a/setup.py +++ b/setup.py @@ -55,8 +55,8 @@ # A conditional extra will only install these items when the extra is # requested and the condition matches. - "datrie:python_implementation == 'CPython'": ["datrie"], - "lxml:python_implementation == 'CPython'": ["lxml"], + "datrie:platform_python_implementation == 'CPython'": ["datrie"], + "lxml:platform_python_implementation == 'CPython'": ["lxml"], # Standard extras, will be installed when the extra is requested. "genshi": ["genshi"], @@ -67,6 +67,6 @@ # extra that will be installed whenever the condition matches and the # all extra is requested. "all": ["genshi", "chardet>=2.2"], - "all:python_implementation == 'CPython'": ["datrie", "lxml"], + "all:platform_python_implementation == 'CPython'": ["datrie", "lxml"], }, )