From 5c159e120f65c0fc13e4586f2b4dd19286f9cdb7 Mon Sep 17 00:00:00 2001 From: Fred Brennan Date: Sat, 30 Jul 2022 14:30:38 -0400 Subject: [PATCH 1/5] [xml.etree.ElementTree] Add fine-grained formatting classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ShortEmptyElements — make it possible to remove space between end of tag and slash, also make it possible to turn this on and off based on tag being processed via `defaultdict`. * XMLDeclarationQuotes — change quote char used in XML declaration from `'` to `"`. --- Lib/xml/etree/ElementTree.py | 78 +++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index ebbe2b703bfd8f..bc4027775d371e 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -88,7 +88,9 @@ "XMLParser", "XMLPullParser", "register_namespace", "canonicalize", "C14NWriterTarget", - ] + "XMLDeclarationQuotes", + "ShortEmptyElements" +] VERSION = "1.3.0" @@ -99,6 +101,7 @@ import collections import collections.abc import contextlib +import enum from . import ElementPath @@ -508,6 +511,58 @@ def __eq__(self, other): # -------------------------------------------------------------------- +class XMLDeclarationQuotes(enum.Enum): + """ + Whether or not single quotes or double quotes ought to be used in the XML + declaration. + + *SINGLE* (default): + *DOUBLE*: + """ + SINGLE = "'" + DOUBLE = '"' + + def __str__(self): + return self.value + +class ShortEmptyElements(enum.Enum): + """ + This class creates backwards compatibility with the boolean value of + *short_empty_elements* that existed prior to 3.??. 
+ + Assuming the tag `<tag>`, the results will be: + + *SPACE* (default): `<tag />` + *NOSPACE*: `<tag/>` + *NONE*: `<tag></tag>` + """ + SPACE = " " + NOSPACE = "" + NONE = False + + def __bool__(self): + return self != ShortEmptyElements.NONE + + @classmethod + def _missing_(cls, value): + if value is enum.no_arg: + return cls.SPACE + elif isinstance(value, bool): + return cls.SPACE if value else cls.NONE + else: + return super()._missing_(value) + + @classmethod + def tag_defaultdict(cls, short_empty_elements): + if not isinstance(short_empty_elements, collections.defaultdict): + if isinstance(short_empty_elements, ShortEmptyElements): + return collections.defaultdict(lambda: short_empty_elements) + elif bool(short_empty_elements) is True: + return collections.defaultdict(lambda: ShortEmptyElements.SPACE) + else: + return collections.defaultdict(lambda: ShortEmptyElements.NONE) + else: + return short_empty_elements class ElementTree: """An XML element hierarchy. @@ -680,6 +735,7 @@ def iterfind(self, path, namespaces=None): def write(self, file_or_filename, encoding=None, xml_declaration=None, + xml_declaration_quotes=XMLDeclarationQuotes.SINGLE, default_namespace=None, method=None, *, short_empty_elements=True): @@ -695,6 +751,9 @@ def write(self, file_or_filename, is added if encoding IS NOT either of: US-ASCII, UTF-8, or Unicode + *xml_declaration_quotes* -- Changes character used in XML declaration, + see *XMLDeclarationQuotes*. + *default_namespace* -- sets the default XML namespace (for "xmlns") *method* -- either "xml" (default), "html, "text", or "c14n" @@ -703,8 +762,12 @@ def write(self, file_or_filename, that contain no content. If True (default) they are emitted as a single self-closed tag, otherwise they are emitted as a pair - of start/end tags + of start/end tags. + For more control, can be a + *ShortEmptyElements* object, or a + defaultdict keyed by tags as strings and + valued with such objects. 
""" if not method: method = "xml" @@ -720,13 +783,16 @@ def write(self, file_or_filename, (xml_declaration is None and encoding.lower() != "unicode" and declared_encoding.lower() not in ("utf-8", "us-ascii"))): - write("\n" % ( - declared_encoding,)) + write("\n" + .format(xml_declaration_quotes, declared_encoding)) + if not isinstance(xml_declaration_quotes, XMLDeclarationQuotes): + raise ValueError("Unknown type for `xml_declaration_quotes`") if method == "text": _serialize_text(write, self._root) else: qnames, namespaces = _namespaces(self._root, default_namespace) serialize = _serialize[method] + short_empty_elements = ShortEmptyElements.tag_defaultdict(short_empty_elements) serialize(write, self._root, qnames, namespaces, short_empty_elements=short_empty_elements) @@ -885,7 +951,7 @@ def _serialize_xml(write, elem, qnames, namespaces, else: v = _escape_attrib(v) write(" %s=\"%s\"" % (qnames[k], v)) - if text or len(elem) or not short_empty_elements: + if text or len(elem) or not bool(short_empty_elements[tag]): write(">") if text: write(_escape_cdata(text)) @@ -894,7 +960,7 @@ def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements=short_empty_elements) write("") else: - write(" />") + write(short_empty_elements[tag].value+"/>") if elem.tail: write(_escape_cdata(elem.tail)) From fb47e0d287f97d90ee3618e3f0ba1f1a8e8b7be1 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 30 Jul 2022 19:41:52 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Fredrick Brennan --- .../next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst diff --git a/Misc/NEWS.d/next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst 
b/Misc/NEWS.d/next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst new file mode 100644 index 00000000000000..68fbee9694294a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst @@ -0,0 +1 @@ +Added two fine-grained formatting options to ``xml.etree.ElementTree`` so programmers can prevent Git diffs from occurring due to incompatible XML libraries. Spaces can now be eliminated in e.g. ``<tag />`` (as ``<tag/>``) (``ShortEmptyElements``), and double quotes can be used in the XML declaration (``XMLDeclarationQuotes``). From 09de97b284746bb8c867d14ca1fc460dda5244fe Mon Sep 17 00:00:00 2001 From: Fredrick Brennan Date: Sat, 6 Aug 2022 15:43:58 -0400 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: scoder --- Lib/xml/etree/ElementTree.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index bc4027775d371e..2523329a86821e 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -525,6 +525,7 @@ class XMLDeclarationQuotes(enum.Enum): def __str__(self): return self.value + class ShortEmptyElements(enum.Enum): """ This class creates backwards compatibility with the boolean value of @@ -557,13 +558,14 @@ def tag_defaultdict(cls, short_empty_elements): if not isinstance(short_empty_elements, collections.defaultdict): if isinstance(short_empty_elements, ShortEmptyElements): return collections.defaultdict(lambda: short_empty_elements) - elif bool(short_empty_elements) is True: + elif short_empty_elements: return collections.defaultdict(lambda: ShortEmptyElements.SPACE) else: return collections.defaultdict(lambda: ShortEmptyElements.NONE) else: return short_empty_elements + class ElementTree: """An XML element hierarchy. 
@@ -951,7 +953,7 @@ def _serialize_xml(write, elem, qnames, namespaces, else: v = _escape_attrib(v) write(" %s=\"%s\"" % (qnames[k], v)) - if text or len(elem) or not bool(short_empty_elements[tag]): + if text or len(elem) or not short_empty_elements[tag]: write(">") if text: write(_escape_cdata(text)) From 602aaa3aeb3e29b2e40c470cc0f8d6a15c735031 Mon Sep 17 00:00:00 2001 From: Fredrick Brennan Date: Sat, 6 Aug 2022 15:44:17 -0400 Subject: [PATCH 4/5] Update Lib/xml/etree/ElementTree.py Co-authored-by: scoder --- Lib/xml/etree/ElementTree.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 2523329a86821e..63313cec886ef3 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -511,6 +511,7 @@ def __eq__(self, other): # -------------------------------------------------------------------- + class XMLDeclarationQuotes(enum.Enum): """ Whether or not single quotes or double quotes ought to be used in the XML From 236d258be5cdc472aa3f1e770710ceaced6dfe12 Mon Sep 17 00:00:00 2001 From: Fred Brennan Date: Sat, 6 Aug 2022 15:56:59 -0400 Subject: [PATCH 5/5] Change XmlDeclarationQuotes to str --- Lib/xml/etree/ElementTree.py | 47 ++++++++---------------------------- 1 file changed, 10 insertions(+), 37 deletions(-) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 63313cec886ef3..1fec838c7529cb 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -72,24 +72,12 @@ __all__ = [ # public symbols - "Comment", - "dump", - "Element", "ElementTree", - "fromstring", "fromstringlist", - "indent", "iselement", "iterparse", - "parse", "ParseError", - "PI", "ProcessingInstruction", - "QName", - "SubElement", - "tostring", "tostringlist", - "TreeBuilder", - "VERSION", - "XML", "XMLID", - "XMLParser", "XMLPullParser", - "register_namespace", - "canonicalize", "C14NWriterTarget", - "XMLDeclarationQuotes", - "ShortEmptyElements" + "Comment", 
"dump", "Element", "ElementTree", "fromstring", + "fromstringlist", "indent", "iselement", "iterparse", "parse", + "ParseError", "PI", "ProcessingInstruction", "QName", "SubElement", + "tostring", "tostringlist", "TreeBuilder", "VERSION", "XML", "XMLID", + "XMLParser", "XMLPullParser", "register_namespace", "canonicalize", + "C14NWriterTarget", "ShortEmptyElements" ] VERSION = "1.3.0" @@ -512,21 +500,6 @@ def __eq__(self, other): # -------------------------------------------------------------------- -class XMLDeclarationQuotes(enum.Enum): - """ - Whether or not single quotes or double quotes ought to be used in the XML - declaration. - - *SINGLE* (default): - *DOUBLE*: - """ - SINGLE = "'" - DOUBLE = '"' - - def __str__(self): - return self.value - - class ShortEmptyElements(enum.Enum): """ This class creates backwards compatibility with the boolean value of @@ -738,7 +711,7 @@ def iterfind(self, path, namespaces=None): def write(self, file_or_filename, encoding=None, xml_declaration=None, - xml_declaration_quotes=XMLDeclarationQuotes.SINGLE, + xml_declaration_quotes="'", default_namespace=None, method=None, *, short_empty_elements=True): @@ -755,7 +728,7 @@ def write(self, file_or_filename, US-ASCII, UTF-8, or Unicode *xml_declaration_quotes* -- Changes character used in XML declaration, - see *XMLDeclarationQuotes*. + should be a *str*. 
*default_namespace* -- sets the default XML namespace (for "xmlns") @@ -781,6 +754,8 @@ def write(self, file_or_filename, encoding = "utf-8" else: encoding = "us-ascii" + if not xml_declaration_quotes in ['"', "'"]: + raise ValueError("xml_declaration_quotes must be either ' or \"") with _get_writer(file_or_filename, encoding) as (write, declared_encoding): if method == "xml" and (xml_declaration or (xml_declaration is None and @@ -788,8 +763,6 @@ def write(self, file_or_filename, declared_encoding.lower() not in ("utf-8", "us-ascii"))): write("\n" .format(xml_declaration_quotes, declared_encoding)) - if not isinstance(xml_declaration_quotes, XMLDeclarationQuotes): - raise ValueError("Unknown type for `xml_declaration_quotes`") if method == "text": _serialize_text(write, self._root) else: