diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index ebbe2b703bfd8f..1fec838c7529cb 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -72,23 +72,13 @@ __all__ = [ # public symbols - "Comment", - "dump", - "Element", "ElementTree", - "fromstring", "fromstringlist", - "indent", "iselement", "iterparse", - "parse", "ParseError", - "PI", "ProcessingInstruction", - "QName", - "SubElement", - "tostring", "tostringlist", - "TreeBuilder", - "VERSION", - "XML", "XMLID", - "XMLParser", "XMLPullParser", - "register_namespace", - "canonicalize", "C14NWriterTarget", - ] + "Comment", "dump", "Element", "ElementTree", "fromstring", + "fromstringlist", "indent", "iselement", "iterparse", "parse", + "ParseError", "PI", "ProcessingInstruction", "QName", "SubElement", + "tostring", "tostringlist", "TreeBuilder", "VERSION", "XML", "XMLID", + "XMLParser", "XMLPullParser", "register_namespace", "canonicalize", + "C14NWriterTarget", "ShortEmptyElements" +] VERSION = "1.3.0" @@ -99,6 +89,7 @@ import collections import collections.abc import contextlib +import enum from . import ElementPath @@ -509,6 +500,46 @@ def __eq__(self, other): # -------------------------------------------------------------------- +class ShortEmptyElements(enum.Enum): + """ + This class creates backwards compatibility with the boolean value of + *short_empty_elements* that existed prior to 3.??. 
+ + Assuming the tag `<tag>`, the results will be: + + *SPACE* (default): `<tag />` + *NOSPACE*: `<tag/>` + *NONE*: `<tag></tag>` + """ + SPACE = " " + NOSPACE = "" + NONE = False + + def __bool__(self): + return self != ShortEmptyElements.NONE + + @classmethod + def _missing_(cls, value): + if value is enum.no_arg: + return cls.SPACE + elif isinstance(value, bool): + return cls.SPACE if value else cls.NONE + else: + return super()._missing_(value) + + @classmethod + def tag_defaultdict(cls, short_empty_elements): + if not isinstance(short_empty_elements, collections.defaultdict): + if isinstance(short_empty_elements, ShortEmptyElements): + return collections.defaultdict(lambda: short_empty_elements) + elif short_empty_elements: + return collections.defaultdict(lambda: ShortEmptyElements.SPACE) + else: + return collections.defaultdict(lambda: ShortEmptyElements.NONE) + else: + return short_empty_elements + + class ElementTree: """An XML element hierarchy. @@ -680,6 +711,7 @@ def iterfind(self, path, namespaces=None): def write(self, file_or_filename, encoding=None, xml_declaration=None, + xml_declaration_quotes="'", default_namespace=None, method=None, *, short_empty_elements=True): @@ -695,6 +727,9 @@ def write(self, file_or_filename, is added if encoding IS NOT either of: US-ASCII, UTF-8, or Unicode + *xml_declaration_quotes* -- quote character used in the XML declaration, + must be either a single quote (default) or a double quote. + *default_namespace* -- sets the default XML namespace (for "xmlns") *method* -- either "xml" (default), "html, "text", or "c14n" @@ -703,8 +738,12 @@ def write(self, file_or_filename, that contain no content. If True (default) they are emitted as a single self-closed tag, otherwise they are emitted as a pair - of start/end tags + of start/end tags. + For more control, can be a + *ShortEmptyElements* object, or a + defaultdict keyed by tags as strings and + valued with such objects. 
""" if not method: method = "xml" @@ -715,18 +754,21 @@ def write(self, file_or_filename, encoding = "utf-8" else: encoding = "us-ascii" + if not xml_declaration_quotes in ['"', "'"]: + raise ValueError("xml_declaration_quotes must be either ' or \"") with _get_writer(file_or_filename, encoding) as (write, declared_encoding): if method == "xml" and (xml_declaration or (xml_declaration is None and encoding.lower() != "unicode" and declared_encoding.lower() not in ("utf-8", "us-ascii"))): - write("\n" % ( - declared_encoding,)) + write("\n" + .format(xml_declaration_quotes, declared_encoding)) if method == "text": _serialize_text(write, self._root) else: qnames, namespaces = _namespaces(self._root, default_namespace) serialize = _serialize[method] + short_empty_elements = ShortEmptyElements.tag_defaultdict(short_empty_elements) serialize(write, self._root, qnames, namespaces, short_empty_elements=short_empty_elements) @@ -885,7 +927,7 @@ def _serialize_xml(write, elem, qnames, namespaces, else: v = _escape_attrib(v) write(" %s=\"%s\"" % (qnames[k], v)) - if text or len(elem) or not short_empty_elements: + if text or len(elem) or not short_empty_elements[tag]: write(">") if text: write(_escape_cdata(text)) @@ -894,7 +936,7 @@ def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements=short_empty_elements) write("") else: - write(" />") + write(short_empty_elements[tag].value+"/>") if elem.tail: write(_escape_cdata(elem.tail)) diff --git a/Misc/NEWS.d/next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst b/Misc/NEWS.d/next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst new file mode 100644 index 00000000000000..68fbee9694294a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-07-30-19-41-52.gh-issue-95472.7GOJ4U.rst @@ -0,0 +1 @@ +Added two fine-grained formatting options to ``xml.etree.ElementTree`` so programmers can prevent Git diffs from occurring due to incompatible XML libraries. Spaces can now be eliminated in e.g. 
``<tag />`` (as ``<tag/>``) (``ShortEmptyElements``), and double quotes can be used in the XML declaration (``xml_declaration_quotes``).