From c6e3a5d551ae377dacebc3454e6593b1dd61b67d Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Thu, 15 May 2025 11:34:41 -0700 Subject: [PATCH 1/4] Parse TDL comments after letter-set and wild-card --- CHANGELOG.md | 5 +++++ delphin/tdl.py | 4 ++-- tests/tdl_test.py | 10 ++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48cea65..9e22c20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ * Removed Python 3.8 support ([#396]) * Added Python 3.13 support ([#396]) +### Fixed + +* Parse TDL comments after letter-sets and wild-cards ([#395]) + ## [v1.9.1] @@ -1653,4 +1657,5 @@ information about changes, except for [#379]: https://github.com/delph-in/pydelphin/issues/379 [#383]: https://github.com/delph-in/pydelphin/issues/383 [#386]: https://github.com/delph-in/pydelphin/issues/386 +[#395]: https://github.com/delph-in/pydelphin/issues/395 [#396]: https://github.com/delph-in/pydelphin/issues/396 diff --git a/delphin/tdl.py b/delphin/tdl.py index d710fca..d172495 100644 --- a/delphin/tdl.py +++ b/delphin/tdl.py @@ -891,7 +891,7 @@ class BlockComment(str): |(!>) # 17 diff list close |(>) # 18 cons list close |\#({identifier}) # 19 coreference - |%\s*\((.*)\)\s*$ # 20 letter-set or wild-card + |%\s*\((.*)\) # 20 letter-set or wild-card |%(prefix|suffix) # 21 start of affixing pattern |\(([^ ]+\s+(?:[^ )\\]|\\.)+)\) # 22 affix subpattern |(\/) # 23 defaults (currently unused) @@ -1148,7 +1148,7 @@ def _parse_tdl_definition(identifier, tokens): def _parse_letterset(token, line_no): - end = r'\s+((?:[^) \\]|\\.)+)\)\s*$' + end = r'\s+((?:[^) \\]|\\.)+)\)' m = re.match(r'\s*letter-set\s*\((!.)' + end, token) if m is not None: chars = re.sub(r'\\(.)', r'\1', m.group(2)) diff --git a/tests/tdl_test.py b/tests/tdl_test.py index 1182c01..d84e3e2 100644 --- a/tests/tdl_test.py +++ b/tests/tdl_test.py @@ -721,6 +721,16 @@ def test_parse_wildcard(): tdlparse('%(wild-card (?a ab c))') +def test_parse_comment_issue_395(): + # https://github.com/delph-in/pydelphin/issues/395 + ls = tdlparse(r'%(letter-set (!a abc)) ; comment') + assert ls.characters == 'abc' + ls = tdlparse(r'%(letter-set (!a abc)) #| comment |#') + assert ls.characters == 'abc' + wc = tdlparse(r'%(wild-card (?a abc)) ; comment') + assert wc.characters == 'abc' + + def test_parse_linecomment(): lc = tdlparse('; this is a comment\n') assert lc == ' this is a comment' From 0ac26b0188f9ab47b7d7d0fa0d90690081837a8b Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Fri, 16 May 2025 14:48:46 -0700 Subject: [PATCH 2/4] More strictly type hierarchy and tfs modules --- delphin/hierarchy.py | 138 ++++++++++++++++++++++++++++++------------ delphin/tfs.py | 140 +++++++++++++++++++++---------------------- 2 files changed, 169 insertions(+), 109 deletions(-) diff --git a/delphin/hierarchy.py b/delphin/hierarchy.py index a30036b..6f5c19f 100644 --- a/delphin/hierarchy.py +++ b/delphin/hierarchy.py @@ -3,6 +3,17 @@ Basic support for hierarchies. 
""" +from collections.abc import Hashable, Mapping +from typing import ( + Any, + Callable, + Generic, + Iterable, + Iterator, + Optional, + TypeVar, +) + # Default modules need to import the PyDelphin version from delphin.__about__ import __version__ # noqa: F401 from delphin.exceptions import PyDelphinException @@ -12,12 +23,23 @@ class HierarchyError(PyDelphinException): """Raised for invalid operations on hierarchies.""" -def _norm_id(id): +H = TypeVar("H", bound=Hashable) +# generic types +Identifiers = Iterable[H] +HierarchyMap = Mapping[H, Identifiers] +DataMap = Mapping[H, Any] +# explicit types +HierarchyDict = dict[H, tuple[H, ...]] +DataDict = dict[H, Any] +IdentifierNormalizer = Callable[[H], H] + + +def _norm_id(id: H) -> H: """Default id normalizer does nothing.""" return id -class MultiHierarchy: +class MultiHierarchy(Generic[H]): """ A Multiply-inheriting Hierarchy. @@ -30,6 +52,10 @@ class MultiHierarchy: data. Data for identifiers may be get and set individually with dictionary key-access. + While MultiHierarchy can model non-string hierarchies, the data + type of all node identifiers must be hashable and consistent + within the hierarchy. + >>> h = MultiHierarchy('*top*', {'food': '*top*', ... 'utensil': '*top*'}) >>> th.update({'fruit': 'food', 'apple': 'fruit'}) @@ -72,8 +98,19 @@ class MultiHierarchy: top: the hierarchy's top node identifier """ - def __init__(self, top, hierarchy=None, data=None, - normalize_identifier=None): + _top: H + _hier: HierarchyDict + _loer: dict[H, set[H]] + _data: DataDict + _norm: IdentifierNormalizer + + def __init__( + self, + top: H, + hierarchy: Optional[HierarchyMap] = None, + data: Optional[DataMap] = None, + normalize_identifier: Optional[IdentifierNormalizer] = None, + ): if not normalize_identifier: self._norm = _norm_id elif not callable(normalize_identifier): @@ -89,17 +126,19 @@ def __init__(self, top, hierarchy=None, data=None, self.update(hierarchy, data) @property - def top(self): + def top(self) -> H: return self._top - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if not isinstance(other, self.__class__): return NotImplemented - return (self._top == other._top - and self._hier == other._hier - and self._data == other._data) + return ( + self._top == other._top + and self._hier == other._hier + and self._data == other._data + ) - def __getitem__(self, identifier): + def __getitem__(self, identifier: H) -> Any: identifier = self._norm(identifier) data = None try: @@ -109,31 +148,37 @@ def __getitem__(self, identifier): raise return data - def __setitem__(self, identifier, data): + def __setitem__(self, identifier: H, data: Any) -> None: identifier = self._norm(identifier) if identifier not in self: raise HierarchyError( f'cannot set data; not in hierarchy: {identifier}') self._data[identifier] = data - def __iter__(self): - return iter(identifier for identifier in self._hier - if identifier != self._top) + def __iter__(self) -> Iterator[H]: + return iter( + identifier for identifier in self._hier + if identifier != self._top + ) - def __contains__(self, identifier): + def __contains__(self, identifier: H) -> bool: return self._norm(identifier) in self._hier - def __len__(self): + def __len__(self) -> int: return len(self._hier) - 1 # ignore top - def items(self): + def items(self) -> Iterable[tuple[H, Any]]: """ Return the (identifier, data) pairs excluding the top node. 
""" value = self.__getitem__ return [(identifier, value(identifier)) for identifier in self] - def update(self, subhierarchy=None, data=None): + def update( + self, + subhierarchy: Optional[HierarchyMap] = None, + data: Optional[DataMap] = None, + ) -> None: """ Incorporate *subhierarchy* and *data* into the hierarchy. @@ -166,7 +211,7 @@ def update(self, subhierarchy=None, data=None): loer = dict(self._loer) while subhierarchy: - eligible = _get_eligible(hier, subhierarchy) + eligible: list[H] = _get_eligible(hier, subhierarchy) for identifier in eligible: parents = subhierarchy.pop(identifier) @@ -181,22 +226,22 @@ def update(self, subhierarchy=None, data=None): self._loer = loer self._data.update(data) - def parents(self, identifier): + def parents(self, identifier: H) -> tuple[H, ...]: """Return the immediate parents of *identifier*.""" identifier = self._norm(identifier) return self._hier[identifier] - def children(self, identifier): + def children(self, identifier: H) -> set[H]: """Return the immediate children of *identifier*.""" identifier = self._norm(identifier) return self._loer[identifier] - def ancestors(self, identifier): + def ancestors(self, identifier: H) -> set[H]: """Return the ancestors of *identifier*.""" identifier = self._norm(identifier) return _ancestors(identifier, self._hier) - def descendants(self, identifier): + def descendants(self, identifier: H) -> set[H]: """Return the descendants of *identifier*.""" identifier = self._norm(identifier) xs = set() @@ -205,7 +250,7 @@ def descendants(self, identifier): xs.update(self.descendants(child)) return xs - def subsumes(self, a, b): + def subsumes(self, a: H, b: H) -> bool: """ Return `True` if node *a* subsumes node *b*. @@ -234,7 +279,7 @@ def subsumes(self, a, b): a, b = norm(a), norm(b) return a == b or b in self.descendants(a) - def compatible(self, a, b): + def compatible(self, a: H, b: H) -> bool: """ Return `True` if node *a* is compatible with node *b*. @@ -262,7 +307,11 @@ def compatible(self, a, b): b_lineage = self.descendants(b).union([b]) return len(a_lineage.intersection(b_lineage)) > 0 - def validate_update(self, subhierarchy, data): + def validate_update( + self, + subhierarchy: Optional[HierarchyMap], + data: Optional[DataMap], + ) -> tuple[HierarchyDict, DataDict]: """ Check if the update can apply to the current hierarchy. 
@@ -277,17 +326,17 @@ def validate_update(self, subhierarchy, data): ids = set(self._hier).intersection(subhierarchy) if ids: raise HierarchyError( - 'already in hierarchy: {}'.format(', '.join(ids))) + 'already in hierarchy: {}'.format(', '.join(map(str, ids)))) ids = set(data).difference(set(self._hier).union(subhierarchy)) if ids: raise HierarchyError( 'cannot update data; not in hierarchy: {}' - .format(', '.join(ids))) + .format(', '.join(map(str, ids)))) return subhierarchy, data -def _ancestors(id, hier): +def _ancestors(id: H, hier: dict[H, tuple[H, ...]]) -> set[H]: xs = set() for parent in hier[id]: xs.add(parent) @@ -295,8 +344,13 @@ def _ancestors(id, hier): return xs -def _normalize_update(norm, subhierarchy, data): - sub = {} +def _normalize_update( + norm: IdentifierNormalizer, + subhierarchy: Optional[HierarchyMap], + data: Optional[DataMap], +) -> tuple[HierarchyDict, DataDict]: + sub: HierarchyDict = {} + parents: Identifiers if subhierarchy: for id, parents in subhierarchy.items(): if isinstance(parents, str): @@ -304,31 +358,39 @@ def _normalize_update(norm, subhierarchy, data): id = norm(id) parents = tuple(map(norm, parents)) sub[id] = parents - dat = {} + dat: DataDict = {} if data: dat = {norm(id): obj for id, obj in data.items()} return sub, dat -def _get_eligible(hier, sub): +def _get_eligible( + hier: HierarchyDict, + sub: HierarchyDict, +) -> list[H]: eligible = [id for id, parents in sub.items() if all(parent in hier for parent in parents)] if not eligible: raise HierarchyError( 'disconnected or cyclic hierarchy; remaining: {}' - .format(', '.join(sub))) + .format(', '.join(map(str, sub)))) return eligible -def _validate_parentage(id, parents, hier): - ancestors = set() +def _validate_parentage( + id: H, + parents: tuple[H, ...], + hier: HierarchyDict, +) -> None: + ancestors: set[H] = set() for parent in parents: ancestors.update(_ancestors(parent, hier)) - redundant = ancestors.intersection(parents) + redundant = sorted(map(str, ancestors.intersection(parents))) if redundant: raise HierarchyError( '{} has redundant parents: {}' - .format(id, ', '.join(sorted(redundant)))) + .format(id, ', '.join(redundant)) + ) # single-parented hierarchy might be something like this: diff --git a/delphin/tfs.py b/delphin/tfs.py index 0bfd9ee..a74ffde 100644 --- a/delphin/tfs.py +++ b/delphin/tfs.py @@ -3,6 +3,9 @@ Basic classes for modeling feature structures. """ +from collections.abc import Mapping, Sequence +from typing import Any, Callable, Iterable, Optional, Union + # Default modules need to import the PyDelphin version from delphin.__about__ import __version__ # noqa: F401 from delphin.exceptions import PyDelphinException @@ -13,6 +16,14 @@ class TFSError(PyDelphinException): """Raised on invalid feature structure operations.""" +# generic input argument types +FeatureSeq = Sequence[tuple[str, Any]] +FeatureMap = Mapping[str, Any] +# explicit types +FeatureList = list[tuple[str, Any]] +FeatureDict = dict[str, Any] + + class FeatureStructure: """ A feature structure. 
@@ -27,27 +38,33 @@ class FeatureStructure: __slots__ = ('_avm', '_feats') - def __init__(self, featvals=None): + _avm: FeatureDict + _feats: list[str] + + def __init__( + self, + featvals: Union[FeatureSeq, FeatureMap, None] = None, + ) -> None: self._avm = {} self._feats = [] - if isinstance(featvals, dict): - featvals = featvals.items() + if featvals and hasattr(featvals, 'items'): + featvals = list(featvals.items()) for feat, val in list(featvals or []): self[feat] = val @classmethod - def _default(cls): + def _default(cls) -> 'FeatureStructure': return cls(None) - def __repr__(self): + def __repr__(self) -> str: return '<{} object at {}>'.format(self.__class__.__name__, id(self)) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if not isinstance(other, FeatureStructure): return NotImplemented return self._avm == other._avm - def __setitem__(self, key, val): + def __setitem__(self, key: str, val: Any) -> None: avm = self._avm subkeys = key.split('.', 1) subkey = subkeys[0].upper() @@ -66,14 +83,14 @@ def __setitem__(self, key, val): subdef = avm[subkey] = self._default() subdef[subkeys[1]] = val - def __getitem__(self, key): + def __getitem__(self, key: str) -> Any: first, _, remainder = key.partition('.') val = self._avm[first.upper()] if remainder: val = val[remainder] return val - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: first, _, remainder = key.partition('.') if remainder: fs = self._avm[first.upper()] @@ -81,7 +98,7 @@ def __delitem__(self, key): else: del self._avm[first.upper()] - def __contains__(self, key): + def __contains__(self, key: str) -> bool: subkeys = key.split('.', 1) subkey = subkeys[0].upper() if subkey in self._avm: @@ -91,7 +108,7 @@ def __contains__(self, key): return True return False - def get(self, key, default=None): + def get(self, key: str, default: Any = None) -> Any: """ Return the value for *key* if it exists, otherwise *default*. """ @@ -101,7 +118,7 @@ def get(self, key, default=None): val = default return val - def _is_notable(self): + def _is_notable(self) -> bool: """ Notability determines if the FeatureStructure should be listed as the value of a feature or if the feature should just "pass @@ -110,7 +127,7 @@ def _is_notable(self): """ return self._avm is None or len(self._avm) != 1 - def features(self, expand=False): + def features(self, expand: bool = False) -> FeatureList: """ Return the list of tuples of feature paths and feature values. @@ -153,92 +170,73 @@ class TypedFeatureStructure(FeatureStructure): """ __slots__ = '_type' - def __init__(self, type, featvals=None): + _type: str + + def __init__( + self, + type: str, + featvals: Union[FeatureSeq, FeatureMap, None] = None, + ) -> None: self._type = type super().__init__(featvals) - def __repr__(self): + def __repr__(self) -> str: return ''.format( self.type, id(self) ) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if not isinstance(other, TypedFeatureStructure): return NotImplemented return self._type == other._type and self._avm == other._avm @property - def type(self): + def type(self) -> str: """The type assigned to the feature structure.""" return self._type @type.setter - def type(self, value): + def type(self, value: str) -> None: self._type = value -class TypeHierarchy(MultiHierarchy): +class TypeHierarchy(MultiHierarchy[str]): """ A Type Hierarchy. - Type hierarchies have certain properties, such as a unique top - node, multiple inheritance, case insensitivity, and unique - greatest-lower-bound (glb) types. 
+ Type hierarchies are instances of + :class:`delphin.hierarchy.MultiHierarchy` constrained to use + case-insensitive (downcased) strings for node identifiers and + unique greatest-lower-bound (glb) types. Note: Checks for unique glbs is not yet implemented. - TypeHierarchies may be constructed when instantiating the class or - via the :meth:`update` method using a dictionary mapping type - names to node values, or one-by-one using dictionary-like access. - In both cases, the node values may be an individual parent name, - an iterable of parent names, or a :class:`TypeHierarchyNode` - object. Retrieving a node via dictionary access on the typename - returns a :class:`TypeHierarchyNode` regardless of the method used - to create the node. - - >>> th = TypeHierarchy('*top*', {'can-fly': '*top*'}) - >>> th.update({'can-swim': '*top*', 'can-walk': '*top*'}) - >>> th['butterfly'] = ('can-fly', 'can-walk') - >>> th['duck'] = TypeHierarchyNode( - ... ('can-fly', 'can-swim', 'can-walk'), - ... data='some info relating to ducks...') - >>> th['butterfly'].data = 'some info relating to butterflies' - - In some ways the TypeHierarchy behaves like a dictionary, but it - is not a subclass of :py:class:`dict` and does not implement all - its methods. Also note that some methods ignore the top node, - which make certain actions easier: - - >>> th = TypeHierarchy('*top*', {'a': '*top*', 'b': 'a', 'c': 'a'}) - >>> len(th) - 3 - >>> list(th) - ['a', 'b', 'c'] - >>> TypeHierarchy('*top*', dict(th.items())) == th - True - - But others do not ignore the top node, namely those where you can - request it specifically: - - >>> '*top*' in th - True - >>> th['*top*'] - + >>> th = TypeHierarchy( + ... '*top*', + ... {'can-fly': '*top*', 'can-swim': '*top*', 'can-walk': '*top*'} + ... ) + >>> th.update({'butterfly': ('can-fly', 'can-walk')}) + >>> th['butterfly'] = 'some info relating to butterflies' + >>> th.update( + ... {'duck': ('can-fly', 'can-swim', 'can-walk')}, + ... data={'duck': 'some info relating to ducks...'} + ... ) - Args: - top (str): unique top type - hierarchy (dict): mapping of `{child: node}` (see description - above concerning the `node` values) - Attributes: - top: the hierarchy's top type """ - def __init__(self, top, hierarchy=None, data=None, - normalize_identifier=None): + def __init__( + self, + top: str, + hierarchy: Optional[Mapping[str, Iterable[str]]] = None, + data: Optional[Mapping[str, Any]] = None, + normalize_identifier: Optional[Callable[[str], str]] = None + ) -> None: if not normalize_identifier: normalize_identifier = str.lower - super().__init__(top, - hierarchy=hierarchy, - data=data, - normalize_identifier=normalize_identifier) + super().__init__( + top, + hierarchy=hierarchy, + data=data, + normalize_identifier=normalize_identifier + ) From 72a2f12e0516c13fb61a0bf5d08db08b546c711d Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 19 May 2025 11:18:28 -0700 Subject: [PATCH 3/4] Remove FeatureStructure._feats; dicts are ordered I'm pretty sure the internal _feats list was just to ensure the order of features so reserializations of AVMs would be in the same order. This is no longer needed as dicts are now ordered in Python. 
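Dict insertion order has been guaranteed since Python 3.7, so feature
order still round-trips without the extra bookkeeping. An illustrative
check (not part of the test suite), with FeatureStructure imported from
delphin.tfs:

    >>> fs = FeatureStructure([('B', 'x'), ('A', 'y')])
    >>> [feat for feat, _ in fs.features()]
    ['B', 'A']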
--- delphin/tdl.py | 1 - delphin/tfs.py | 13 ++----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/delphin/tdl.py b/delphin/tdl.py index d172495..a3a8800 100644 --- a/delphin/tdl.py +++ b/delphin/tdl.py @@ -397,7 +397,6 @@ def __init__(self, values=None, docstring=None): tmplist = ConsList(values, end=cr) dl_list = _ImplicitAVM() dl_list._avm.update(tmplist._avm) - dl_list._feats = tmplist._feats self.last = 'LIST.' + tmplist._last_path else: dl_list = cr diff --git a/delphin/tfs.py b/delphin/tfs.py index a74ffde..1c19d67 100644 --- a/delphin/tfs.py +++ b/delphin/tfs.py @@ -36,17 +36,15 @@ class FeatureStructure: to feature values """ - __slots__ = ('_avm', '_feats') + __slots__ = '_avm', _avm: FeatureDict - _feats: list[str] def __init__( self, featvals: Union[FeatureSeq, FeatureMap, None] = None, ) -> None: self._avm = {} - self._feats = [] if featvals and hasattr(featvals, 'items'): featvals = list(featvals.items()) for feat, val in list(featvals or []): @@ -68,8 +66,6 @@ def __setitem__(self, key: str, val: Any) -> None: avm = self._avm subkeys = key.split('.', 1) subkey = subkeys[0].upper() - if subkey not in avm: - self._feats.append(subkey) if len(subkeys) == 1: avm[subkey] = val else: @@ -142,12 +138,7 @@ def features(self, expand: bool = False) -> FeatureList: """ fs = [] if self._avm is not None: - if len(self._feats) == len(self._avm): - feats = self._feats - else: - feats = list(self._avm) - for feat in feats: - val = self._avm[feat] + for feat, val in self._avm.items(): if isinstance(val, FeatureStructure): if not expand and val._is_notable(): fs.append((feat, val)) From c2c07da504f5133e859f123a3f9bf78ad6fbac5d Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 19 May 2025 17:03:22 -0700 Subject: [PATCH 4/4] Add tdl.AVM.aggregate(); use in init This helps with merging repeated attributes instead of reassigning them or getting strange errors. 
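For example, a repeated feature path is now conjoined with the earlier
value rather than replacing it or raising an error. An illustrative
sketch (with `from delphin import tdl`), mirroring the new aggregate()
docstring and tests:

    >>> avm = tdl.AVM([('ABC', tdl.TypeIdentifier('x')),
    ...                ('ABC.DEF', tdl.TypeIdentifier('y'))])
    >>> print(tdl.format(avm))
    [ ABC x & [ DEF y ] ]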
Resolves #395
---
 CHANGELOG.md      | 10 +++++
 delphin/tdl.py    | 93 ++++++++++++++++++++++++++++++++++++++++------
 delphin/tfs.py    |  8 ++--
 tests/tdl_test.py | 17 +++++++++
 4 files changed, 110 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9e22c20..850e3f7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,9 +7,19 @@
 * Removed Python 3.8 support ([#396])
 * Added Python 3.13 support ([#396])
 
+### Added
+
+* `delphin.tdl.AVM.aggregate()` ([#395])
+
 ### Fixed
 
 * Parse TDL comments after letter-sets and wild-cards ([#395])
+* Repeated features in AVMs get merged instead of reassigned ([#395])
+
+### Changed
+
+* `tdl.AVM` initialization uses `AVM.aggregate()` instead of
+  assignment of values on features ([#395])
 
 ## [v1.9.1]
 
diff --git a/delphin/tdl.py b/delphin/tdl.py
index a3a8800..12099e8 100644
--- a/delphin/tdl.py
+++ b/delphin/tdl.py
@@ -5,8 +5,9 @@
 import re
 import textwrap
 import warnings
+from collections.abc import Mapping, Sequence
 from pathlib import Path
-from typing import Generator, Tuple, Union
+from typing import Generator, Optional, Tuple, Union
 
 from delphin import util
 
@@ -33,6 +34,9 @@
 _line_width = 79  # try not to go beyond this number of characters
 
+AttrSeq = Sequence[tuple[str, Union['Conjunction', 'Term']]]
+AttrMap = Mapping[str, Union['Conjunction', 'Term']]
+
 
 # Exceptions
 
 class TDLError(PyDelphinException):
@@ -190,21 +194,78 @@ class AVM(FeatureStructure, Term):
         docstring (str): documentation string
     """
 
-    def __init__(self, featvals=None, docstring=None):
+    def __init__(
+        self,
+        featvals: Union[AttrSeq, AttrMap, None] = None,
+        docstring=None,
+    ) -> None:
         # super() doesn't work because I need to split the parameters
-        FeatureStructure.__init__(self, featvals)
+        FeatureStructure.__init__(self)
         Term.__init__(self, docstring=docstring)
+        if featvals is not None:
+            self.aggregate(featvals)
 
     @classmethod
     def _default(cls):
-        return AVM()
+        return _ImplicitAVM()
 
-    def __setitem__(self, key, val):
+    def __setitem__(self, key: str, val: Union['Conjunction', Term]) -> None:
         if not (val is None or isinstance(val, (Term, Conjunction))):
-            raise TypeError('invalid attribute value type: {}'.format(
-                type(val).__name__))
+            raise TypeError(
+                'invalid attribute value type: {}'.format(type(val).__name__)
+            )
         super(AVM, self).__setitem__(key, val)
 
+    def aggregate(self, featvals: Union[AttrSeq, AttrMap]) -> None:
+        """Combine features in a single AVM.
+
+        This function takes feature paths and values and merges them
+        into the AVM, but does not do full unification. For example:
+
+        >>> avm = tdl.AVM([("FEAT", tdl.TypeIdentifier("val1"))])
+        >>> avm.aggregate([
+        ...     ("FEAT", tdl.TypeIdentifier("val2")),
+        ...     ("FEAT.SUB", tdl.TypeIdentifier("val3")),
+        ... ])
+        >>> print(tdl.format(avm))
+        [ FEAT val1 & val2 & [ SUB val3 ] ]
+
+        The *featvals* argument may be a sequence of (feature, value)
+        pairs or a mapping of features to values.
+ + """ + if hasattr(featvals, 'items'): + featvals = list(featvals.items()) + for feat, val in featvals: + avm = self + feat = feat.upper() + while feat: + subkey, _, rest = feat.partition(".") + cur_val = avm.get(subkey) + # new feature, just assign + if subkey not in avm: + avm[feat] = val + break + # last feature on path, conjoin + elif not rest: + avm[subkey] = cur_val & val + # non-conjunction implicit AVM; follow the dots + elif isinstance(cur_val, _ImplicitAVM): + avm = cur_val + # conjunction with implicit AVM; follow the AVM's dots + elif ( + isinstance(cur_val, Conjunction) + and (avm_ := cur_val._last_avm()) + and isinstance(avm_, _ImplicitAVM) + ): + avm = avm_ + # some other term; create conjunction with implicit AVM + else: + avm_ = _ImplicitAVM() + avm[subkey] = cur_val & avm_ + avm = avm_ + feat = rest + def normalize(self): """ Reduce trivial AVM conjunctions to just the AVM. @@ -255,7 +316,7 @@ def features(self, expand=False): class _ImplicitAVM(AVM): - """AVM implicitly constructed by list syntax.""" + """AVM implicitly constructed by dot-notation and list syntax.""" class ConsList(AVM): @@ -514,13 +575,10 @@ def __getitem__(self, key): def __setitem__(self, key, val): """Set *key* to *val* in the last AVM in the conjunction""" - avm = None - for term in self._terms: - if isinstance(term, AVM): - avm = term - if avm is None: + if avm := self._last_avm(): + avm[key] = val + else: raise TDLError('no AVM in Conjunction') - avm[key] = val def __delitem__(self, key): """Delete *key* from all AVMs in the conjunction""" @@ -614,6 +672,12 @@ def string(self): return str(term) return None # conjunction does not have a string type (not an error) + def _last_avm(self) -> Optional[AVM]: + for term in reversed(self._terms): + if isinstance(term, AVM): + return term + return None + class TypeDefinition: """ @@ -1399,6 +1463,7 @@ def _format_term(term, indent): Regex: _format_regex, Coreference: _format_coref, AVM: _format_avm, + _ImplicitAVM: _format_avm, ConsList: _format_conslist, DiffList: _format_difflist, }.get(term.__class__, None) diff --git a/delphin/tfs.py b/delphin/tfs.py index 1c19d67..1040ace 100644 --- a/delphin/tfs.py +++ b/delphin/tfs.py @@ -64,9 +64,9 @@ def __eq__(self, other: Any) -> bool: def __setitem__(self, key: str, val: Any) -> None: avm = self._avm - subkeys = key.split('.', 1) - subkey = subkeys[0].upper() - if len(subkeys) == 1: + subkey, _, rest = key.partition(".") + subkey = subkey.upper() + if not rest: avm[subkey] = val else: if subkey in avm: @@ -77,7 +77,7 @@ def __setitem__(self, key: str, val: Any) -> None: f'{subkey} does not support item assignment') else: subdef = avm[subkey] = self._default() - subdef[subkeys[1]] = val + subdef[rest] = val def __getitem__(self, key: str) -> Any: first, _, remainder = key.partition('.') diff --git a/tests/tdl_test.py b/tests/tdl_test.py index d84e3e2..8af3979 100644 --- a/tests/tdl_test.py +++ b/tests/tdl_test.py @@ -1008,3 +1008,20 @@ def test_issue_357(): assert isinstance(c, ConsList) assert len(c.values()) == 2 assert tdl.format(t) == 'id := < a . < b . c > >.' + + +def test_promote_conjunction_issue_395(): + t = tdlparse('a := b & [ ABC x, ABC [ DEF y ] ].') + assert tdl.format(t) == 'a := b &\n [ ABC x & [ DEF y ] ].' + t = tdlparse('a := b & [ ABC x, ABC.DEF y ].') + assert tdl.format(t) == 'a := b &\n [ ABC x & [ DEF y ] ].' + t = tdlparse('a := b & [ ABC.DEF y, ABC x ].') + assert tdl.format(t) == 'a := b &\n [ ABC [ DEF y ] & x ].' 
+ t = tdlparse('a := b & [ ABC [ DEF x ], ABC [ GHI y ] ].') + assert tdl.format(t) == 'a := b &\n [ ABC [ DEF x ] & [ GHI y ] ].' + t = tdlparse('a := b & [ ABC [ DEF x ], ABC.DEF y ].') + assert tdl.format(t) == 'a := b &\n [ ABC [ DEF x ] & [ DEF y ] ].' + t = tdlparse('a := b & [ ABC [ DEF x ], ABC.GHI y ].') + assert tdl.format(t) == 'a := b &\n [ ABC [ DEF x ] & [ GHI y ] ].' + t = tdlparse('a := b & [ ABC x & [ DEF y ], ABC z ].') + assert tdl.format(t) == 'a := b &\n [ ABC x & [ DEF y ] & z ].'