Skip to content

Commit c2c07da

Browse files
committed
Add tdl.AVM.aggregate(); use in init
This helps with merging repeated attributes instead of reassigning them or getting strange errors. Resolves #395
1 parent 72a2f12 commit c2c07da

File tree

4 files changed

+110
-18
lines changed

4 files changed

+110
-18
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,19 @@
77
* Removed Python 3.8 support ([#396])
88
* Added Python 3.13 support ([#396])
99

10+
### Added
11+
12+
* `delphin.tdl.AVM.aggregate()` ([#395])
13+
1014
### Fixed
1115

1216
* Parse TDL comments after letter-sets and wild-cards ([#395])
17+
* Repeated features in AVMs get merged instead of reassigned ([#395])
18+
19+
### Changed
20+
21+
* `tdl.AVM` initialization uses `AVM.aggregate()` instead of
22+
assignment of values on features ([#395])
1323

1424

1525
## [v1.9.1]

delphin/tdl.py

Lines changed: 79 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
import re
66
import textwrap
77
import warnings
8+
from collections.abc import Mapping, Sequence
89
from pathlib import Path
9-
from typing import Generator, Tuple, Union
10+
from typing import Generator, Optional, Tuple, Union
1011

1112
from delphin import util
1213

@@ -33,6 +34,9 @@
3334
_line_width = 79 # try not to go beyond this number of characters
3435

3536

37+
AttrSeq = Sequence[tuple[str, Union['Conjunction', 'Term']]]
38+
AttrMap = Mapping[str, Union['Conjunction', 'Term']]
39+
3640
# Exceptions
3741

3842
class TDLError(PyDelphinException):
@@ -190,21 +194,78 @@ class AVM(FeatureStructure, Term):
190194
docstring (str): documentation string
191195
"""
192196

193-
def __init__(self, featvals=None, docstring=None):
197+
def __init__(
198+
self,
199+
featvals: Union[AttrSeq, AttrMap, None] = None,
200+
docstring=None,
201+
) -> None:
194202
# super() doesn't work because I need to split the parameters
195-
FeatureStructure.__init__(self, featvals)
203+
FeatureStructure.__init__(self)
196204
Term.__init__(self, docstring=docstring)
205+
if featvals is not None:
206+
self.aggregate(featvals)
197207

198208
@classmethod
199209
def _default(cls):
200-
return AVM()
210+
return _ImplicitAVM()
201211

202-
def __setitem__(self, key, val):
212+
def __setitem__(self, key: str, val: Union['Conjunction', Term]) -> None:
203213
if not (val is None or isinstance(val, (Term, Conjunction))):
204-
raise TypeError('invalid attribute value type: {}'.format(
205-
type(val).__name__))
214+
raise TypeError(
215+
'invalid attribute value type: {}'.format(type(val).__name__)
216+
)
206217
super(AVM, self).__setitem__(key, val)
207218

219+
def aggregate(self, featvals: Union[AttrSeq, AttrMap]) -> None:
220+
"""Combine features in a single AVM.
221+
222+
This function takes feature paths and values and merges them
223+
into the AVM, but does not do full unification. For example:
224+
225+
>>> avm = tdl.AVM([("FEAT", tdl.TypeIdentifier("val1"))])
226+
>>> avm.aggregate([
227+
... ("FEAT", tdl.TypeIdentifier("val2")),
228+
... ("FEAT.SUB", tdl.TypeIdentifier("val3")),
229+
... ])
230+
>>> print(tdl.format(avm))
231+
[ FEAT val1 & val2 & [ SUB val3 ] ]
232+
233+
The *featvals* argument may be a sequence of (feature, value)
234+
pairs or a mapping of features to values.
235+
236+
"""
237+
if hasattr(featvals, 'items'):
238+
featvals = list(featvals.items())
239+
for feat, val in featvals:
240+
avm = self
241+
feat = feat.upper()
242+
while feat:
243+
subkey, _, rest = feat.partition(".")
244+
cur_val = avm.get(subkey)
245+
# new feature, just assign
246+
if subkey not in avm:
247+
avm[feat] = val
248+
break
249+
# last feature on path, conjoin
250+
elif not rest:
251+
avm[subkey] = cur_val & val
252+
# non-conjunction implicit AVM; follow the dots
253+
elif isinstance(cur_val, _ImplicitAVM):
254+
avm = cur_val
255+
# conjunction with implicit AVM; follow the AVM's dots
256+
elif (
257+
isinstance(cur_val, Conjunction)
258+
and (avm_ := cur_val._last_avm())
259+
and isinstance(avm_, _ImplicitAVM)
260+
):
261+
avm = avm_
262+
# some other term; create conjunction with implicit AVM
263+
else:
264+
avm_ = _ImplicitAVM()
265+
avm[subkey] = cur_val & avm_
266+
avm = avm_
267+
feat = rest
268+
208269
def normalize(self):
209270
"""
210271
Reduce trivial AVM conjunctions to just the AVM.
@@ -255,7 +316,7 @@ def features(self, expand=False):
255316

256317

257318
class _ImplicitAVM(AVM):
258-
"""AVM implicitly constructed by list syntax."""
319+
"""AVM implicitly constructed by dot-notation and list syntax."""
259320

260321

261322
class ConsList(AVM):
@@ -514,13 +575,10 @@ def __getitem__(self, key):
514575

515576
def __setitem__(self, key, val):
516577
"""Set *key* to *val* in the last AVM in the conjunction"""
517-
avm = None
518-
for term in self._terms:
519-
if isinstance(term, AVM):
520-
avm = term
521-
if avm is None:
578+
if avm := self._last_avm():
579+
avm[key] = val
580+
else:
522581
raise TDLError('no AVM in Conjunction')
523-
avm[key] = val
524582

525583
def __delitem__(self, key):
526584
"""Delete *key* from all AVMs in the conjunction"""
@@ -614,6 +672,12 @@ def string(self):
614672
return str(term)
615673
return None # conjunction does not have a string type (not an error)
616674

675+
def _last_avm(self) -> Optional[AVM]:
676+
for term in reversed(self._terms):
677+
if isinstance(term, AVM):
678+
return term
679+
return None
680+
617681

618682
class TypeDefinition:
619683
"""
@@ -1399,6 +1463,7 @@ def _format_term(term, indent):
13991463
Regex: _format_regex,
14001464
Coreference: _format_coref,
14011465
AVM: _format_avm,
1466+
_ImplicitAVM: _format_avm,
14021467
ConsList: _format_conslist,
14031468
DiffList: _format_difflist,
14041469
}.get(term.__class__, None)

delphin/tfs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ def __eq__(self, other: Any) -> bool:
6464

6565
def __setitem__(self, key: str, val: Any) -> None:
6666
avm = self._avm
67-
subkeys = key.split('.', 1)
68-
subkey = subkeys[0].upper()
69-
if len(subkeys) == 1:
67+
subkey, _, rest = key.partition(".")
68+
subkey = subkey.upper()
69+
if not rest:
7070
avm[subkey] = val
7171
else:
7272
if subkey in avm:
@@ -77,7 +77,7 @@ def __setitem__(self, key: str, val: Any) -> None:
7777
f'{subkey} does not support item assignment')
7878
else:
7979
subdef = avm[subkey] = self._default()
80-
subdef[subkeys[1]] = val
80+
subdef[rest] = val
8181

8282
def __getitem__(self, key: str) -> Any:
8383
first, _, remainder = key.partition('.')

tests/tdl_test.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,3 +1008,20 @@ def test_issue_357():
10081008
assert isinstance(c, ConsList)
10091009
assert len(c.values()) == 2
10101010
assert tdl.format(t) == 'id := < a . < b . c > >.'
1011+
1012+
1013+
def test_promote_conjunction_issue_395():
1014+
t = tdlparse('a := b & [ ABC x, ABC [ DEF y ] ].')
1015+
assert tdl.format(t) == 'a := b &\n [ ABC x & [ DEF y ] ].'
1016+
t = tdlparse('a := b & [ ABC x, ABC.DEF y ].')
1017+
assert tdl.format(t) == 'a := b &\n [ ABC x & [ DEF y ] ].'
1018+
t = tdlparse('a := b & [ ABC.DEF y, ABC x ].')
1019+
assert tdl.format(t) == 'a := b &\n [ ABC [ DEF y ] & x ].'
1020+
t = tdlparse('a := b & [ ABC [ DEF x ], ABC [ GHI y ] ].')
1021+
assert tdl.format(t) == 'a := b &\n [ ABC [ DEF x ] & [ GHI y ] ].'
1022+
t = tdlparse('a := b & [ ABC [ DEF x ], ABC.DEF y ].')
1023+
assert tdl.format(t) == 'a := b &\n [ ABC [ DEF x ] & [ DEF y ] ].'
1024+
t = tdlparse('a := b & [ ABC [ DEF x ], ABC.GHI y ].')
1025+
assert tdl.format(t) == 'a := b &\n [ ABC [ DEF x ] & [ GHI y ] ].'
1026+
t = tdlparse('a := b & [ ABC x & [ DEF y ], ABC z ].')
1027+
assert tdl.format(t) == 'a := b &\n [ ABC x & [ DEF y ] & z ].'

0 commit comments

Comments
 (0)