|
5 | 5 | import re
|
6 | 6 | import textwrap
|
7 | 7 | import warnings
|
| 8 | +from collections.abc import Mapping, Sequence |
8 | 9 | from pathlib import Path
|
9 |
| -from typing import Generator, Tuple, Union |
| 10 | +from typing import Generator, Optional, Tuple, Union |
10 | 11 |
|
11 | 12 | from delphin import util
|
12 | 13 |
|
|
_line_width = 79  # try not to go beyond this number of characters


# Type aliases for the feature/value collections accepted by
# AVM.__init__() and AVM.aggregate():
#   AttrSeq -- a sequence of (feature, value) pairs
#   AttrMap -- a mapping of features to values
# The value types are spelled as strings because they are forward
# references at this point in the module.
AttrSeq = Sequence[tuple[str, Union['Conjunction', 'Term']]]
AttrMap = Mapping[str, Union['Conjunction', 'Term']]
| 39 | + |
36 | 40 | # Exceptions
|
37 | 41 |
|
38 | 42 | class TDLError(PyDelphinException):
|
@@ -190,21 +194,78 @@ class AVM(FeatureStructure, Term):
|
190 | 194 | docstring (str): documentation string
|
191 | 195 | """
|
192 | 196 |
|
193 |
| - def __init__(self, featvals=None, docstring=None): |
| 197 | + def __init__( |
| 198 | + self, |
| 199 | + featvals: Union[AttrSeq, AttrMap, None] = None, |
| 200 | + docstring=None, |
| 201 | + ) -> None: |
194 | 202 | # super() doesn't work because I need to split the parameters
|
195 |
| - FeatureStructure.__init__(self, featvals) |
| 203 | + FeatureStructure.__init__(self) |
196 | 204 | Term.__init__(self, docstring=docstring)
|
| 205 | + if featvals is not None: |
| 206 | + self.aggregate(featvals) |
197 | 207 |
|
    @classmethod
    def _default(cls):
        """Return a new :class:`_ImplicitAVM` constructed with no features.

        The result is an :class:`_ImplicitAVM` rather than a plain
        :class:`AVM`; :meth:`aggregate` only follows dotted paths into
        values of that type.
        """
        # NOTE(review): presumably FeatureStructure calls this hook to
        # build intermediate structures for dotted feature paths --
        # confirm against delphin.tfs.
        return _ImplicitAVM()
201 | 211 |
|
202 |
| - def __setitem__(self, key, val): |
| 212 | + def __setitem__(self, key: str, val: Union['Conjunction', Term]) -> None: |
203 | 213 | if not (val is None or isinstance(val, (Term, Conjunction))):
|
204 |
| - raise TypeError('invalid attribute value type: {}'.format( |
205 |
| - type(val).__name__)) |
| 214 | + raise TypeError( |
| 215 | + 'invalid attribute value type: {}'.format(type(val).__name__) |
| 216 | + ) |
206 | 217 | super(AVM, self).__setitem__(key, val)
|
207 | 218 |
|
| 219 | + def aggregate(self, featvals: Union[AttrSeq, AttrMap]) -> None: |
| 220 | + """Combine features in a single AVM. |
| 221 | +
|
| 222 | + This function takes feature paths and values and merges them |
| 223 | + into the AVM, but does not do full unification. For example: |
| 224 | +
|
| 225 | + >>> avm = tdl.AVM([("FEAT", tdl.TypeIdentifier("val1"))]) |
| 226 | + >>> avm.aggregate([ |
| 227 | + ... ("FEAT", tdl.TypeIdentifier("val2")), |
| 228 | + ... ("FEAT.SUB", tdl.TypeIdentifier("val3")), |
| 229 | + ... ]) |
| 230 | + >>> print(tdl.format(avm)) |
| 231 | + [ FEAT val1 & val2 & [ SUB val3 ] ] |
| 232 | +
|
| 233 | + The *featvals* argument may be an sequence of (feature, value) |
| 234 | + pairs or a mapping of features to values. |
| 235 | +
|
| 236 | + """ |
| 237 | + if hasattr(featvals, 'items'): |
| 238 | + featvals = list(featvals.items()) |
| 239 | + for feat, val in featvals: |
| 240 | + avm = self |
| 241 | + feat = feat.upper() |
| 242 | + while feat: |
| 243 | + subkey, _, rest = feat.partition(".") |
| 244 | + cur_val = avm.get(subkey) |
| 245 | + # new feature, just assign |
| 246 | + if subkey not in avm: |
| 247 | + avm[feat] = val |
| 248 | + break |
| 249 | + # last feature on path, conjoin |
| 250 | + elif not rest: |
| 251 | + avm[subkey] = cur_val & val |
| 252 | + # non-conjunction implicit AVM; follow the dots |
| 253 | + elif isinstance(cur_val, _ImplicitAVM): |
| 254 | + avm = cur_val |
| 255 | + # conjunction with implicit AVM; follow the AVM's dots |
| 256 | + elif ( |
| 257 | + isinstance(cur_val, Conjunction) |
| 258 | + and (avm_ := cur_val._last_avm()) |
| 259 | + and isinstance(avm_, _ImplicitAVM) |
| 260 | + ): |
| 261 | + avm = avm_ |
| 262 | + # some other term; create conjunction with implicit AVM |
| 263 | + else: |
| 264 | + avm_ = _ImplicitAVM() |
| 265 | + avm[subkey] = cur_val & avm_ |
| 266 | + avm = avm_ |
| 267 | + feat = rest |
| 268 | + |
208 | 269 | def normalize(self):
|
209 | 270 | """
|
210 | 271 | Reduce trivial AVM conjunctions to just the AVM.
|
@@ -255,7 +316,7 @@ def features(self, expand=False):
|
255 | 316 |
|
256 | 317 |
|
class _ImplicitAVM(AVM):
    """AVM implicitly constructed by dot-notation and list syntax.

    The class adds no behavior of its own; it only marks an AVM as
    implicit. Instances are produced by :meth:`AVM._default` and by
    :meth:`AVM.aggregate`, which follows dotted feature paths only
    into values of this type.
    """
259 | 320 |
|
260 | 321 |
|
261 | 322 | class ConsList(AVM):
|
@@ -514,13 +575,10 @@ def __getitem__(self, key):
|
514 | 575 |
|
515 | 576 | def __setitem__(self, key, val):
|
516 | 577 | """Set *key* to *val* in the last AVM in the conjunction"""
|
517 |
| - avm = None |
518 |
| - for term in self._terms: |
519 |
| - if isinstance(term, AVM): |
520 |
| - avm = term |
521 |
| - if avm is None: |
| 578 | + if avm := self._last_avm(): |
| 579 | + avm[key] = val |
| 580 | + else: |
522 | 581 | raise TDLError('no AVM in Conjunction')
|
523 |
| - avm[key] = val |
524 | 582 |
|
525 | 583 | def __delitem__(self, key):
|
526 | 584 | """Delete *key* from all AVMs in the conjunction"""
|
@@ -614,6 +672,12 @@ def string(self):
|
614 | 672 | return str(term)
|
615 | 673 | return None # conjunction does not have a string type (not an error)
|
616 | 674 |
|
| 675 | + def _last_avm(self) -> Optional[AVM]: |
| 676 | + for term in reversed(self._terms): |
| 677 | + if isinstance(term, AVM): |
| 678 | + return term |
| 679 | + return None |
| 680 | + |
617 | 681 |
|
618 | 682 | class TypeDefinition:
|
619 | 683 | """
|
@@ -1399,6 +1463,7 @@ def _format_term(term, indent):
|
1399 | 1463 | Regex: _format_regex,
|
1400 | 1464 | Coreference: _format_coref,
|
1401 | 1465 | AVM: _format_avm,
|
| 1466 | + _ImplicitAVM: _format_avm, |
1402 | 1467 | ConsList: _format_conslist,
|
1403 | 1468 | DiffList: _format_difflist,
|
1404 | 1469 | }.get(term.__class__, None)
|
|
0 commit comments