Skip to content

Commit 9cda5cd

Browse files
authored
Merge pull request #398 from delph-in/gh-395-tdl-bugs
Refine TDL parsing for repeated features and comments in irules
2 parents 03b80ff + c2c07da commit 9cda5cd

File tree

5 files changed

+297
-140
lines changed

5 files changed

+297
-140
lines changed

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@
77
* Removed Python 3.8 support ([#396])
88
* Added Python 3.13 support ([#396])
99

10+
### Added
11+
12+
* `delphin.tdl.AVM.aggregate()` ([#395])
13+
14+
### Fixed
15+
16+
* Parse TDL comments after letter-sets and wild-cards ([#395])
17+
* Repeated features in AVMs get merged instead of reassigned ([#395])
18+
19+
### Changed
20+
21+
* `tdl.AVM` initialization uses `AVM.aggregate()` instead of
22+
assignment of values on features ([#395])
23+
1024

1125
## [v1.9.1]
1226

@@ -1653,4 +1667,5 @@ information about changes, except for
16531667
[#379]: https://github.com/delph-in/pydelphin/issues/379
16541668
[#383]: https://github.com/delph-in/pydelphin/issues/383
16551669
[#386]: https://github.com/delph-in/pydelphin/issues/386
1670+
[#395]: https://github.com/delph-in/pydelphin/issues/395
16561671
[#396]: https://github.com/delph-in/pydelphin/issues/396

delphin/hierarchy.py

Lines changed: 100 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@
33
Basic support for hierarchies.
44
"""
55

6+
from collections.abc import Hashable, Mapping
7+
from typing import (
8+
Any,
9+
Callable,
10+
Generic,
11+
Iterable,
12+
Iterator,
13+
Optional,
14+
TypeVar,
15+
)
16+
617
# Default modules need to import the PyDelphin version
718
from delphin.__about__ import __version__ # noqa: F401
819
from delphin.exceptions import PyDelphinException
@@ -12,12 +23,23 @@ class HierarchyError(PyDelphinException):
1223
"""Raised for invalid operations on hierarchies."""
1324

1425

15-
def _norm_id(id):
26+
H = TypeVar("H", bound=Hashable)
27+
# generic types
28+
Identifiers = Iterable[H]
29+
HierarchyMap = Mapping[H, Identifiers]
30+
DataMap = Mapping[H, Any]
31+
# explicit types
32+
HierarchyDict = dict[H, tuple[H, ...]]
33+
DataDict = dict[H, Any]
34+
IdentifierNormalizer = Callable[[H], H]
35+
36+
37+
def _norm_id(id: H) -> H:
1638
"""Default id normalizer does nothing."""
1739
return id
1840

1941

20-
class MultiHierarchy:
42+
class MultiHierarchy(Generic[H]):
2143
"""
2244
A Multiply-inheriting Hierarchy.
2345
@@ -30,6 +52,10 @@ class MultiHierarchy:
3052
data. Data for identifiers may be retrieved and set individually with
3153
dictionary key-access.
3254
55+
While MultiHierarchy can model non-string hierarchies, the data
56+
type of all node identifiers must be hashable and consistent
57+
within the hierarchy.
58+
3359
>>> th = MultiHierarchy('*top*', {'food': '*top*',
3460
... 'utensil': '*top*'})
3561
>>> th.update({'fruit': 'food', 'apple': 'fruit'})
@@ -72,8 +98,19 @@ class MultiHierarchy:
7298
top: the hierarchy's top node identifier
7399
"""
74100

75-
def __init__(self, top, hierarchy=None, data=None,
76-
normalize_identifier=None):
101+
_top: H
102+
_hier: HierarchyDict
103+
_loer: dict[H, set[H]]
104+
_data: DataDict
105+
_norm: IdentifierNormalizer
106+
107+
def __init__(
108+
self,
109+
top: H,
110+
hierarchy: Optional[HierarchyMap] = None,
111+
data: Optional[DataMap] = None,
112+
normalize_identifier: Optional[IdentifierNormalizer] = None,
113+
):
77114
if not normalize_identifier:
78115
self._norm = _norm_id
79116
elif not callable(normalize_identifier):
@@ -89,17 +126,19 @@ def __init__(self, top, hierarchy=None, data=None,
89126
self.update(hierarchy, data)
90127

91128
@property
92-
def top(self):
129+
def top(self) -> H:
93130
return self._top
94131

95-
def __eq__(self, other):
132+
def __eq__(self, other: Any) -> bool:
96133
if not isinstance(other, self.__class__):
97134
return NotImplemented
98-
return (self._top == other._top
99-
and self._hier == other._hier
100-
and self._data == other._data)
135+
return (
136+
self._top == other._top
137+
and self._hier == other._hier
138+
and self._data == other._data
139+
)
101140

102-
def __getitem__(self, identifier):
141+
def __getitem__(self, identifier: H) -> Any:
103142
identifier = self._norm(identifier)
104143
data = None
105144
try:
@@ -109,31 +148,37 @@ def __getitem__(self, identifier):
109148
raise
110149
return data
111150

112-
def __setitem__(self, identifier, data):
151+
def __setitem__(self, identifier: H, data: Any) -> None:
113152
identifier = self._norm(identifier)
114153
if identifier not in self:
115154
raise HierarchyError(
116155
f'cannot set data; not in hierarchy: {identifier}')
117156
self._data[identifier] = data
118157

119-
def __iter__(self):
120-
return iter(identifier for identifier in self._hier
121-
if identifier != self._top)
158+
def __iter__(self) -> Iterator[H]:
159+
return iter(
160+
identifier for identifier in self._hier
161+
if identifier != self._top
162+
)
122163

123-
def __contains__(self, identifier):
164+
def __contains__(self, identifier: H) -> bool:
124165
return self._norm(identifier) in self._hier
125166

126-
def __len__(self):
167+
def __len__(self) -> int:
127168
return len(self._hier) - 1 # ignore top
128169

129-
def items(self):
170+
def items(self) -> Iterable[tuple[H, Any]]:
130171
"""
131172
Return the (identifier, data) pairs excluding the top node.
132173
"""
133174
value = self.__getitem__
134175
return [(identifier, value(identifier)) for identifier in self]
135176

136-
def update(self, subhierarchy=None, data=None):
177+
def update(
178+
self,
179+
subhierarchy: Optional[HierarchyMap] = None,
180+
data: Optional[DataMap] = None,
181+
) -> None:
137182
"""
138183
Incorporate *subhierarchy* and *data* into the hierarchy.
139184
@@ -166,7 +211,7 @@ def update(self, subhierarchy=None, data=None):
166211
loer = dict(self._loer)
167212

168213
while subhierarchy:
169-
eligible = _get_eligible(hier, subhierarchy)
214+
eligible: list[H] = _get_eligible(hier, subhierarchy)
170215

171216
for identifier in eligible:
172217
parents = subhierarchy.pop(identifier)
@@ -181,22 +226,22 @@ def update(self, subhierarchy=None, data=None):
181226
self._loer = loer
182227
self._data.update(data)
183228

184-
def parents(self, identifier):
229+
def parents(self, identifier: H) -> tuple[H, ...]:
185230
"""Return the immediate parents of *identifier*."""
186231
identifier = self._norm(identifier)
187232
return self._hier[identifier]
188233

189-
def children(self, identifier):
234+
def children(self, identifier: H) -> set[H]:
190235
"""Return the immediate children of *identifier*."""
191236
identifier = self._norm(identifier)
192237
return self._loer[identifier]
193238

194-
def ancestors(self, identifier):
239+
def ancestors(self, identifier: H) -> set[H]:
195240
"""Return the ancestors of *identifier*."""
196241
identifier = self._norm(identifier)
197242
return _ancestors(identifier, self._hier)
198243

199-
def descendants(self, identifier):
244+
def descendants(self, identifier: H) -> set[H]:
200245
"""Return the descendants of *identifier*."""
201246
identifier = self._norm(identifier)
202247
xs = set()
@@ -205,7 +250,7 @@ def descendants(self, identifier):
205250
xs.update(self.descendants(child))
206251
return xs
207252

208-
def subsumes(self, a, b):
253+
def subsumes(self, a: H, b: H) -> bool:
209254
"""
210255
Return `True` if node *a* subsumes node *b*.
211256
@@ -234,7 +279,7 @@ def subsumes(self, a, b):
234279
a, b = norm(a), norm(b)
235280
return a == b or b in self.descendants(a)
236281

237-
def compatible(self, a, b):
282+
def compatible(self, a: H, b: H) -> bool:
238283
"""
239284
Return `True` if node *a* is compatible with node *b*.
240285
@@ -262,7 +307,11 @@ def compatible(self, a, b):
262307
b_lineage = self.descendants(b).union([b])
263308
return len(a_lineage.intersection(b_lineage)) > 0
264309

265-
def validate_update(self, subhierarchy, data):
310+
def validate_update(
311+
self,
312+
subhierarchy: Optional[HierarchyMap],
313+
data: Optional[DataMap],
314+
) -> tuple[HierarchyDict, DataDict]:
266315
"""
267316
Check if the update can apply to the current hierarchy.
268317
@@ -277,58 +326,71 @@ def validate_update(self, subhierarchy, data):
277326
ids = set(self._hier).intersection(subhierarchy)
278327
if ids:
279328
raise HierarchyError(
280-
'already in hierarchy: {}'.format(', '.join(ids)))
329+
'already in hierarchy: {}'.format(', '.join(map(str, ids))))
281330

282331
ids = set(data).difference(set(self._hier).union(subhierarchy))
283332
if ids:
284333
raise HierarchyError(
285334
'cannot update data; not in hierarchy: {}'
286-
.format(', '.join(ids)))
335+
.format(', '.join(map(str, ids))))
287336
return subhierarchy, data
288337

289338

290-
def _ancestors(id, hier):
339+
def _ancestors(id: H, hier: dict[H, tuple[H, ...]]) -> set[H]:
291340
xs = set()
292341
for parent in hier[id]:
293342
xs.add(parent)
294343
xs.update(_ancestors(parent, hier))
295344
return xs
296345

297346

298-
def _normalize_update(norm, subhierarchy, data):
299-
sub = {}
347+
def _normalize_update(
348+
norm: IdentifierNormalizer,
349+
subhierarchy: Optional[HierarchyMap],
350+
data: Optional[DataMap],
351+
) -> tuple[HierarchyDict, DataDict]:
352+
sub: HierarchyDict = {}
353+
parents: Identifiers
300354
if subhierarchy:
301355
for id, parents in subhierarchy.items():
302356
if isinstance(parents, str):
303357
parents = parents.split()
304358
id = norm(id)
305359
parents = tuple(map(norm, parents))
306360
sub[id] = parents
307-
dat = {}
361+
dat: DataDict = {}
308362
if data:
309363
dat = {norm(id): obj for id, obj in data.items()}
310364
return sub, dat
311365

312366

313-
def _get_eligible(hier, sub):
367+
def _get_eligible(
368+
hier: HierarchyDict,
369+
sub: HierarchyDict,
370+
) -> list[H]:
314371
eligible = [id for id, parents in sub.items()
315372
if all(parent in hier for parent in parents)]
316373
if not eligible:
317374
raise HierarchyError(
318375
'disconnected or cyclic hierarchy; remaining: {}'
319-
.format(', '.join(sub)))
376+
.format(', '.join(map(str, sub))))
320377
return eligible
321378

322379

323-
def _validate_parentage(id, parents, hier):
324-
ancestors = set()
380+
def _validate_parentage(
381+
id: H,
382+
parents: tuple[H, ...],
383+
hier: HierarchyDict,
384+
) -> None:
385+
ancestors: set[H] = set()
325386
for parent in parents:
326387
ancestors.update(_ancestors(parent, hier))
327-
redundant = ancestors.intersection(parents)
388+
redundant = sorted(map(str, ancestors.intersection(parents)))
328389
if redundant:
329390
raise HierarchyError(
330391
'{} has redundant parents: {}'
331-
.format(id, ', '.join(sorted(redundant))))
392+
.format(id, ', '.join(redundant))
393+
)
332394

333395

334396
# single-parented hierarchy might be something like this:

0 commit comments

Comments
 (0)