Skip to content

Commit 8b676d9

Browse files
Merge pull request #833 from dimitri-yatsenko/master
table.children and table.parents can now return table objects, not just names
2 parents fcaf01d + e6fa56b commit 8b676d9

19 files changed

+217
-105
lines changed

CHANGELOG.md

+9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
## Release notes
22

3+
### 0.12.8 -- Jan 12, 2021
4+
* table.children, .parents, .descendants, and .ancestors can return queryable objects. PR #833
5+
* Load dependencies before querying dependencies. (#179) PR #833
6+
* Fix display of part tables in `schema.save`. (#821) PR #833
7+
* Add `schema.list_tables`. (#838) PR #844
8+
* Fix minio new version regression. PR #847
9+
* Add more S3 logging for debugging. (#831) PR #832
10+
* Convert testing framework from TravisCI to GitHub Actions (#841) PR #840
11+
312
### 0.12.7 -- Oct 27, 2020
413
* Fix case sensitivity issues to adapt to MySQL 8+. PR #819
514
* Fix pymysql regression bug (#814) PR #816

LNX-docker-compose.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ services:
3232
interval: 1s
3333
fakeservices.datajoint.io:
3434
<<: *net
35-
image: raphaelguzman/nginx:v0.0.10
35+
image: raphaelguzman/nginx:v0.0.13
3636
environment:
3737
- ADD_db_TYPE=DATABASE
3838
- ADD_db_ENDPOINT=db:3306

datajoint/autopopulate.py

+11-17
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,19 @@ def key_source(self):
3232
The default value is the join of the parent relations.
3333
Users may override to change the granularity or the scope of populate() calls.
3434
"""
35-
def parent_gen(self):
36-
if self.target.full_table_name not in self.connection.dependencies:
37-
self.connection.dependencies.load()
38-
for parent_name, fk_props in self.target.parents(primary=True).items():
39-
if not parent_name.isdigit(): # simple foreign key
40-
yield FreeTable(self.connection, parent_name).proj()
41-
else:
42-
grandparent = list(self.connection.dependencies.in_edges(parent_name))[0][0]
43-
yield FreeTable(self.connection, grandparent).proj(**{
44-
attr: ref for attr, ref in fk_props['attr_map'].items() if ref != attr})
35+
def _rename_attributes(table, props):
36+
return (table.proj(
37+
**{attr: ref for attr, ref in props['attr_map'].items() if attr != ref})
38+
if props['aliased'] else table)
4539

4640
if self._key_source is None:
47-
parents = parent_gen(self)
48-
try:
49-
self._key_source = next(parents)
50-
except StopIteration:
51-
raise DataJointError('A relation must have primary dependencies for auto-populate to work') from None
52-
for q in parents:
53-
self._key_source *= q
41+
parents = self.target.parents(primary=True, as_objects=True, foreign_key_info=True)
42+
if not parents:
43+
raise DataJointError(
44+
'A relation must have primary dependencies for auto-populate to work') from None
45+
self._key_source = _rename_attributes(*parents[0])
46+
for q in parents[1:]:
47+
self._key_source *= _rename_attributes(*q)
5448
return self._key_source
5549

5650
def make(self, key):

datajoint/connection.py

+1
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ def close(self):
166166

167167
def register(self, schema):
168168
self.schemas[schema.database] = schema
169+
self.dependencies.clear()
169170

170171
def ping(self):
171172
"""

datajoint/dependencies.py

+45-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,36 @@
11
import networkx as nx
22
import itertools
3+
import re
34
from collections import defaultdict, OrderedDict
45
from .errors import DataJointError
56

67

8+
def unite_master_parts(lst):
    """
    Re-order a list of table names so that part tables immediately follow their master tables
    without breaking the topological order.

    Without this correction, a simple topological sort may insert other descendants between
    a master and its parts. The input list must already be topologically sorted.

    The list is re-ordered in place and the same list object is returned.

    :param lst: list of full table names in the form `schema`.`table_name`
    :return: the same list, with each part table moved to directly follow its master
        (or the last part of that master already placed)
    :raises DataJointError: if a part table is found with neither its master nor a sibling part
        anywhere before it in the list
    :example:
    unite_master_parts(
        ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) ->
        ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`']
    """
    # A part table is named `schema`.`master__part`. The dot between schema and table is escaped
    # so that only a literal '.' separator is accepted; the pattern is compiled once, outside the loop.
    part_pattern = re.compile(r'(?P<master>`\w+`\.`\w+)__\w+`')

    # Scanning starts at index 2: the entries at positions 0 and 1 are never moved.
    for i in range(2, len(lst)):
        name = lst[i]
        match = part_pattern.match(name)
        if match:  # name is a part table
            master = match.group('master')
            # Walk backwards to the nearest preceding master or sibling part of the same master.
            for j in range(i - 1, -1, -1):
                if lst[j] == master + '`' or lst[j].startswith(master + '__'):
                    # move from the ith position to the (j+1)th position
                    lst[j + 1:i + 1] = [name] + lst[j + 1:i]
                    break
            else:
                raise DataJointError("Found a part table {name} without its master table.".format(name=name))
    return lst
32+
33+
734
class Dependencies(nx.DiGraph):
835
"""
936
The graph of dependencies (foreign keys) between loaded tables.
@@ -16,15 +43,22 @@ class Dependencies(nx.DiGraph):
1643
def __init__(self, connection=None):
1744
self._conn = connection
1845
self._node_alias_count = itertools.count()
46+
self._loaded = False
1947
super().__init__(self)
2048

21-
def load(self):
49+
def clear(self):
50+
self._loaded = False
51+
super().clear()
52+
53+
def load(self, force=True):
2254
"""
2355
Load dependencies for all loaded schemas.
2456
This method gets called before any operation that requires dependencies: delete, drop, populate, progress.
2557
"""
26-
2758
# reload from scratch to prevent duplication of renamed edges
59+
if self._loaded and not force:
60+
return
61+
2862
self.clear()
2963

3064
# load primary key info
@@ -77,6 +111,7 @@ def load(self):
77111

78112
if not nx.is_directed_acyclic_graph(self): # pragma: no cover
79113
raise DataJointError('DataJoint can only work with acyclic dependencies')
114+
self._loaded = True
80115

81116
def parents(self, table_name, primary=None):
82117
"""
@@ -86,6 +121,7 @@ def parents(self, table_name, primary=None):
86121
attribute are considered.
87122
:return: dict of tables referenced by the foreign keys of table
88123
"""
124+
self.load(force=False)
89125
return {p[0]: p[2] for p in self.in_edges(table_name, data=True)
90126
if primary is None or p[2]['primary'] == primary}
91127

@@ -97,6 +133,7 @@ def children(self, table_name, primary=None):
97133
attribute are considered.
98134
:return: dict of tables referencing the table through foreign keys
99135
"""
136+
self.load(force=False)
100137
return {p[1]: p[2] for p in self.out_edges(table_name, data=True)
101138
if primary is None or p[2]['primary'] == primary}
102139

@@ -105,17 +142,19 @@ def descendants(self, full_table_name):
105142
:param full_table_name: In form `schema`.`table_name`
106143
:return: all dependent tables sorted in topological order. Self is included.
107144
"""
145+
self.load(force=False)
108146
nodes = self.subgraph(
109147
nx.algorithms.dag.descendants(self, full_table_name))
110-
return [full_table_name] + list(
111-
nx.algorithms.dag.topological_sort(nodes))
148+
return unite_master_parts([full_table_name] + list(
149+
nx.algorithms.dag.topological_sort(nodes)))
112150

113151
def ancestors(self, full_table_name):
114152
"""
115153
:param full_table_name: In form `schema`.`table_name`
116154
:return: all dependent tables sorted in topological order. Self is included.
117155
"""
156+
self.load(force=False)
118157
nodes = self.subgraph(
119158
nx.algorithms.dag.ancestors(self, full_table_name))
120-
return [full_table_name] + list(reversed(list(
121-
nx.algorithms.dag.topological_sort(nodes))))
159+
return list(reversed(unite_master_parts(list(
160+
nx.algorithms.dag.topological_sort(nodes)) + [full_table_name])))

datajoint/diagram.py

+3-23
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import warnings
66
import inspect
77
from .table import Table
8+
from .dependencies import unite_master_parts
89

910
try:
1011
from matplotlib import pyplot as plt
@@ -155,29 +156,8 @@ def is_part(part, master):
155156
return self
156157

157158
def topological_sort(self):
158-
"""
159-
:return: list of nodes in topological order
160-
"""
161-
162-
def _unite(lst):
163-
"""
164-
reorder list so that parts immediately follow their masters without breaking the topological order.
165-
Without this correction, simple topological sort may insert other descendants between master and parts
166-
:example:
167-
_unite(['a', 'a__q', 'b', 'c', 'c__q', 'b__q', 'd', 'a__r'])
168-
-> ['a', 'a__q', 'a__r', 'b', 'b__q', 'c', 'c__q', 'd']
169-
"""
170-
if len(lst) <= 2:
171-
return lst
172-
el = lst.pop()
173-
lst = _unite(lst)
174-
if '__' in el:
175-
master = el.split('__')[0]
176-
if not lst[-1].startswith(master):
177-
return _unite(lst[:-1] + [el, lst[-1]])
178-
return lst + [el]
179-
180-
return _unite(list(nx.algorithms.dag.topological_sort(
159+
""" :return: list of nodes in topological order """
160+
return unite_master_parts(list(nx.algorithms.dag.topological_sort(
181161
nx.DiGraph(self).subgraph(self.nodes_to_show))))
182162

183163
def __add__(self, arg):

datajoint/hash.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pathlib import Path
55

66

7-
def hash_key_values(mapping):
7+
def key_hash(mapping):
88
"""
99
32-byte hash of the mapping's key values sorted by the key name.
1010
This is often used to convert a long primary key value into a shorter hash.

datajoint/jobs.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from .hash import hash_key_values
2+
from .hash import key_hash
33
import platform
44
from .table import Table
55
from .settings import config
@@ -69,7 +69,7 @@ def reserve(self, table_name, key):
6969
"""
7070
job = dict(
7171
table_name=table_name,
72-
key_hash=hash_key_values(key),
72+
key_hash=key_hash(key),
7373
status='reserved',
7474
host=platform.node(),
7575
pid=os.getpid(),
@@ -89,7 +89,7 @@ def complete(self, table_name, key):
8989
:param table_name: `database`.`table_name`
9090
:param key: the dict of the job's primary key
9191
"""
92-
job_key = dict(table_name=table_name, key_hash=hash_key_values(key))
92+
job_key = dict(table_name=table_name, key_hash=key_hash(key))
9393
(self & job_key).delete_quick()
9494

9595
def error(self, table_name, key, error_message, error_stack=None):
@@ -107,7 +107,7 @@ def error(self, table_name, key, error_message, error_stack=None):
107107
self.insert1(
108108
dict(
109109
table_name=table_name,
110-
key_hash=hash_key_values(key),
110+
key_hash=key_hash(key),
111111
status="error",
112112
host=platform.node(),
113113
pid=os.getpid(),

datajoint/schemas.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,6 @@ def save(self, python_filename=None):
247247
This method is in preparation for a future release and is not officially supported.
248248
:return: a string containing the body of a complete Python module defining this schema.
249249
"""
250-
251250
module_count = itertools.count()
252251
# add virtual modules for referenced modules with names vmod0, vmod1, ...
253252
module_lookup = collections.defaultdict(lambda: 'vmod' + str(next(module_count)))
@@ -258,15 +257,16 @@ def make_class_definition(table):
258257
class_name = table.split('.')[1].strip('`')
259258
indent = ''
260259
if tier == 'Part':
261-
class_name = class_name.split('__')[1]
260+
class_name = class_name.split('__')[-1]
262261
indent += ' '
263262
class_name = to_camel_case(class_name)
264263

265264
def repl(s):
266-
d, tab = s.group(1), s.group(2)
267-
return ('' if d == db else (module_lookup[d]+'.')) + to_camel_case(tab)
265+
d, tabs = s.group(1), s.group(2)
266+
return ('' if d == db else (module_lookup[d]+'.')) + '.'.join(
267+
to_camel_case(tab) for tab in tabs.lstrip('__').split('__'))
268268

269-
return ('' if tier == 'Part' else '@schema\n') + \
269+
return ('' if tier == 'Part' else '\n@schema\n') + \
270270
'{indent}class {class_name}(dj.{tier}):\n{indent} definition = """\n{indent} {defi}"""'.format(
271271
class_name=class_name,
272272
indent=indent,
@@ -276,8 +276,8 @@ def repl(s):
276276
FreeTable(self.connection, table).describe(printout=False).replace('\n', '\n ' + indent)))
277277

278278
diagram = Diagram(self)
279-
body = '\n\n\n'.join(make_class_definition(table) for table in diagram.topological_sort())
280-
python_code = '\n\n\n'.join((
279+
body = '\n\n'.join(make_class_definition(table) for table in diagram.topological_sort())
280+
python_code = '\n\n'.join((
281281
'"""This module was auto-generated by datajoint from an existing schema"""',
282282
"import datajoint as dj\n\nschema = dj.Schema('{db}')".format(db=db),
283283
'\n'.join("{module} = dj.VirtualModule('{module}', '{schema_name}')".format(module=v, schema_name=k)

0 commit comments

Comments
 (0)