5
5
from time import sleep , time
6
6
from typing import Any , Dict , Iterator , List , Literal , Optional , Tuple , Union
7
7
8
+ from biocframe import BiocFrame
8
9
from sqlalchemy import create_engine , func , text
9
10
from sqlalchemy .orm import Session , sessionmaker
10
11
from sqlalchemy .pool import QueuePool
14
15
from .models import Base , Metadata , Resource
15
16
from .utils import (
16
17
calculate_file_hash ,
18
+ convert_to_columnar ,
17
19
copy_or_move ,
18
20
create_tmp_dir ,
19
21
download_web_file ,
@@ -104,15 +106,19 @@ def _setup_database(self) -> None:
104
106
105
107
return SCHEMA_VERSION
106
108
107
- def _get_detached_resource (
108
- self , session : Session , obj : Union [Resource , Metadata ]
109
- ) -> Optional [Union [Resource , Metadata ]]:
109
+ def _get_detached_resource (self , session : Session , obj : Union [Resource , Metadata ]) -> Optional [dict ]:
110
110
"""Get a detached copy of a resource."""
111
111
if obj is None :
112
112
return None
113
113
session .refresh (obj )
114
114
session .expunge (obj )
115
- return obj
115
+ obj_dict = obj .to_dict ()
116
+
117
+ if isinstance (obj , Resource ):
118
+ if obj_dict ["rtype" ] == "relative" :
119
+ obj_dict ["rpath" ] = f"{ self .config .cache_dir } /{ obj_dict ['rpath' ]} "
120
+
121
+ return obj_dict
116
122
117
123
def __enter__ (self ) -> "BiocFileCache" :
118
124
return self
@@ -137,6 +143,10 @@ def get_session(self) -> Iterator[Session]:
137
143
finally :
138
144
session .close ()
139
145
146
+ #########################
147
+ ######>> cleanup <<######
148
+ #########################
149
+
140
150
# def _validate_rname(self, rname: str) -> None:
141
151
# """Validate resource name format."""
142
152
# if not validate_rname(rname, self.config.rname_pattern):
@@ -191,7 +201,11 @@ def cleanup(self) -> int:
191
201
self ._last_cleanup = datetime .now ()
192
202
return removed
193
203
194
- def get (self , rname : str = None , rid : str = None ) -> Optional [Resource ]:
204
+ ###############################
205
+ ######>> get resources <<######
206
+ ###############################
207
+
208
+ def get (self , rname : str = None , rid : str = None ) -> Optional [dict ]:
195
209
"""Get resource by name from cache.
196
210
197
211
Args:
@@ -215,7 +229,7 @@ def get(self, rname: str = None, rid: str = None) -> Optional[Resource]:
215
229
# Check if path exists with timeout
216
230
start = time ()
217
231
timeout = 30
218
- while not Path (str (resource .rpath )).exists ():
232
+ while not Path (str (self . config . cache_dir / resource .rpath )).exists ():
219
233
if time () - start >= timeout :
220
234
raise TimeoutError (f"For resource: '{ rname } ' the rpath does not exist after { timeout } seconds." )
221
235
sleep (0.1 )
@@ -236,7 +250,7 @@ def add(
236
250
expires : Optional [datetime ] = None ,
237
251
download : bool = True ,
238
252
ext : bool = True ,
239
- ) -> Resource :
253
+ ) -> dict :
240
254
"""Add a resource to the cache.
241
255
242
256
Args:
@@ -268,7 +282,7 @@ def add(
268
282
Defaults to `True`.
269
283
270
284
Returns:
271
- The `Resource` object added to the cache.
285
+ The `Resource` added to the cache, as a dictionary.
272
286
"""
273
287
# self._validate_rname(rname)
274
288
fpath = Path (fpath ) if rtype != "web" else fpath
@@ -289,7 +303,7 @@ def add(
289
303
# Generate paths and check size
290
304
rid = generate_id (size = len (self ))
291
305
uuid = generate_uuid ()
292
- rpath = self . config . cache_dir / f"{ uuid } _{ outpath .name if ext else outpath .stem } " if action != "asis" else fpath
306
+ rpath = f"{ uuid } _{ outpath .name if ext else outpath .stem } " if action != "asis" else fpath
293
307
294
308
# Create resource record
295
309
resource = Resource (
@@ -307,10 +321,10 @@ def add(
307
321
session .commit ()
308
322
309
323
try :
310
- copy_or_move (outpath , rpath , rname , action , False )
324
+ copy_or_move (outpath , self . config . cache_dir / rpath , rname , action , False )
311
325
312
326
# Calculate and store checksum
313
- resource .etag = calculate_file_hash (rpath , self .config .hash_algorithm )
327
+ resource .etag = calculate_file_hash (self . config . cache_dir / rpath , self .config .hash_algorithm )
314
328
session .commit ()
315
329
result = self ._get_detached_resource (session , resource )
316
330
return result
@@ -320,7 +334,7 @@ def add(
320
334
session .commit ()
321
335
raise Exception ("Failed to add resource" ) from e
322
336
323
- def add_batch (self , resources : List [Dict [str , Any ]]) -> List [ Resource ] :
337
+ def add_batch (self , resources : List [Dict [str , Any ]]) -> BiocFrame :
324
338
"""Add multiple resources in a single transaction.
325
339
326
340
Args:
@@ -344,7 +358,7 @@ def update(
344
358
rname : str ,
345
359
fpath : Union [str , Path ],
346
360
action : Literal ["copy" , "move" , "asis" ] = "copy" ,
347
- ) -> Resource :
361
+ ) -> dict :
348
362
"""Update an existing resource.
349
363
350
364
Args:
@@ -359,7 +373,7 @@ def update(
359
373
Defaults to ``copy``.
360
374
361
375
Returns:
362
- Updated `Resource` object.
376
+ The updated `Resource`, as a dictionary.
363
377
364
378
"""
365
379
fpath = Path (fpath )
@@ -416,7 +430,7 @@ def remove(self, rname: str) -> None:
416
430
session .rollback ()
417
431
raise Exception (f"Failed to remove resource '{ rname } '" ) from e
418
432
419
- def list_resources (self , rtype : Optional [str ] = None , expired : Optional [bool ] = None ) -> List [ Resource ] :
433
+ def list_resources (self , rtype : Optional [str ] = None , expired : Optional [bool ] = None ) -> BiocFrame :
420
434
"""List resources in the cache with optional filtering.
421
435
422
436
Args:
@@ -432,7 +446,7 @@ def list_resources(self, rtype: Optional[str] = None, expired: Optional[bool] =
432
446
Note: Resources with no expiration are always considered non-expired.
433
447
434
448
Returns:
435
- List of Resource objects matching the filters
449
+ A `BiocFrame` of the resources matching the filters.
436
450
"""
437
451
with self .get_session () as session :
438
452
query = session .query (Resource )
@@ -452,7 +466,7 @@ def list_resources(self, rtype: Optional[str] = None, expired: Optional[bool] =
452
466
)
453
467
454
468
resources = query .all ()
455
- return [self ._get_detached_resource (session , r ) for r in resources ]
469
+ return BiocFrame ( convert_to_columnar ( [self ._get_detached_resource (session , r ) for r in resources ]))
456
470
457
471
def validate_resource (self , resource : Resource ) -> bool :
458
472
"""Validate resource integrity.
@@ -521,7 +535,7 @@ def verify_cache(self) -> Tuple[int, int]:
521
535
invalid += 1
522
536
return valid , invalid
523
537
524
- def search (self , query : str , field : str = "rname" , exact : bool = False ) -> List [ Resource ] :
538
+ def search (self , query : str , field : str = "rname" , exact : bool = False ) -> BiocFrame :
525
539
"""Search for resources by field value.
526
540
527
541
Args:
@@ -543,7 +557,7 @@ def search(self, query: str, field: str = "rname", exact: bool = False) -> List[
543
557
else :
544
558
resources = session .query (Resource ).filter (Resource [field ].ilike (f"%{ query } %" )).all ()
545
559
546
- return [self ._get_detached_resource (session , r ) for r in resources ]
560
+ return BiocFrame ( convert_to_columnar ( [self ._get_detached_resource (session , r ) for r in resources ]))
547
561
548
562
def get_stats (self ) -> Dict [str , Any ]:
549
563
"""Get statistics about the cache."""
@@ -669,7 +683,7 @@ def add_metadata(self, key: str, value: str):
669
683
except Exception as e :
670
684
session .delete (meta )
671
685
session .commit ()
672
- raise Exception ("Failed to add metadata" ) from e
686
+ raise Exception ("Failed to add metadata" , str ( e ) ) from e
673
687
else :
674
688
raise Exception (f"'key'={ key } already exists in metadata." )
675
689
0 commit comments