Skip to content

Commit 6eb3d63

Browse files
authored
Fix issues for compatibility with the R package (#32)
1 parent b31cd3c commit 6eb3d63

File tree

7 files changed

+83
-32
lines changed

7 files changed

+83
-32
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## Version 0.7.0
4+
5+
- Fixing issues with using R and Python interfaces to the same cache directory.
6+
- `list_resources` and `search` now return a `BiocFrame` object; `get`, `add` and `update` now return a dictionary instead of a `Resource` object.
7+
38
## Version 0.6.1 - 0.6.2
49

510
- Generate rid's that match with R's cache.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ resource = cache.add("myfile", "path/to/file.txt")
2929
resource = cache.get("myfile")
3030

3131
# Use the cached file
32-
print(resource.rpath) # Path to cached file
32+
print(resource["rpath"]) # Path to cached file
3333
```
3434

3535
## Advanced Usage

setup.cfg

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ author_email = jayaram.kancherla@gmail.com
1010
license = MIT
1111
long_description = file: README.md
1212
long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
13-
url = https://github.com/epiviz/pyBiocFileCache
13+
url = https://github.com/biocpy/pyBiocFileCache
1414
# Add here related links, for example:
1515
project_urls =
16-
Documentation = https://pyscaffold.org/
16+
Documentation = https://github.com/biocpy/pyBiocFileCache
1717
# Source = https://github.com/pyscaffold/pyscaffold/
1818
# Changelog = https://pyscaffold.org/en/latest/changelog.html
1919
# Tracker = https://github.com/pyscaffold/pyscaffold/issues
@@ -48,6 +48,7 @@ python_requires = >=3.9
4848
install_requires =
4949
importlib-metadata; python_version<"3.8"
5050
sqlalchemy
51+
biocframe
5152

5253
[options.packages.find]
5354
where = src

src/pybiocfilecache/cache.py

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from time import sleep, time
66
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
77

8+
from biocframe import BiocFrame
89
from sqlalchemy import create_engine, func, text
910
from sqlalchemy.orm import Session, sessionmaker
1011
from sqlalchemy.pool import QueuePool
@@ -14,6 +15,7 @@
1415
from .models import Base, Metadata, Resource
1516
from .utils import (
1617
calculate_file_hash,
18+
convert_to_columnar,
1719
copy_or_move,
1820
create_tmp_dir,
1921
download_web_file,
@@ -104,15 +106,19 @@ def _setup_database(self) -> None:
104106

105107
return SCHEMA_VERSION
106108

107-
def _get_detached_resource(self, session: Session, obj: Union[Resource, Metadata]) -> Optional[dict]:
    """Detach a record from the session and return it as a dictionary.

    Args:
        session:
            Session that ``obj`` is currently attached to.

        obj:
            Record to detach. May be ``None``.

    Returns:
        ``None`` when ``obj`` is ``None``; otherwise the result of
        ``obj.to_dict()``. For a `Resource` whose ``rtype`` is
        ``"relative"``, the ``rpath`` entry is returned joined onto the
        cache directory.
    """
    if obj is None:
        return None

    # Refresh first, then expunge; to_dict() reads the attributes from the
    # detached object.
    session.refresh(obj)
    session.expunge(obj)
    obj_dict = obj.to_dict()

    if isinstance(obj, Resource) and obj_dict["rtype"] == "relative":
        # Join with the path operator (as the other cache_dir joins in this
        # module do) instead of a hard-coded "/" separator.
        obj_dict["rpath"] = str(self.config.cache_dir / obj_dict["rpath"])

    return obj_dict
116122

117123
def __enter__(self) -> "BiocFileCache":
118124
return self
@@ -137,6 +143,10 @@ def get_session(self) -> Iterator[Session]:
137143
finally:
138144
session.close()
139145

146+
#########################
147+
######>> cleanup <<######
148+
#########################
149+
140150
# def _validate_rname(self, rname: str) -> None:
141151
# """Validate resource name format."""
142152
# if not validate_rname(rname, self.config.rname_pattern):
@@ -191,7 +201,11 @@ def cleanup(self) -> int:
191201
self._last_cleanup = datetime.now()
192202
return removed
193203

194-
def get(self, rname: str = None, rid: str = None) -> Optional[Resource]:
204+
###############################
205+
######>> get resources <<######
206+
###############################
207+
208+
def get(self, rname: str = None, rid: str = None) -> Optional[dict]:
195209
"""Get resource by name from cache.
196210
197211
Args:
@@ -215,7 +229,7 @@ def get(self, rname: str = None, rid: str = None) -> Optional[Resource]:
215229
# Check if path exists with timeout
216230
start = time()
217231
timeout = 30
218-
while not Path(str(resource.rpath)).exists():
232+
while not Path(str(self.config.cache_dir / resource.rpath)).exists():
219233
if time() - start >= timeout:
220234
raise TimeoutError(f"For resource: '{rname}' the rpath does not exist after {timeout} seconds.")
221235
sleep(0.1)
@@ -236,7 +250,7 @@ def add(
236250
expires: Optional[datetime] = None,
237251
download: bool = True,
238252
ext: bool = True,
239-
) -> Resource:
253+
) -> dict:
240254
"""Add a resource to the cache.
241255
242256
Args:
@@ -268,7 +282,7 @@ def add(
268282
Defaults to `True`.
269283
270284
Returns:
271-
The `Resource` object added to the cache.
285+
The `Resource` object added to the cache, as a dictionary.
272286
"""
273287
# self._validate_rname(rname)
274288
fpath = Path(fpath) if rtype != "web" else fpath
@@ -289,7 +303,7 @@ def add(
289303
# Generate paths and check size
290304
rid = generate_id(size=len(self))
291305
uuid = generate_uuid()
292-
rpath = self.config.cache_dir / f"{uuid}_{outpath.name if ext else outpath.stem}" if action != "asis" else fpath
306+
rpath = f"{uuid}_{outpath.name if ext else outpath.stem}" if action != "asis" else fpath
293307

294308
# Create resource record
295309
resource = Resource(
@@ -307,10 +321,10 @@ def add(
307321
session.commit()
308322

309323
try:
310-
copy_or_move(outpath, rpath, rname, action, False)
324+
copy_or_move(outpath, self.config.cache_dir / rpath, rname, action, False)
311325

312326
# Calculate and store checksum
313-
resource.etag = calculate_file_hash(rpath, self.config.hash_algorithm)
327+
resource.etag = calculate_file_hash(self.config.cache_dir / rpath, self.config.hash_algorithm)
314328
session.commit()
315329
result = self._get_detached_resource(session, resource)
316330
return result
@@ -320,7 +334,7 @@ def add(
320334
session.commit()
321335
raise Exception("Failed to add resource") from e
322336

323-
def add_batch(self, resources: List[Dict[str, Any]]) -> List[Resource]:
337+
def add_batch(self, resources: List[Dict[str, Any]]) -> BiocFrame:
324338
"""Add multiple resources in a single transaction.
325339
326340
Args:
@@ -344,7 +358,7 @@ def update(
344358
rname: str,
345359
fpath: Union[str, Path],
346360
action: Literal["copy", "move", "asis"] = "copy",
347-
) -> Resource:
361+
) -> dict:
348362
"""Update an existing resource.
349363
350364
Args:
@@ -359,7 +373,7 @@ def update(
359373
Defaults to ``copy``.
360374
361375
Returns:
362-
Updated `Resource` object.
376+
The updated `Resource` object, as a dictionary.
363377
364378
"""
365379
fpath = Path(fpath)
@@ -416,7 +430,7 @@ def remove(self, rname: str) -> None:
416430
session.rollback()
417431
raise Exception(f"Failed to remove resource '{rname}'") from e
418432

419-
def list_resources(self, rtype: Optional[str] = None, expired: Optional[bool] = None) -> List[Resource]:
433+
def list_resources(self, rtype: Optional[str] = None, expired: Optional[bool] = None) -> BiocFrame:
420434
"""List resources in the cache with optional filtering.
421435
422436
Args:
@@ -432,7 +446,7 @@ def list_resources(self, rtype: Optional[str] = None, expired: Optional[bool] =
432446
Note: Resources with no expiration are always considered non-expired.
433447
434448
Returns:
435-
List of Resource objects matching the filters
449+
A `BiocFrame` with one row per resource matching the filters.
436450
"""
437451
with self.get_session() as session:
438452
query = session.query(Resource)
@@ -452,7 +466,7 @@ def list_resources(self, rtype: Optional[str] = None, expired: Optional[bool] =
452466
)
453467

454468
resources = query.all()
455-
return [self._get_detached_resource(session, r) for r in resources]
469+
return BiocFrame(convert_to_columnar([self._get_detached_resource(session, r) for r in resources]))
456470

457471
def validate_resource(self, resource: Resource) -> bool:
458472
"""Validate resource integrity.
@@ -521,7 +535,7 @@ def verify_cache(self) -> Tuple[int, int]:
521535
invalid += 1
522536
return valid, invalid
523537

524-
def search(self, query: str, field: str = "rname", exact: bool = False) -> List[Resource]:
538+
def search(self, query: str, field: str = "rname", exact: bool = False) -> BiocFrame:
525539
"""Search for resources by field value.
526540
527541
Args:
@@ -543,7 +557,7 @@ def search(self, query: str, field: str = "rname", exact: bool = False) -> List[
543557
else:
544558
resources = session.query(Resource).filter(Resource[field].ilike(f"%{query}%")).all()
545559

546-
return [self._get_detached_resource(session, r) for r in resources]
560+
return BiocFrame(convert_to_columnar([self._get_detached_resource(session, r) for r in resources]))
547561

548562
def get_stats(self) -> Dict[str, Any]:
549563
"""Get statistics about the cache."""
@@ -669,7 +683,7 @@ def add_metadata(self, key: str, value: str):
669683
except Exception as e:
670684
session.delete(meta)
671685
session.commit()
672-
raise Exception("Failed to add metadata") from e
686+
raise Exception("Failed to add metadata", str(e)) from e
673687
else:
674688
raise Exception(f"'key'={key} already exists in metadata.")
675689

src/pybiocfilecache/models.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ class Metadata(Base):
1919
def __repr__(self) -> str:
2020
return f"<Metadata(key='{self.key}', value='{self.value}')>"
2121

22+
def to_dict(self) -> dict:
    """Return this metadata entry as a dictionary with ``key`` and ``value``."""
    return dict(key=self.key, value=self.value)
24+
2225

2326
class Resource(Base):
2427
"""Resource information stored in cache.
@@ -74,3 +77,18 @@ class Resource(Base):
7477

7578
def __repr__(self) -> str:
7679
return f"<Resource(rid='{self.rid}', rname='{self.rname}', rpath='{self.rpath}')>"
80+
81+
def to_dict(self) -> dict:
    """Return the resource's fields as a plain dictionary.

    Keys are the attribute names, emitted in the order listed below.
    """
    field_names = (
        "id",
        "rid",
        "rname",
        "create_time",
        "access_time",
        "rpath",
        "rtype",
        "fpath",
        "last_modified_time",
        "etag",
        "expires",
    )
    return {name: getattr(self, name) for name in field_names}

src/pybiocfilecache/utils.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zlib
88
from pathlib import Path
99
from shutil import copy2, move
10-
from typing import Literal
10+
from typing import List, Literal
1111

1212
__author__ = "Jayaram Kancherla"
1313
__copyright__ = "Jayaram Kancherla"
@@ -97,3 +97,16 @@ def download_web_file(url: str, filename: str, download: bool):
9797
open(str(outpath), "a").close()
9898

9999
return outpath
100+
101+
102+
def convert_to_columnar(list_of_dicts: List[dict]) -> dict:
    """Pivot a list of row dictionaries into a dictionary of columns.

    Args:
        list_of_dicts:
            Rows to convert; each row maps column names to values.

    Returns:
        A dictionary mapping each column name to a list with one value per
        row, in row order. Columns are ordered by first appearance across
        all rows, and a row that lacks a column contributes ``None``. An
        empty input yields an empty dictionary.
    """
    if not list_of_dicts:
        return {}

    # Gather column names from every row, not only the first, so keys that
    # first appear in a later row are not silently dropped. ``dict.fromkeys``
    # acts as an insertion-ordered set here.
    column_names = dict.fromkeys(col for row in list_of_dicts for col in row)

    return {col: [row.get(col) for row in list_of_dicts] for col in column_names}

tests/test_cache.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,19 @@ def test_add_get_list_operations():
3434
rec2 = bfc.get("test2")
3535
assert rec2 is not None
3636

37-
frec1 = open(rec1.rpath, "r").read().strip()
37+
frec1 = open(rec1["rpath"], "r").read().strip()
3838
assert frec1 == "test1"
3939

40-
frec2 = open(rec2.rpath, "r").read().strip()
40+
frec2 = open(rec2["rpath"], "r").read().strip()
4141
assert frec2 == "test2"
4242

4343
shutil.copy(os.getcwd() + "/tests/data/test2.txt", os.getcwd() + "/tests/data/test3.txt")
4444
bfc.add("test3_asis", os.getcwd() + "/tests/data/test3.txt", action="asis")
4545
rec3 = bfc.get("test3_asis")
4646
assert rec3 is not None
47-
assert rec3.rpath == os.getcwd() + "/tests/data/test3.txt"
47+
assert rec3["rpath"] == os.getcwd() + "/tests/data/test3.txt"
4848

49-
frec3 = open(rec3.rpath, "r").read().strip()
49+
frec3 = open(rec3["rpath"], "r").read().strip()
5050
assert frec3 == "test2"
5151

5252
rtrip = bfc.list_resources()
@@ -55,8 +55,8 @@ def test_add_get_list_operations():
5555
downurl = "https://bioconductor.org/packages/stats/bioc/BiocFileCache/BiocFileCache_2024_stats.tab"
5656
add_url = bfc.add(rname="download_link", fpath=downurl, rtype="web")
5757

58-
row = bfc.get(rid=add_url.rid)
59-
assert row.fpath == downurl
58+
row = bfc.get(rid=add_url["rid"])
59+
assert row["fpath"] == downurl
6060

6161
rtrip = bfc.list_resources()
6262
assert len(rtrip) == 4
@@ -99,10 +99,10 @@ def test_meta_operations():
9999
add_url = bfc.add(rname="download_link", fpath=downurl, rtype="web")
100100

101101
rec = bfc.get_metadata("schema_version")
102-
assert rec.value == "0.99.4"
102+
assert rec["value"] == "0.99.4"
103103

104104
rec = bfc.get_metadata("language")
105-
assert rec.value == "python"
105+
assert rec["value"] == "python"
106106

107107
rtrip = bfc.list_resources()
108108
assert len(rtrip) == 2

0 commit comments

Comments
 (0)