From 912d0865b14765062640ec53780bbe6be5d9ffa1 Mon Sep 17 00:00:00 2001 From: hackermd Date: Tue, 9 Aug 2022 12:45:34 -0400 Subject: [PATCH 1/5] Refactor file access by DICOMfileClient --- src/dicomweb_client/__init__.py | 2 +- src/dicomweb_client/file.py | 1974 ++++++++++++++++++------------- src/dicomweb_client/web.py | 6 +- tests/test_file.py | 1 - 4 files changed, 1131 insertions(+), 852 deletions(-) diff --git a/src/dicomweb_client/__init__.py b/src/dicomweb_client/__init__.py index c5ab498..f51cda6 100644 --- a/src/dicomweb_client/__init__.py +++ b/src/dicomweb_client/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.56.3' +__version__ = '0.56.4' from dicomweb_client.api import DICOMwebClient, DICOMfileClient from dicomweb_client.protocol import DICOMClient diff --git a/src/dicomweb_client/file.py b/src/dicomweb_client/file.py index b2e6395..e929a11 100644 --- a/src/dicomweb_client/file.py +++ b/src/dicomweb_client/file.py @@ -6,10 +6,7 @@ import os import re import sqlite3 -import sys import time -import traceback -from collections import OrderedDict from enum import Enum from pathlib import Path from typing import ( @@ -27,20 +24,17 @@ ) import numpy as np +import requests from PIL import Image from PIL.ImageCms import ImageCmsProfile, createProfile from pydicom import config as pydicom_config from pydicom.datadict import dictionary_VR, keyword_for_tag, tag_for_keyword +from pydicom.dataelem import DataElement from pydicom.dataset import Dataset, FileMetaDataset from pydicom.encaps import encapsulate, get_frame_offsets from pydicom.errors import InvalidDicomError from pydicom.filebase import DicomFileLike -from pydicom.filereader import ( - data_element_offset_to_value, - dcmread, - read_file_meta_info, - read_partial, -) +from pydicom.filereader import data_element_offset_to_value, dcmread from pydicom.filewriter import dcmwrite from pydicom.pixel_data_handlers.numpy_handler import unpack_bits from pydicom.tag import ( @@ -50,7 +44,7 @@ Tag, TupleTag, ) -from pydicom.uid import UID +from pydicom.uid import UID, RLELossless from pydicom.valuerep import DA, DT, TM logger = logging.getLogger(__name__) @@ -68,6 +62,30 @@ _END_MARKERS = {_JPEG_EOI_MARKER, _JPEG2000_EOC_MARKER} +def _are_frames_encapsulated(transfer_syntax_uid: str) -> bool: + """Determine whether frames are compressed. + + Parameters + ---------- + transfer_syntax_uid: str + Unique identifier of the transfer syntax + + Returns + ------- + bool + Whether frames are compressed + + """ + if transfer_syntax_uid in { + '1.2.840.10008.1.2', + '1.2.840.10008.1.2.1', + '1.2.840.10008.1.2.2', + '1.2.840.10008.1.2.1.99', + }: + return False + return True + + def _enforce_standard_conformance(fn: Callable) -> Callable: """Enforce standard conformance during a function call. @@ -99,7 +117,153 @@ def wrapper(*args, **kwargs): return wrapper -def _get_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]: +def _read_frame( + fp: DicomFileLike, + first_frame_offset: int, + basic_offset_table: np.ndarray, + frame_index: int, + transfer_syntax_uid: str +) -> bytes: + frame_offset = basic_offset_table[frame_index] + fp.seek(first_frame_offset + frame_offset, 0) + try: + stop_at = basic_offset_table[frame_index + 1] - frame_offset + except IndexError: + # For the last frame, there is no next offset available. + stop_at = -1 + + if _are_frames_encapsulated(transfer_syntax_uid): + n = 0 + # A frame may consist of multiple items (fragments). 
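+        # Each item consists of an Item tag (4 bytes), a length field
+        # (4 bytes), and a fragment of the declared length. Reading stops
+        # once "stop_at" bytes have been consumed or the Sequence Delimiter
+        # item is encountered.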
+        fragments = []
+        while True:
+            tag = TupleTag(fp.read_tag())
+            if n == stop_at or int(tag) == SequenceDelimiterTag:
+                break
+            if int(tag) != ItemTag:
+                raise ValueError(
+                    f'Failed to read frame #{frame_index + 1}. '
+                    f'Encountered unexpected tag {tag} in Pixel Data element.'
+                )
+            length = fp.read_UL()
+            fragments.append(fp.read(length))
+            n += 4 + 4 + length
+        return b''.join(fragments)
+    else:
+        return fp.read(stop_at)
+
+
+def _decode_frame(
+    frame: bytes,
+    frame_index: int,
+    transfer_syntax_uid: str,
+    rows: int,
+    columns: int,
+    samples_per_pixel: int,
+    bits_allocated: int,
+    bits_stored: int,
+    photometric_interpretation: str,
+    pixel_representation: int,
+    planar_configuration: Optional[int] = None
+) -> np.ndarray:
+    """Decode the pixel data of an individual frame item.
+
+    Parameters
+    ----------
+    frame: bytes
+        Value of a Frame item encoded in the given transfer syntax
+    frame_index: int
+        Zero-based frame index
+    transfer_syntax_uid: str
+        Unique identifier of the transfer syntax
+    rows: int
+        Number of rows per frame
+    columns: int
+        Number of columns per frame
+    samples_per_pixel: int
+        Number of samples per pixel
+    bits_allocated: int
+        Number of bits allocated per pixel sample
+    bits_stored: int
+        Number of bits stored per pixel sample
+    photometric_interpretation: str
+        Photometric interpretation
+    pixel_representation: int
+        Pixel representation (``0``: unsigned, ``1``: signed integers)
+    planar_configuration: Union[int, None], optional
+        Planar configuration (only defined for color images)
+
+    Returns
+    -------
+    numpy.ndarray
+        Array of decoded pixels of the frame with shape (Rows x Columns)
+        in case of a monochrome image or (Rows x Columns x SamplesPerPixel)
+        in case of a color image.
+
+    """
+    if bits_allocated == 1:
+        # Unfortunately, the type of the return value of unpack_bits()
+        # changes dynamically depending on the "as_array" argument. This
+        # causes issues for mypy and requires this workaround.
+        unpacked_frame: np.ndarray = unpack_bits(  # type: ignore
+            frame,
+            as_array=True
+        )
+        num_pixels_per_frame = rows * columns * samples_per_pixel
+        # Frames of 1-bit pixel data may not be aligned with byte
+        # boundaries. The frame item was read starting at the nearest whole
+        # byte, so the leading bits of the unpacked array may belong to the
+        # previous frame and need to be skipped.
+        start = int(((frame_index * num_pixels_per_frame / 8) % 1) * 8)
+        end = start + num_pixels_per_frame
+        pixel_array = unpacked_frame[start:end]
+        return pixel_array.reshape(rows, columns)
+    else:
+        # This hack creates a small dataset containing a Pixel Data element
+        # with only a single frame item, which can then be decoded using the
+        # existing pydicom API.
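+        # The data elements assigned below are the attributes that the
+        # pydicom pixel data handlers need in order to interpret the frame.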
+ ds = Dataset() + ds.file_meta = FileMetaDataset() + ds.file_meta.TransferSyntaxUID = UID(transfer_syntax_uid) + ds.Rows = rows + ds.Columns = columns + ds.SamplesPerPixel = samples_per_pixel + ds.PhotometricInterpretation = photometric_interpretation + ds.PixelRepresentation = pixel_representation + if planar_configuration is not None: + ds.PlanarConfiguration = planar_configuration + ds.BitsAllocated = bits_allocated + ds.BitsStored = bits_stored + ds.HighBit = bits_stored - 1 + if _are_frames_encapsulated(transfer_syntax_uid): + ds.PixelData = encapsulate(frames=[frame]) + else: + ds.PixelData = frame + return ds.pixel_array + + +def _get_frame_offsets( + fp: DicomFileLike, + number_of_frames: int, + number_of_pixels_per_frame: int, + transfer_syntax_uid: str, + bits_allocated: int +) -> Tuple[int, np.ndarray]: + pixel_data_offset = fp.tell() + if _are_frames_encapsulated(transfer_syntax_uid): + try: + basic_offset_table = _get_bot( + fp, + number_of_frames=number_of_frames, + transfer_syntax_uid=transfer_syntax_uid + ) + except Exception as error: + raise IOError(f'Failed to build Basic Offset Table: "{error}".') + first_frame_offset = fp.tell() + else: + if fp.is_implicit_VR: + header_offset = 4 + 4 # tag and length + else: + header_offset = 4 + 2 + 2 + 4 # tag, VR, reserved, and length + first_frame_offset = pixel_data_offset + header_offset + n_pixels = number_of_pixels_per_frame + bytes_per_frame_uncompressed = n_pixels * bits_allocated / 8 + basic_offset_table = np.array( + [ + int(math.floor(i * bytes_per_frame_uncompressed)) + for i in range(number_of_frames) + ], + dtype=np.uint32 + ) + return (first_frame_offset, basic_offset_table) + + +def _get_bot( + fp: DicomFileLike, + number_of_frames: int, + transfer_syntax_uid: str +) -> np.ndarray: """Read or build the Basic Offset Table (BOT). Parameters @@ -109,10 +273,12 @@ def _get_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]: the Pixel Data element number_of_frames: int Number of frames contained in the Pixel Data element + transfer_syntax_uid: str + Unique identifier of the transfer syntax Returns ------- - List[int] + numpy.ndarray Offset of each Frame item in bytes from the first byte of the Pixel Data element following the BOT item @@ -138,13 +304,14 @@ def _get_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]: logger.debug('build Basic Offset Table item') basic_offset_table = _build_bot( fp, - number_of_frames=number_of_frames + number_of_frames=number_of_frames, + transfer_syntax_uid=transfer_syntax_uid ) return basic_offset_table -def _read_bot(fp: DicomFileLike) -> List[int]: +def _read_bot(fp: DicomFileLike) -> np.ndarray: """Read the Basic Offset Table (BOT) of an encapsulated Pixel Data element. Parameters @@ -155,7 +322,7 @@ def _read_bot(fp: DicomFileLike) -> List[int]: Returns ------- - List[int] + numpy.ndarray Offset of each Frame item in bytes from the first byte of the Pixel Data element following the BOT item @@ -181,10 +348,14 @@ def _read_bot(fp: DicomFileLike) -> List[int]: ) fp.seek(pixel_data_element_value_offset - 4, 1) is_empty, offsets = get_frame_offsets(fp) - return offsets + return np.array(offsets, dtype=np.uint32) -def _build_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]: +def _build_bot( + fp: DicomFileLike, + number_of_frames: int, + transfer_syntax_uid: str +) -> np.ndarray: """Build a Basic Offset Table (BOT) for an encapsulated Pixel Data element. 
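+
+    The offsets are determined by scanning the Frame items of the Pixel
+    Data element. The first fragment of a frame is identified via JPEG or
+    JPEG 2000 start markers or, in case of RLE Lossless, via the item
+    boundaries themselves.
+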
     Parameters
@@ -194,10 +365,12 @@ def _build_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]:
         the Pixel Data element following the empty Basic Offset Table (BOT)
     number_of_frames: int
         Total number of frames in the dataset
+    transfer_syntax_uid: str
+        Unique identifier of the transfer syntax
 
     Returns
     -------
-    List[int]
+    numpy.ndarray
         Offset of each Frame item in bytes from the first byte of the
         Pixel Data element following the BOT item
 
@@ -249,463 +423,30 @@ def _build_bot(fp: DicomFileLike, number_of_frames: int) -> List[int]:
         if not fp.is_little_endian:
             first_two_bytes = first_two_bytes[::-1]
 
-        # In case of fragmentation, we only want to get the offsets to the
-        # first fragment of a given frame. We can identify those based on the
-        # JPEG and JPEG 2000 markers that should be found at the beginning and
-        # end of the compressed byte stream.
-        if first_two_bytes in _START_MARKERS:
-            current_offset = frame_position - initial_position
+        current_offset = frame_position - initial_position
+        if transfer_syntax_uid == RLELossless:
             offset_values.append(current_offset)
+        else:
+            # In case of fragmentation, we only want to get the offsets to the
+            # first fragment of a given frame. We can identify those based on
+            # the JPEG and JPEG 2000 markers that should be found at the
+            # beginning and end of the compressed byte stream.
+            if first_two_bytes in _START_MARKERS:
+                offset_values.append(current_offset)
         i += 1
         fp.seek(length - 2, 1)  # minus the first two bytes
 
     if len(offset_values) != number_of_frames:
         raise ValueError(
-            'Number of frame items does not match specified Number of Frames.'
+            'Number of found frame items does not match specified number '
+            f'of frames: {len(offset_values)} instead of {number_of_frames}.'
         )
     else:
         basic_offset_table = offset_values
 
     fp.seek(initial_position, 0)
 
-    return basic_offset_table
-
-
-class _ImageFileReader:
-
-    """Class for reading DICOM files that represent Image Information Entities.
-
-    The class provides methods for efficient access to individual Frame items
-    contained in the Pixel Data element of a Data Set stored in a Part10 file
-    on disk without loading the entire element into memory.
-
-    """
-
-    def __init__(self, fp: Union[str, Path, DicomFileLike]):
-        """
-        Parameters
-        ----------
-        fp: Union[str, pathlib.Path, pydicom.filebase.DicomfileLike]
-            DICOM Part10 file containing a dataset of an image SOP Instance
-
-        """
-        self._filepointer: Union[DicomFileLike, None]
-        self._filepath: Union[Path, None]
-        if isinstance(fp, DicomFileLike):
-            is_little_endian, is_implicit_VR = self._check_file_format(fp)
-            try:
-                if fp.is_little_endian != is_little_endian:
-                    raise ValueError(
-                        'Transfer syntax of file object has incorrect value '
-                        'for attribute "is_little_endian".'
-                    )
-            except AttributeError:
-                raise AttributeError(
-                    'Transfer syntax of file object does not have '
-                    'attribute "is_little_endian".'
-                )
-            try:
-                if fp.is_implicit_VR != is_implicit_VR:
-                    raise ValueError(
-                        'Transfer syntax of file object has incorrect value '
-                        'for attribute "is_implicit_VR".'
-                    )
-            except AttributeError:
-                raise AttributeError(
-                    'Transfer syntax of file object does not have '
-                    'attribute "is_implicit_VR".'
- ) - self._filepointer = fp - self._filepath = None - elif isinstance(fp, (str, Path)): - self._filepath = Path(fp) - self._filepointer = None - else: - raise TypeError( - 'Argument "filename" must either an open DICOM file object or ' - 'the path to a DICOM file stored on disk.' - ) - - # Those attributes will be set by the "open()" - self._metadata: Dataset = Dataset() - self._is_open = False - self._as_float = False - self._bytes_per_frame_uncompressed: int = -1 - self._basic_offset_table: List[int] = [] - self._first_frame_offset: int = -1 - self._pixel_data_offset: int = -1 - self._pixels_per_frame: int = -1 - - def _check_file_format(self, fp: DicomFileLike) -> Tuple[bool, bool]: - """Check whether file object represents a DICOM Part 10 file. - - Parameters - ---------- - fp: pydicom.filebase.DicomFileLike - DICOM file object - - Returns - ------- - is_little_endian: bool - Whether the data set is encoded in little endian transfer syntax - is_implicit_VR: bool - Whether value representations of data elements in the data set - are implicit - - Raises - ------ - InvalidDicomError - If the file object does not represent a DICOM Part 10 file - - """ - def is_main_tag(tag: BaseTag, VR: Optional[str], length: int) -> bool: - return tag >= 0x00040000 - - pos = fp.tell() - ds = read_partial(fp, stop_when=is_main_tag) # type: ignore - fp.seek(pos) - transfer_syntax_uid = UID(ds.file_meta.TransferSyntaxUID) - return ( - transfer_syntax_uid.is_little_endian, - transfer_syntax_uid.is_implicit_VR, - ) - - def __enter__(self) -> '_ImageFileReader': - self.open() - return self - - def __exit__(self, except_type, except_value, except_trace) -> None: - self._fp.close() - if except_value: - sys.stdout.write( - 'Error while accessing file "{}":\n{}'.format( - self._filepath, str(except_value) - ) - ) - for tb in traceback.format_tb(except_trace): - sys.stdout.write(tb) - raise - - @property - def _fp(self) -> DicomFileLike: - if self._filepointer is None: - raise IOError('File has not been opened for reading.') - return self._filepointer - - def open(self) -> None: - """Open file for reading. - - Raises - ------ - FileNotFoundError - When file cannot be found - OSError - When file cannot be opened - IOError - When DICOM metadata cannot be read from file - ValueError - When DICOM dataset contained in file does not represent an image - - Note - ---- - Reads the metadata of the DICOM Data Set contained in the file and - builds a Basic Offset Table to speed up subsequent frame-level access. - - """ - # This methods sets several attributes on the object, which cannot - # (or should not) be set in the constructor. Other methods assert that - # this method has been called first by checking the value of the - # "_is_open" attribute. - if self._is_open: - return - - if self._filepointer is None: - # This should not happen is just for mypy to be happy - if self._filepath is None: - raise ValueError(f'File not found: "{self._filepath}".') - logger.debug('read File Meta Information') - try: - file_meta = read_file_meta_info(self._filepath) - except FileNotFoundError: - raise ValueError('No file path was set.') - except InvalidDicomError: - raise InvalidDicomError( - f'File is not a valid DICOM file: "{self._filepath}".' - ) - except Exception: - raise IOError(f'Could not read file: "{self._filepath}".') - - transfer_syntax_uid = UID(file_meta.TransferSyntaxUID) - if transfer_syntax_uid is None: - raise IOError( - 'File is not a valid DICOM file: "{self._filepath}".' - 'It lacks File Meta Information.' 
- ) - self._transfer_syntax_uid: UID = transfer_syntax_uid - is_little_endian = transfer_syntax_uid.is_little_endian - is_implicit_VR = transfer_syntax_uid.is_implicit_VR - self._filepointer = DicomFileLike(open(self._filepath, 'rb')) - self._filepointer.is_little_endian = is_little_endian - self._filepointer.is_implicit_VR = is_implicit_VR - - logger.debug('read metadata elements') - try: - tmp = dcmread(self._fp, stop_before_pixels=True) - except Exception as error: - raise IOError( - f'DICOM metadata cannot be read from file: "{error}"' - ) - - # Construct a new Dataset that is fully decoupled from the file, - # i.e., that does not contain any File Meta Information - del tmp.file_meta - self._metadata = Dataset(tmp) - - self._pixels_per_frame = int(np.product([ - self._metadata.Rows, - self._metadata.Columns, - self._metadata.SamplesPerPixel - ])) - - self._pixel_data_offset = self._fp.tell() - # Determine whether dataset contains a Pixel Data element - try: - tag = TupleTag(self._fp.read_tag()) - except EOFError: - raise ValueError( - 'Dataset does not represent an image information entity.' - ) - if int(tag) not in _PIXEL_DATA_TAGS: - raise ValueError( - 'Dataset does not represent an image information entity.' - ) - self._as_float = False - if int(tag) in _FLOAT_PIXEL_DATA_TAGS: - self._as_float = True - - # Reset the file pointer to the beginning of the Pixel Data element - self._fp.seek(self._pixel_data_offset, 0) - - logger.debug('build Basic Offset Table') - try: - number_of_frames = int(self._metadata.NumberOfFrames) - except AttributeError: - number_of_frames = 1 - if self._transfer_syntax_uid.is_encapsulated: - try: - self._basic_offset_table = _get_bot( - self._fp, - number_of_frames=number_of_frames - ) - except Exception as error: - raise IOError( - f'Failed to build Basic Offset Table: "{error}"' - ) - self._first_frame_offset = self._fp.tell() - else: - if self._fp.is_implicit_VR: - header_offset = 4 + 4 # tag and length - else: - header_offset = 4 + 2 + 2 + 4 # tag, VR, reserved, and length - self._first_frame_offset = self._pixel_data_offset + header_offset - n_pixels = self._pixels_per_frame - bits_allocated = self._metadata.BitsAllocated - if bits_allocated == 1: - # Determine the nearest whole number of bytes needed to contain - # 1-bit pixel data. e.g. 10 x 10 1-bit pixels is 100 bits, - # which are packed into 12.5 -> 13 bytes - self._bytes_per_frame_uncompressed = ( - n_pixels // 8 + (n_pixels % 8 > 0) - ) - self._basic_offset_table = [ - int(math.floor(i * n_pixels / 8)) - for i in range(number_of_frames) - ] - else: - self._bytes_per_frame_uncompressed = ( - n_pixels * bits_allocated // 8 - ) - self._basic_offset_table = [ - i * self._bytes_per_frame_uncompressed - for i in range(number_of_frames) - ] - - if len(self._basic_offset_table) != number_of_frames: - raise ValueError( - 'Length of Basic Offset Table does not match ' - 'Number of Frames.' 
- ) - - self._is_open = True - - def _assert_is_open(self) -> None: - if not self._is_open: - raise IOError('DICOM image file has not been opened for reading.') - - @property - def transfer_syntax_uid(self) -> UID: - """pydicom.uid.UID: Transfer Syntax UID""" - self._assert_is_open() - return self._transfer_syntax_uid - - @property - def metadata(self) -> Dataset: - """pydicom.dataset.Dataset: Metadata""" - self._assert_is_open() - return self._metadata - - def close(self) -> None: - """Close file.""" - if self._fp is not None: - self._fp.close() - self._is_open = False - - def read_frame(self, index: int) -> bytes: - """Read the pixel data of an individual frame item. - - Parameters - ---------- - index: int - Zero-based frame index - - Returns - ------- - bytes - Pixel data of a given frame item encoded in the transfer syntax. - - Raises - ------ - IOError - When frame could not be read - - """ - self._assert_is_open() - if index > self.number_of_frames: - raise ValueError( - f'Frame index {index} exceeds number of frames in image: ' - f'{self.number_of_frames}.' - ) - - logger.debug(f'read frame #{index}') - - frame_offset = self._basic_offset_table[index] - self._fp.seek(self._first_frame_offset + frame_offset, 0) - if self._transfer_syntax_uid.is_encapsulated: - try: - stop_at = self._basic_offset_table[index + 1] - frame_offset - except IndexError: - # For the last frame, there is no next offset available. - stop_at = -1 - n = 0 - # A frame may consist of multiple items (fragments). - fragments = [] - while True: - tag = TupleTag(self._fp.read_tag()) - if n == stop_at or int(tag) == SequenceDelimiterTag: - break - if int(tag) != ItemTag: - raise ValueError(f'Failed to read frame #{index}.') - length = self._fp.read_UL() - fragments.append(self._fp.read(length)) - n += 4 + 4 + length - frame_data = b''.join(fragments) - else: - frame_data = self._fp.read(self._bytes_per_frame_uncompressed) - - if len(frame_data) == 0: - raise IOError(f'Failed to read frame #{index}.') - - return frame_data - - def decode_frame(self, index: int, value: bytes): - """Decode the pixel data of an individual frame item. - - Parameters - ---------- - index: int - Zero-based frame index - value: bytes - Value of a Frame item - - Returns - ------- - numpy.ndarray - Array of decoded pixels of the frame with shape (Rows x Columns) - in case of a monochrome image or (Rows x Columns x SamplesPerPixel) - in case of a color image. - - """ - self._assert_is_open() - logger.debug(f'decode frame #{index}') - - metadata = self.metadata - if metadata.BitsAllocated == 1: - # Unfortunately, the type of the return value of the unpack_bits() - # can be changed dynamically via the as_array argument. This causes - # issues for mypy and requires this workaround. - unpacked_frame: np.ndarray = unpack_bits( # type: ignore - value, - as_array=True - ) - rows, columns = metadata.Rows, self.metadata.Columns - n_pixels = self._pixels_per_frame - pixel_offset = int(((index * n_pixels / 8) % 1) * 8) - pixel_array = unpacked_frame[pixel_offset:pixel_offset + n_pixels] - return pixel_array.reshape(rows, columns) - else: - # This hack creates a small dataset containing a Pixel Data element - # with only a single frame item, which can then be decoded using the - # existing pydicom API. 
- ds = Dataset() - ds.file_meta = FileMetaDataset() - ds.file_meta.TransferSyntaxUID = self._transfer_syntax_uid - ds.Rows = metadata.Rows - ds.Columns = metadata.Columns - ds.SamplesPerPixel = metadata.SamplesPerPixel - ds.PhotometricInterpretation = metadata.PhotometricInterpretation - ds.PixelRepresentation = metadata.PixelRepresentation - ds.PlanarConfiguration = metadata.get('PlanarConfiguration', None) - ds.BitsAllocated = metadata.BitsAllocated - ds.BitsStored = metadata.BitsStored - ds.HighBit = metadata.HighBit - if self._transfer_syntax_uid.is_encapsulated: - ds.PixelData = encapsulate(frames=[value]) - else: - ds.PixelData = value - return ds.pixel_array - - def read_and_decode_frame(self, index: int): - """Read and decode the pixel data of an individual frame item. - - Parameters - ---------- - index: int - Zero-based frame index - - Returns - ------- - numpy.ndarray - Array of decoded pixels of the frame with shape (Rows x Columns) - in case of a monochrome image or (Rows x Columns x SamplesPerPixel) - in case of a color image. - - Raises - ------ - IOError - When frame could not be read - - """ - frame = self.read_frame(index) - return self.decode_frame(index, frame) - - @property - def number_of_frames(self) -> int: - """int: Number of frames""" - self._assert_is_open() - try: - return int(self.metadata.NumberOfFrames) - except AttributeError: - return 1 + return np.array(basic_offset_table, dtype=np.uint32) class _QueryResourceType(Enum): @@ -767,37 +508,12 @@ def _build_acceptable_media_type_lut( return acceptable_media_type_lut -class DICOMfileClient: - - """Client for managing DICOM Part10 files in a DICOMweb-like manner. - - Facilitates serverless access to data stored locally on a file system as - DICOM Part10 files. - - Note - ---- - The class exposes the same :class:`dicomweb_client.api.DICOMClient` - interface as the :class:`dicomweb_client.api.DICOMwebClient` class. - While method parameters and return values have the same types, but the - types of exceptions may differ. - - Note - ---- - The class internally uses an in-memory database, which is persisted on disk - to facilitate faster subsequent data access. However, the implementation - details of the database and the structure of any database files stored on - the file system may change at any time and should not be relied on. - - Note - ---- - This is **not** an implementation of the DICOM File Service and does not - depend on the presence of ``DICOMDIR`` files. - - """ +class _DatabaseManager: def __init__( self, - base_dir: Union[Path, str], + base_dir: Path, + db_dir: Path, update_db: bool = False, recreate_db: bool = False, in_memory: bool = False @@ -806,8 +522,10 @@ def __init__( Parameters ---------- - base_dir: Union[pathlib.Path, str] - Path to base directory containing DICOM files + base_dir: pathlib.Path + Base path to the directory where DICOM files are stored + db_dir: pathlib.Path + Path to the directory where database files should be stored update_db: bool, optional Whether the database should be updated (default: ``False``). If ``True``, the client will search `base_dir` recursively for new @@ -823,14 +541,17 @@ def __init__( ``False``). 
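+
+        Note
+        ----
+        When ``in_memory`` is ``True``, no database file is created and the
+        database is rebuilt from scratch upon each instantiation.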
""" - self.base_dir = Path(base_dir).resolve() + self.base_dir = base_dir if in_memory: filename = ':memory:' + self._db_file_identifier = filename + update_db = True else: filename = '.dicom-file-client.db' - self._db_filepath = self.base_dir.joinpath(filename) - if not self._db_filepath.exists(): - update_db = True + filepath = db_dir.joinpath(filename) + if not filepath.exists(): + update_db = True + self._db_file_identifier = str(filepath) self._db_connection_handle: Union[sqlite3.Connection, None] = None self._db_cursor_handle: Union[sqlite3.Cursor, None] = None @@ -860,9 +581,6 @@ def __init__( elapsed = round(end - start) logger.info(f'updated database in {elapsed} seconds') - self._reader_cache: OrderedDict[Path, _ImageFileReader] = OrderedDict() - self._max_reader_cache_size = 50 - def __getstate__(self) -> dict: """Customize state for serialization via pickle module. @@ -885,19 +603,29 @@ def __getstate__(self) -> dict: if self._db_connection_handle is not None: self._db_connection_handle.commit() self._db_connection_handle.close() - for image_file_reader in self._reader_cache.values(): - image_file_reader.close() finally: contents['_db_connection_handle'] = None contents['_db_cursor_handle'] = None - contents['_reader_cache'] = OrderedDict() return contents @property def _connection(self) -> sqlite3.Connection: """sqlite3.Connection: database connection""" + def adapt_array(array: np.ndarray) -> sqlite3.Binary: + buffer = array.astype(np.uint32).tobytes() + return sqlite3.Binary(buffer) + + def convert_array(buffer: bytes) -> np.ndarray: + return np.frombuffer(buffer, dtype=np.uint32) + + sqlite3.register_adapter(np.ndarray, adapt_array) + sqlite3.register_converter('ARRAY', convert_array) + if self._db_connection_handle is None: - self._db_connection_handle = sqlite3.connect(str(self._db_filepath)) + self._db_connection_handle = sqlite3.connect( + str(self._db_file_identifier), + detect_types=sqlite3.PARSE_DECLTYPES + ) self._db_connection_handle.row_factory = sqlite3.Row return self._db_connection_handle @@ -966,10 +694,17 @@ def _create_db(self): InstanceNumber INTEGER, Rows INTEGER, Columns INTEGER, - BitsAllocated INTEGER, NumberOfFrames INTEGER, - TransferSyntaxUID TEXT NOT NULL, + BitsAllocated INTEGER, + BitsStored INTEGER, + SamplesPerPixel INTEGER, + PhotometricInterpretation TEXT, + PixelRepresentation INTEGER, + PlanarConfiguration INTEGER, + _transfer_syntax_uid TEXT NOT NULL, _file_path TEXT, + _first_frame_offset INTEGER, + _basic_offset_table ARRAY, PRIMARY KEY ( StudyInstanceUID, SeriesInstanceUID, @@ -997,21 +732,68 @@ def _drop_db(self): cursor.execute('DROP TABLE IF EXISTS studies') cursor.close() + def _get_file_pointer( + self, + buf: Union[io.BufferedReader, io.BytesIO], + transfer_syntax_uid: str + ) -> DicomFileLike: + """Get a pointer to a given image file. + + Parameters + ---------- + buf: Union[io.BufferedReader, io.BytesIO] + Buffer of a DICOM file containing a data set of an image + transfer_syntax_uid: str + Unique identifier of the transfer syntax + + Returns + ------- + pydicom.filebase.DicomFileLike + Pointer to file-like object + + Note + ---- + Close the file-like object when done reading from it. + + """ + uid = UID(transfer_syntax_uid) + fp = DicomFileLike(buf) + fp.is_little_endian = uid.is_little_endian + fp.is_implicit_VR = uid.is_implicit_VR + return fp + + def _read_selected_metadata(self, fp: DicomFileLike) -> Dataset: + """Read selected instance metadata from file. 
+ + Parameters + ---------- + fp: pydicom.filebase.DicomFileLike + Pointer to file-like object + + Returns + ------- + pydicom.dataset.Dataset + Metadata + + """ + tags: List[int] = [ + tag_for_keyword(attr) # type: ignore + for attr in ( + self._attributes[_QueryResourceType.STUDIES] + + self._attributes[_QueryResourceType.SERIES] + + self._attributes[_QueryResourceType.INSTANCES] + ) + ] + return dcmread( + fp, + stop_before_pixels=True, + specific_tags=tags, + force=True + ) + @_enforce_standard_conformance def _update_db(self): """Update database.""" - all_attributes = ( - self._attributes[_QueryResourceType.STUDIES] + - self._attributes[_QueryResourceType.SERIES] + - self._attributes[_QueryResourceType.INSTANCES] - ) - tags = [ - tag_for_keyword(attr) - for attr in all_attributes - ] - - def is_stop_tag(tag: BaseTag, VR: Optional[str], length: int) -> bool: - return tag > max(tags) indexed_file_paths = set(self._get_indexed_file_paths()) found_file_paths = set() @@ -1031,39 +813,66 @@ def is_stop_tag(tag: BaseTag, VR: Optional[str], length: int) -> bool: continue logger.debug(f'index file {file_path}') - with open(file_path, 'rb') as fp: + # TODO: considering changing default "buffering" for improved + # performance with network-attached storage systems. + with open(file_path, 'rb') as f: try: - ds = read_partial( - fp, - stop_when=is_stop_tag, - specific_tags=tags - ) + ds = self._read_selected_metadata(f) except (InvalidDicomError, AttributeError, ValueError): - logger.debug(f'failed to read file "{file_path}"') + logger.warning(f'failed to read file "{file_path}"') continue - if not hasattr(ds, 'SOPClassUID'): - # This is probably a DICOMDIR file or some other weird thing - continue - - try: - study_metadata = self._extract_study_metadata(ds) - study_instance_uid = ds.StudyInstanceUID - studies[study_instance_uid] = tuple(study_metadata) - - series_metadata = self._extract_series_metadata(ds) - series_instance_uid = ds.SeriesInstanceUID - series[series_instance_uid] = tuple(series_metadata) + if not hasattr(ds, 'SOPClassUID'): + # This is probably a DICOMDIR file or some other weird thing + continue - instance_metadata = self._extract_instance_metadata( - ds, - rel_file_path - ) - sop_instance_uid = ds.SOPInstanceUID - instances[sop_instance_uid] = tuple(instance_metadata) - except (AttributeError, ValueError) as error: - logger.warning(f'failed to parse file "{file_path}": {error}') - continue + try: + study_instance_uid = ds.StudyInstanceUID + series_instance_uid = ds.SeriesInstanceUID + sop_instance_uid = ds.SOPInstanceUID + + study_metadata = self._extract_study_metadata(ds) + studies[study_instance_uid] = study_metadata + + series_metadata = self._extract_series_metadata(ds) + series[series_instance_uid] = series_metadata + + instance_metadata = self._extract_instance_metadata(ds) + if hasattr(ds, 'Rows') and hasattr(ds, 'Columns'): + transfer_syntax_uid = ds.file_meta.TransferSyntaxUID + first_frame_offset, bot = _get_frame_offsets( + fp=self._get_file_pointer( + f, + transfer_syntax_uid=transfer_syntax_uid + ), + number_of_frames=int( + getattr(ds, 'NumberOfFrames', '1') + ), + number_of_pixels_per_frame=int( + np.product([ + ds.Rows, + ds.Columns, + ds.SamplesPerPixel, + ]) + ), + transfer_syntax_uid=transfer_syntax_uid, + bits_allocated=ds.BitsAllocated + ) + else: + first_frame_offset = -1 + bot = np.zeros((0, ), dtype=np.uint32) + instances[sop_instance_uid] = ( + *instance_metadata, + str(ds.file_meta.TransferSyntaxUID), + str(rel_file_path), + first_frame_offset, + 
bot, + ) + except (AttributeError, ValueError) as error: + logger.warning( + f'failed to parse file "{file_path}": {error}' + ) + continue if not i % n: # Insert every nth iteration to avoid having to read all @@ -1092,12 +901,21 @@ def _get_data_element_value( dataset: Dataset, keyword: str ) -> Union[str, int, None]: - # TODO: consider converting date and time to ISO format - value = getattr(dataset, keyword, None) + try: + value = getattr(dataset, keyword) + except (AttributeError, KeyError): + if keyword == 'NumberOfFrames': + value = '1' + else: + logger.debug( + f'could not extract value of element "{keyword}" ' + 'from dataset' + ) + value = None if value is None or isinstance(value, int): return value - else: - return str(value) + # TODO: consider converting date and time to ISO format + return str(value) def _extract_study_metadata( self, @@ -1138,26 +956,26 @@ def _extract_series_metadata( def _extract_instance_metadata( self, dataset: Dataset, - file_path: Union[Path, str] ) -> Tuple[ - str, - str, - str, - str, - Optional[int], - Optional[int], - Optional[int], - Optional[int], - Optional[int], - str, - str, + str, + str, + str, + str, + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[str], + Optional[int], + Optional[int], ]: metadata = [ self._get_data_element_value(dataset, attr) for attr in self._attributes[_QueryResourceType.INSTANCES] ] - metadata.append(str(dataset.file_meta.TransferSyntaxUID)) - metadata.append(str(file_path)) return tuple(metadata) # type: ignore def _insert_into_db( @@ -1195,8 +1013,15 @@ def _insert_into_db( Optional[int], Optional[int], Optional[int], + Optional[int], + Optional[int], + Optional[str], + Optional[int], + Optional[int], str, str, + int, + np.ndarray ] ] ): @@ -1214,7 +1039,7 @@ def _insert_into_db( ) cursor.executemany( 'INSERT OR REPLACE INTO instances ' - 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', instances ) cursor.close() @@ -1240,7 +1065,7 @@ def _cleanup_db(self, missing_file_paths: Sequence[Path]): results += cursor.fetchall() cursor.close() - self._delete_instances_from_db( + self.delete_instances( uids=[ ( r['StudyInstanceUID'], @@ -1251,7 +1076,7 @@ def _cleanup_db(self, missing_file_paths: Sequence[Path]): ] ) - def _delete_instances_from_db( + def delete_instances( self, uids: Sequence[Tuple[str, str, str]] ) -> None: @@ -1296,7 +1121,7 @@ def _get_attributes(self, resource_type: _QueryResourceType) -> List[str]: self._cursor.execute(f'SELECT * FROM {table} LIMIT 1') attributes = [ item[0] for item in self._cursor.description - if not item[0].startswith('_') and item[0] != 'TransferSyntaxUID' + if not item[0].startswith('_') ] return attributes @@ -1337,7 +1162,7 @@ def _build_query( continue try: - keyword = self.lookup_keyword(key) + keyword = keyword_for_tag(key) vr = dictionary_VR(key) except Exception: keyword = key @@ -1446,12 +1271,12 @@ def _get_modalities_in_study(self, study_instance_uid: str) -> List[str]: results = self._cursor.fetchall() return [r['Modality'] for r in results] - def _get_studies(self) -> List[str]: + def get_study_identifiers(self) -> List[str]: self._cursor.execute('SELECT StudyInstanceUID FROM studies') results = self._cursor.fetchall() return [r['StudyInstanceUID'] for r in results] - def _get_series( + def get_series_identifiers( self, study_instance_uid: Optional[str] = None ) -> List[Tuple[str, str]]: @@ -1478,7 +1303,7 @@ def _get_series( 
for r in results ] - def _get_instances( + def get_instance_identifiers( self, study_instance_uid: Optional[str] = None, series_instance_uid: Optional[str] = None @@ -1518,40 +1343,7 @@ def _get_instances( for r in results ] - def _get_image_file_reader(self, file_path: Path) -> _ImageFileReader: - """Get the reader for a given image file. - - Parameters - ---------- - file_path: pathlib.Path - Path to the DICOM file containing a data set of an image - - Returns - ------- - dicomweb_client.file._ImageFileReader - Reader object - - Note - ---- - The instance of the class caches reader object to improve performance - for repeated frame-level file access. - - """ - try: - image_file_reader = self._reader_cache[file_path] - # Move the most recently retrieved entry to the beginning. - self._reader_cache.move_to_end(file_path, last=False) - except KeyError: - image_file_reader = _ImageFileReader(file_path) - image_file_reader.open() - self._reader_cache[file_path] = image_file_reader - if len(self._reader_cache) > self._max_reader_cache_size: - # Remove the last entry. - tmp_path, tmp_reader = self._reader_cache.popitem(last=False) - tmp_reader.close() - return image_file_reader - - def _get_instance_file_path( + def get_instance_file_path( self, study_instance_uid: str, series_instance_uid: str, @@ -1606,7 +1398,7 @@ def _count_instances_in_series(self, series_instance_uid: str) -> int: result = self._cursor.fetchone() return int(result['count']) - def search_for_studies( + def query_studies( self, fuzzymatching: Optional[bool] = None, limit: Optional[int] = None, @@ -1640,10 +1432,6 @@ def search_for_studies( Studies (see `Study Result Attributes `_) - Note - ---- - No additional `fields` are currently supported. - """ # noqa: E501 logger.info('search for studies') query_filter_string, query_params = self._build_query( @@ -1688,7 +1476,7 @@ def search_for_studies( return collection - def search_for_series( + def query_series( self, study_instance_uid: Optional[str] = None, fuzzymatching: Optional[bool] = None, @@ -1835,7 +1623,44 @@ def search_for_series( return collection - def search_for_instances( + def get_instance_info( + self, + study_instance_uid: str, + series_instance_uid: str, + sop_instance_uid: str + ) -> Tuple[Dataset, UID, int, np.ndarray]: + self._cursor.execute( + ''' + SELECT * FROM instances + WHERE StudyInstanceUID = ? + AND SeriesInstanceUID = ? + AND SOPInstanceUID = ? + ''', + ( + study_instance_uid, + series_instance_uid, + sop_instance_uid, + ) + ) + row = self._cursor.fetchone() + + dataset = Dataset() + for key in row.keys(): + if not key.startswith('_'): + value = row[key] + setattr(dataset, key, value) + transfer_syntax_uid = UID(row['_transfer_syntax_uid']) + first_frame_offset = row['_first_frame_offset'] + basic_offset_table = row['_basic_offset_table'] + + return ( + dataset, + transfer_syntax_uid, + first_frame_offset, + basic_offset_table + ) + + def query_instances( self, study_instance_uid: Optional[str] = None, series_instance_uid: Optional[str] = None, @@ -1875,10 +1700,6 @@ def search_for_instances( Instances (see `Instance Result Attributes `_) - Note - ---- - No additional `fields` are currently supported. 
- """ # noqa: E501 if search_filters is None: search_params = {} @@ -1915,9 +1736,16 @@ def search_for_instances( searchable_keywords.extend( self._attributes[_QueryResourceType.SERIES] ) - searchable_keywords.extend( - self._attributes[_QueryResourceType.INSTANCES] - ) + # Not all attributes that get stored in the database are searchable + searchable_keywords.extend([ + 'SOPInstanceUID', + 'SOPClassUID', + 'InstanceNumber', + 'Rows', + 'Columns', + 'BitsAllocated', + 'NumberOfFrames', + ]) query_filter_string, query_params = self._build_query( searchable_keywords=searchable_keywords, fuzzymatching=fuzzymatching, @@ -1956,7 +1784,6 @@ def search_for_instances( 'Columns', 'BitsAllocated', 'NumberOfFrames', - 'TransferSyntaxUID', ] if study_instances: includefields += [ @@ -2019,39 +1846,460 @@ def search_for_instances( self._cursor.execute(query_string, query_params) results = self._cursor.fetchall() - collection = [] - for row in results: - dataset = Dataset() - for key in row.keys(): - if not key.startswith('_'): - value = row[key] - setattr(dataset, key, value) + collection = [] + for row in results: + dataset = Dataset() + for key in row.keys(): + if not key.startswith('_'): + value = row[key] + setattr(dataset, key, value) + + if all_instances: + n_series_in_study = self._count_series_in_study( + study_instance_uid=dataset.StudyInstanceUID + ) + dataset.NumberOfStudyRelatedSeries = n_series_in_study + + n_instances_in_study = self._count_instances_in_study( + study_instance_uid=dataset.StudyInstanceUID + ) + dataset.NumberOfStudyRelatedInstances = n_instances_in_study + + modalities_in_study = self._get_modalities_in_study( + study_instance_uid=dataset.StudyInstanceUID + ) + dataset.ModalitiesInStudy = modalities_in_study + + if all_instances or study_instances: + n_instances_in_series = self._count_instances_in_series( + series_instance_uid=dataset.SeriesInstanceUID, + ) + dataset.NumberOfSeriesRelatedInstances = n_instances_in_series + + collection.append(dataset.to_json_dict()) + + return collection + + def insert_instances( + self, + datasets: Sequence[Dataset] + ) -> Tuple[List[Tuple[Dataset, Path, bytes]], List[Dataset]]: + """Insert metadata of one or more DICOM instances. + + Parameters + ---------- + datasets: Sequence[pydicom.dataset.Dataset] + Datasets that should be stored + + Returns + ------- + successes: List[Tuple[pydicom.dataset.Dataset, pathlib.Path, bytes] + Datasets that can be stored successfully, as well as the path to + the file where it should be stored and the file's content + failures: List[pydicom.dataset.Dataset] + Datasets that cannot be stored + + """ + # We first encode all data sets and temporarily store them in memory + # before inserting the metadata into the database and writing the data + # sets to files on disk. This will allow us to "roll back" in case of + # an error. We may want to consider implementing this in a more + # sophisticated way in case it becomes a performance bottleneck. 
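+        # Note that this buffers the complete encoded byte stream of every
+        # data set in memory until the corresponding database records have
+        # been inserted.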
+ studies: Dict[ + str, + Tuple[ + str, + Optional[str], + Optional[str], + Optional[str], + Optional[str], + Optional[str], + Optional[str], + Optional[str], + Optional[str], + ] + ] = {} + series: Dict[ + str, + Tuple[ + str, + str, + str, + Optional[str], + Optional[int], + ] + ] = {} + instances: Dict[ + str, + Tuple[ + str, + str, + str, + str, + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[str], + Optional[int], + Optional[int], + str, + str, + int, + np.ndarray + ] + ] = {} + successes: List[Tuple[Dataset, Path, bytes]] = [] + failures: List[Dataset] = [] + for ds in datasets: + logger.info( + f'store instance "{ds.SOPInstanceUID}" ' + f'of series "{ds.SeriesInstanceUID}" ' + f'of study "{ds.StudyInstanceUID}" ' + ) + + try: + study_instance_uid = ds.StudyInstanceUID + series_instance_uid = ds.SeriesInstanceUID + sop_instance_uid = ds.SOPInstanceUID + rel_file_path = '/'.join([ + 'studies', + study_instance_uid, + 'series', + series_instance_uid, + 'instances', + sop_instance_uid + ]) + + study_metadata = self._extract_study_metadata(ds) + studies[study_instance_uid] = study_metadata + + series_metadata = self._extract_series_metadata(ds) + series[series_instance_uid] = series_metadata + + instance_metadata = self._extract_instance_metadata(ds) + with io.BytesIO() as b: + transfer_syntax_uid = ds.file_meta.TransferSyntaxUID + fp = self._get_file_pointer( + b, + transfer_syntax_uid=transfer_syntax_uid + ) + dcmwrite(b, ds, write_like_original=False) + pixel_data_element: Union[DataElement, None] = None + for pixel_data_tag in _PIXEL_DATA_TAGS: + try: + pixel_data_element = ds[pixel_data_tag] + except KeyError: + continue + if pixel_data_element is not None: + pixel_data_offset = pixel_data_element.file_tell + if pixel_data_offset is None: + continue + fp.seek(pixel_data_offset, 0) + first_frame_offset, bot = _get_frame_offsets( + fp, + number_of_frames=int( + getattr(ds, 'NumberOfFrames', '1') + ), + number_of_pixels_per_frame=int( + np.product([ + ds.Rows, + ds.Columns, + ds.SamplesPerPixel]) + ), + transfer_syntax_uid=ds.file_meta.TransferSyntaxUID, + bits_allocated=ds.BitsAllocated + ) + else: + first_frame_offset = -1 + bot = np.zeros((0, ), dtype=np.uint32) + + fp.seek(0) + file_content = fp.read() + instances[sop_instance_uid] = ( + *instance_metadata, + str(ds.file_meta.TransferSyntaxUID), + str(rel_file_path), + first_frame_offset, + bot, + ) + + file_path = self.base_dir.joinpath(rel_file_path) + successes.append((ds, file_path, file_content)) + except Exception as error: + logger.error( + f'failed to store instance "{ds.SOPInstanceUID}" ' + f'of series "{ds.SeriesInstanceUID}" ' + f'of study "{ds.StudyInstanceUID}": {error}' + ) + failures.append(ds) + + self._insert_into_db( + studies.values(), + series.values(), + instances.values() + ) + + return (successes, failures) + + +def _raise_client_error(url: str, error_message: str) -> None: + http_error_message = f'400 Client Error: {error_message} for url: {url}' + raise requests.HTTPError(http_error_message) + + +def _raise_server_error(url: str, error_message: str) -> None: + http_error_message = f'500 Server Error: {error_message} for url: {url}' + raise requests.HTTPError(http_error_message) + + +class DICOMfileClient: + + """Client for managing DICOM Part10 files in a DICOMweb-like manner. + + Facilitates serverless access to data stored locally on a file system as + DICOM Part10 files. 
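+
+    Note
+    ----
+    The class exposes the same :class:`dicomweb_client.api.DICOMClient`
+    interface as the :class:`dicomweb_client.api.DICOMwebClient` class.
+    Method parameters and return values have the same types, but the types
+    of raised exceptions may differ.
+
+    Examples
+    --------
+    A minimal usage sketch (the path below is a placeholder):
+
+    >>> client = DICOMfileClient('/path/to/dicom/files')  # doctest: +SKIP
+    >>> studies = client.search_for_studies()  # doctest: +SKIP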
+ + Note + ---- + The class internally uses an in-memory database, which is persisted on disk + to facilitate faster subsequent data access. However, the implementation + details of the database and the structure of any database files stored on + the file system may change at any time and should not be relied on. + + Note + ---- + This is **not** an implementation of the DICOM File Service and does not + depend on the presence of ``DICOMDIR`` files. + + """ + + def __init__( + self, + base_dir: Union[Path, str], + update_db: bool = False, + recreate_db: bool = False, + in_memory: bool = False, + db_dir: Optional[Union[Path, str]] = None + ): + """Instantiate client. + + Parameters + ---------- + base_dir: Union[pathlib.Path, str] + Path to base directory containing DICOM files + update_db: bool, optional + Whether the database should be updated (default: ``False``). If + ``True``, the client will search `base_dir` recursively for new + DICOM Part10 files and create database entries for each file. + The client will further delete any database entries for files that + no longer exist on the file system. + recreate_db: bool, optional + Whether the database should be recreated (default: ``False``). If + ``True``, the client will search `base_dir` recursively for DICOM + Part10 files and create database entries for each file. + in_memory: bool, optional + Whether the database should only be stored in memory (default: + ``False``). + db_dir: Union[pathlib.Path, str, None], optional + Path to directory where database files should be stored (defaults + to `base_dir`) + + """ + self.base_dir = Path(base_dir).resolve() + if db_dir is None: + db_dir = self.base_dir + else: + db_dir = Path(db_dir).resolve() + self._db_manager = _DatabaseManager( + base_dir=self.base_dir, + db_dir=db_dir, + update_db=update_db, + recreate_db=recreate_db, + in_memory=in_memory + ) + + def _get_image_file_pointer( + self, + file_path: Path, + transfer_syntax_uid: str + ) -> DicomFileLike: + """Get a pointer to a given image file. + + Parameters + ---------- + file_path: pathlib.Path + Path to a DICOM file containing a data set of an image + transfer_syntax_uid: str + Unique identifier of the transfer syntax + + Returns + ------- + pydicom.filebase.DicomFileLike + Pointer to file-like object + + Note + ---- + Close the file-like object when done reading from it. + + """ + uid = UID(transfer_syntax_uid) + fp = DicomFileLike(open(file_path, 'rb')) + fp.is_little_endian = uid.is_little_endian + fp.is_implicit_VR = uid.is_implicit_VR + return fp + + def search_for_studies( + self, + fuzzymatching: Optional[bool] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + fields: Optional[Sequence[str]] = None, + search_filters: Optional[Dict[str, Any]] = None, + get_remaining: bool = False + ) -> List[Dict[str, dict]]: + """Search for studies. 
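+
+        The query is evaluated against the locally indexed metadata; no
+        network requests are performed.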
+ + Parameters + ---------- + fuzzymatching: Union[bool, None], optional + Whether fuzzy semantic matching should be performed + limit: Union[int, None], optional + Maximum number of results that should be returned + offset: Union[int, None], optional + Number of results that should be skipped + fields: Union[Sequence[str], None], optional + Names of fields (attributes) that should be included in results + search_filters: Union[dict, None], optional + Search filter criteria as key-value pairs, where *key* is a keyword + or a tag of the attribute and *value* is the expected value that + should match + get_remaining: bool, optional + Whether remaining results should be included + + Returns + ------- + List[Dict[str, dict]] + Studies + (see `Study Result Attributes `_) + + Note + ---- + No additional `fields` are currently supported. + + """ # noqa: E501 + return self._db_manager.query_studies( + fuzzymatching=fuzzymatching, + limit=limit, + offset=offset, + fields=fields, + search_filters=search_filters + ) + + def search_for_series( + self, + study_instance_uid: Optional[str] = None, + fuzzymatching: Optional[bool] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + fields: Optional[Sequence[str]] = None, + search_filters: Optional[Dict[str, Any]] = None, + get_remaining: bool = False + ) -> List[Dict[str, dict]]: + """Search for series. + + Parameters + ---------- + study_instance_uid: Union[str, None], optional + Study Instance UID + fuzzymatching: Union[bool, None], optional + Whether fuzzy semantic matching should be performed + limit: Union[int, None], optional + Maximum number of results that should be returned + offset: Union[int, None], optional + Number of results that should be skipped + fields: Union[Sequence[str], None], optional + Names of fields (attributes) that should be included in results + search_filters: Union[dict, None], optional + Search filter criteria as key-value pairs, where *key* is a keyword + or a tag of the attribute and *value* is the expected value that + should match + get_remaining: bool, optional + Whether remaining results should be included + + Returns + ------- + List[Dict[str, dict]] + Series + (see `Series Result Attributes `_) - if all_instances: - n_series_in_study = self._count_series_in_study( - study_instance_uid=dataset.StudyInstanceUID - ) - dataset.NumberOfStudyRelatedSeries = n_series_in_study + """ # noqa: E501 + return self._db_manager.query_series( + study_instance_uid=study_instance_uid, + fuzzymatching=fuzzymatching, + limit=limit, + offset=offset, + fields=fields, + search_filters=search_filters + ) - n_instances_in_study = self._count_instances_in_study( - study_instance_uid=dataset.StudyInstanceUID - ) - dataset.NumberOfStudyRelatedInstances = n_instances_in_study + def search_for_instances( + self, + study_instance_uid: Optional[str] = None, + series_instance_uid: Optional[str] = None, + fuzzymatching: Optional[bool] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + fields: Optional[Sequence[str]] = None, + search_filters: Optional[Dict[str, Any]] = None, + get_remaining: bool = False + ) -> List[Dict[str, dict]]: + """Search for instances. 
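+
+        If neither `study_instance_uid` nor `series_instance_uid` is
+        provided, instances of all studies and series are searched.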
- modalities_in_study = self._get_modalities_in_study( - study_instance_uid=dataset.StudyInstanceUID - ) - dataset.ModalitiesInStudy = modalities_in_study + Parameters + ---------- + study_instance_uid: Union[str, None], optional + Study Instance UID + series_instance_uid: Union[str, None], optional + Series Instance UID + fuzzymatching: Union[bool, None], optional + Whether fuzzy semantic matching should be performed + limit: Union[int, None], optional + Maximum number of results that should be returned + offset: Union[int, None], optional + Number of results that should be skipped + fields: Union[Sequence[str], None], optional + Names of fields (attributes) that should be included in results + search_filters: Union[dict, None], optional + Search filter criteria as key-value pairs, where *key* is a keyword + or a tag of the attribute and *value* is the expected value that + should match + get_remaining: bool, optional + Whether remaining results should be included - if all_instances or study_instances: - n_instances_in_series = self._count_instances_in_series( - series_instance_uid=dataset.SeriesInstanceUID, - ) - dataset.NumberOfSeriesRelatedInstances = n_instances_in_series + Returns + ------- + List[Dict[str, dict]] + Instances + (see `Instance Result Attributes `_) - collection.append(dataset.to_json_dict()) + Note + ---- + No additional `fields` are currently supported. - return collection + """ # noqa: E501 + return self._db_manager.query_instances( + study_instance_uid=study_instance_uid, + series_instance_uid=series_instance_uid, + fuzzymatching=fuzzymatching, + limit=limit, + offset=offset, + fields=fields, + search_filters=search_filters + ) def retrieve_bulkdata( self, @@ -2149,9 +2397,11 @@ def retrieve_study_metadata( 'retrieve metadata of all instances ' f'of study "{study_instance_uid}"' ) - series_index = self._get_series(study_instance_uid) + series_identifiers = self._db_manager.get_series_identifiers( + study_instance_uid=study_instance_uid + ) collection = [] - for series_instance_uid, study_instance_uid in series_index: + for series_instance_uid, study_instance_uid in series_identifiers: collection.extend( self.retrieve_series_metadata( study_instance_uid=study_instance_uid, @@ -2184,13 +2434,19 @@ def iter_study( logger.info( f'iterate over all instances of study "{study_instance_uid}"' ) - series_index = self._get_series(study_instance_uid) - for study_instance_uid, series_instance_uid in series_index: - uids = self._get_instances( + series_identifiers = self._db_manager.get_series_identifiers( + study_instance_uid=study_instance_uid + ) + for study_instance_uid, series_instance_uid in series_identifiers: + instance_identifiers = self._db_manager.get_instance_identifiers( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, ) - for study_instance_uid, series_instance_uid, sop_instance_uid in uids: # noqa + for ( + study_instance_uid, + series_instance_uid, + sop_instance_uid, + ) in instance_identifiers: yield self.retrieve_instance( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, @@ -2254,12 +2510,15 @@ def iter_series( f'iterate over all instances of series "{series_instance_uid}" ' f'of study "{study_instance_uid}"' ) - instance_index = self._get_instances( + instance_identifiers = self._db_manager.get_instance_identifiers( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, ) - for i in instance_index: - study_instance_uid, series_instance_uid, sop_instance_uid = i + for ( + 
study_instance_uid, + series_instance_uid, + sop_instance_uid, + ) in instance_identifiers: yield self.retrieve_instance( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, @@ -2358,20 +2617,22 @@ def retrieve_series_metadata( f'series "{series_instance_uid}" of study "{study_instance_uid}"' ) - collection = [] - instance_index = self._get_instances( + instance_identifiers = self._db_manager.get_instance_identifiers( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, ) - for i in instance_index: - study_instance_uid, series_instance_uid, sop_instance_uid = i - metadata = self.retrieve_instance_metadata( + return [ + self.retrieve_instance_metadata( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, sop_instance_uid=sop_instance_uid, ) - collection.append(metadata) - return collection + for ( + study_instance_uid, + series_instance_uid, + sop_instance_uid, + ) in instance_identifiers + ] def retrieve_instance_metadata( self, @@ -2400,7 +2661,7 @@ def retrieve_instance_metadata( f'retrieve metadata of instance "{sop_instance_uid}" of ' f'series "{series_instance_uid}" of study "{study_instance_uid}"' ) - file_path = self._get_instance_file_path( + file_path = self._db_manager.get_instance_file_path( study_instance_uid, series_instance_uid, sop_instance_uid, @@ -2491,7 +2752,7 @@ def retrieve_instance( supported_media_type_lut ) - file_path = self._get_instance_file_path( + file_path = self._db_manager.get_instance_file_path( study_instance_uid, series_instance_uid, sop_instance_uid, @@ -2571,39 +2832,63 @@ def retrieve_instance_rendered( Only rendering of single-frame image instances is currently supported. """ # noqa: E501 - file_path = self._get_instance_file_path( + file_path = self._db_manager.get_instance_file_path( study_instance_uid, series_instance_uid, sop_instance_uid, ) - image_file_reader = self._get_image_file_reader(file_path) - metadata = image_file_reader.metadata + metadata, transfer_syntax_uid, \ + first_frame_offset, bot = self._db_manager.get_instance_info( + study_instance_uid=study_instance_uid, + series_instance_uid=series_instance_uid, + sop_instance_uid=sop_instance_uid + ) + image_file_pointer = self._get_image_file_pointer( + file_path, + transfer_syntax_uid=transfer_syntax_uid + ) if int(getattr(metadata, 'NumberOfFrames', '1')) > 1: raise ValueError( 'Rendering of multi-frame image instance is not supported.' 
) frame_index = 0 - frame = image_file_reader.read_frame(frame_index) - transfer_syntax_uid = image_file_reader.transfer_syntax_uid + frame = _read_frame( + image_file_pointer, + first_frame_offset=first_frame_offset, + basic_offset_table=bot, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid + ) + # TODO: ICC Profile codec_name, codec_kwargs = self._get_image_codec_parameters( - metadata=metadata, transfer_syntax_uid=transfer_syntax_uid, media_types=media_types, params=params ) if codec_name is None: - pixels = frame - else: - array = image_file_reader.decode_frame(frame_index, frame) - image = Image.fromarray(array) - with io.BytesIO() as fp: - image.save(fp, codec_name, **codec_kwargs) # type: ignore - fp.seek(0) - pixels = fp.read() + return frame - return pixels + array = _decode_frame( + frame=frame, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid, + rows=metadata.Rows, + columns=metadata.Columns, + samples_per_pixel=metadata.SamplesPerPixel, + bits_allocated=metadata.BitsAllocated, + bits_stored=metadata.BitsStored, + photometric_interpretation=metadata.PhotometricInterpretation, + pixel_representation=metadata.PixelRepresentation, + planar_configuration=getattr(metadata, 'PlanarConfiguration', None) + ) + image = Image.fromarray(array) + with io.BytesIO() as fp: + image.save(fp, codec_name, **codec_kwargs) # type: ignore + fp.seek(0) + reencoded_frame = fp.read() + return reencoded_frame def _check_media_types_for_instance_frames( self, @@ -2691,7 +2976,7 @@ def _check_media_types_for_instance_frames( # acceptable media types. expected_media_type = transfer_syntax_uid_lut[transfer_syntax_uid] found_matching_media_type = False - if transfer_syntax_uid.is_encapsulated: + if _are_frames_encapsulated(transfer_syntax_uid): wildcards = {'*/*', '*/', 'image/*', 'image/'} if any([w in acceptable_media_type_lut for w in wildcards]): found_matching_media_type = True @@ -2776,7 +3061,7 @@ def iter_instance_frames( f'iterate over frames of instance "{sop_instance_uid}" of ' f'series "{series_instance_uid}" of study "{study_instance_uid}"' ) - file_path = self._get_instance_file_path( + file_path = self._db_manager.get_instance_file_path( study_instance_uid, series_instance_uid, sop_instance_uid, @@ -2785,34 +3070,66 @@ def iter_instance_frames( if len(frame_numbers) == 0: raise ValueError('At least one frame number must be provided.') - image_file_reader = self._get_image_file_reader(file_path) - metadata = image_file_reader.metadata - transfer_syntax_uid = image_file_reader.transfer_syntax_uid - + metadata, transfer_syntax_uid, \ + first_frame_offset, bot = self._db_manager.get_instance_info( + study_instance_uid=study_instance_uid, + series_instance_uid=series_instance_uid, + sop_instance_uid=sop_instance_uid + ) reencoding_media_type = self._check_media_types_for_instance_frames( transfer_syntax_uid, media_types ) + requires_reencoding = False + if ( + reencoding_media_type is not None and + reencoding_media_type != 'application/octet-stream' + ): + requires_reencoding = True + + image_file_pointer = self._get_image_file_pointer( + file_path, + transfer_syntax_uid=transfer_syntax_uid + ) for frame_number in frame_numbers: - frame_index = frame_number - 1 - frame = image_file_reader.read_frame(frame_index) - if frame_number > int(getattr(metadata, 'NumberOfFrames', '1')): raise ValueError( f'Provided frame number {frame_number} exceeds number ' 'of available frames.' 
) - - if not transfer_syntax_uid.is_encapsulated: - pixels = frame - else: - if reencoding_media_type is None: - pixels = frame - elif reencoding_media_type == 'image/jp2': + frame_index = frame_number - 1 + frame = _read_frame( + image_file_pointer, + first_frame_offset=first_frame_offset, + basic_offset_table=bot, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid + ) + if requires_reencoding: + array = _decode_frame( + frame=frame, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid, + rows=metadata.Rows, + columns=metadata.Columns, + samples_per_pixel=metadata.SamplesPerPixel, + bits_allocated=metadata.BitsAllocated, + bits_stored=metadata.BitsStored, + photometric_interpretation=getattr( + metadata, + 'PhotometricInterpretation' + ), + pixel_representation=metadata.PixelRepresentation, + planar_configuration=getattr( + metadata, + 'PlanarConfiguration', + None + ) + ) + if reencoding_media_type == 'image/jp2': image_type = 'jpeg2000' image_kwargs = {'irreversible': False} - array = image_file_reader.decode_frame(frame_index, frame) image = Image.fromarray(array) with io.BytesIO() as fp: image.save( @@ -2820,14 +3137,15 @@ def iter_instance_frames( image_type, **image_kwargs # type: ignore ) - pixels = fp.getvalue() + reencoded_frame = fp.getvalue() + yield reencoded_frame else: raise ValueError( - 'Cannot re-encode frames using media type ' + 'Cannot retrieve frames using media type ' f'"{reencoding_media_type}".' ) - - yield pixels + else: + yield frame def retrieve_instance_frames( self, @@ -2863,7 +3181,7 @@ def retrieve_instance_frames( f'retrieve frames of instance "{sop_instance_uid}" of ' f'series "{series_instance_uid}" of study "{study_instance_uid}"' ) - file_path = self._get_instance_file_path( + file_path = self._db_manager.get_instance_file_path( study_instance_uid, series_instance_uid, sop_instance_uid, @@ -2872,15 +3190,29 @@ def retrieve_instance_frames( if len(frame_numbers) == 0: raise ValueError('At least one frame number must be provided.') - image_file_reader = self._get_image_file_reader(file_path) - metadata = image_file_reader.metadata - transfer_syntax_uid = image_file_reader.transfer_syntax_uid - + metadata, transfer_syntax_uid, \ + first_frame_offset, bot = self._db_manager.get_instance_info( + study_instance_uid=study_instance_uid, + series_instance_uid=series_instance_uid, + sop_instance_uid=sop_instance_uid + ) reencoding_media_type = self._check_media_types_for_instance_frames( transfer_syntax_uid, media_types ) + requires_reencoding = False + if ( + reencoding_media_type is not None and + reencoding_media_type != 'application/octet-stream' + ): + requires_reencoding = True + + image_file_pointer = self._get_image_file_pointer( + file_path, + transfer_syntax_uid=transfer_syntax_uid + ) + # Check all indices first before attempting to read the first frame. 
frame_indices = [] for frame_number in frame_numbers: if frame_number > int(getattr(metadata, 'NumberOfFrames', '1')): @@ -2891,18 +3223,39 @@ def retrieve_instance_frames( frame_index = frame_number - 1 frame_indices.append(frame_index) - reencoded_frames = [] + retrieved_frames = [] for frame_index in frame_indices: - frame = image_file_reader.read_frame(frame_index) - if not transfer_syntax_uid.is_encapsulated: - reencoded_frame = frame - else: - if reencoding_media_type is None: - reencoded_frame = frame - elif reencoding_media_type == 'image/jp2': + frame = _read_frame( + image_file_pointer, + first_frame_offset=first_frame_offset, + basic_offset_table=bot, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid + ) + if requires_reencoding: + array = _decode_frame( + frame=frame, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid, + rows=metadata.Rows, + columns=metadata.Columns, + samples_per_pixel=metadata.SamplesPerPixel, + bits_allocated=metadata.BitsAllocated, + bits_stored=metadata.BitsStored, + photometric_interpretation=getattr( + metadata, + 'PhotometricInterpretation' + ), + pixel_representation=metadata.PixelRepresentation, + planar_configuration=getattr( + metadata, + 'PlanarConfiguration', + None + ) + ) + if reencoding_media_type == 'image/jp2': image_type = 'jpeg2000' image_kwargs = {'irreversible': False} - array = image_file_reader.decode_frame(frame_index, frame) image = Image.fromarray(array) with io.BytesIO() as fp: image.save( @@ -2913,13 +3266,14 @@ def retrieve_instance_frames( reencoded_frame = fp.getvalue() else: raise ValueError( - 'Cannot re-encode frames using media type ' + 'Cannot retrieve frames using media type ' f'"{reencoding_media_type}".' ) + retrieved_frames.append(reencoded_frame) + else: + retrieved_frames.append(frame) - reencoded_frames.append(reencoded_frame) - - return reencoded_frames + return retrieved_frames def retrieve_instance_frames_rendered( self, @@ -2965,48 +3319,67 @@ def retrieve_instance_frames_rendered( 'Only rendering of a single frame is supported for now.' ) frame_number = frame_numbers[0] + frame_index = frame_number - 1 - file_path = self._get_instance_file_path( + file_path = self._db_manager.get_instance_file_path( study_instance_uid, series_instance_uid, sop_instance_uid, ) - image_file_reader = self._get_image_file_reader(file_path) - frame_index = frame_number - 1 - frame = image_file_reader.read_frame(frame_index) - metadata = image_file_reader.metadata - transfer_syntax_uid = image_file_reader.transfer_syntax_uid - - if frame_number > int(getattr(metadata, 'NumberOfFrames', '1')): - raise ValueError( - 'Provided frame number exceeds number of frames.' 
+ metadata, transfer_syntax_uid, \ + first_frame_offset, bot = self._db_manager.get_instance_info( + study_instance_uid=study_instance_uid, + series_instance_uid=series_instance_uid, + sop_instance_uid=sop_instance_uid ) + image_file_pointer = self._get_image_file_pointer( + file_path, + transfer_syntax_uid=transfer_syntax_uid + ) + frame = _read_frame( + image_file_pointer, + first_frame_offset=first_frame_offset, + basic_offset_table=bot, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid + ) + # TODO: ICC Profile codec_name, codec_kwargs = self._get_image_codec_parameters( - metadata=metadata, transfer_syntax_uid=transfer_syntax_uid, media_types=media_types, params=params ) if codec_name is None: - pixels = frame - else: - array = image_file_reader.decode_frame(frame_index, frame) - image = Image.fromarray(array) - with io.BytesIO() as fp: - image.save(fp, codec_name, **codec_kwargs) - fp.seek(0) - pixels = fp.read() + return frame - return pixels + array = _decode_frame( + frame=frame, + frame_index=frame_index, + transfer_syntax_uid=transfer_syntax_uid, + rows=metadata.Rows, + columns=metadata.Columns, + samples_per_pixel=metadata.SamplesPerPixel, + bits_allocated=metadata.BitsAllocated, + bits_stored=metadata.BitsStored, + photometric_interpretation=metadata.PhotometricInterpretation, + pixel_representation=metadata.PixelRepresentation, + planar_configuration=getattr(metadata, 'PlanarConfiguration', None) + ) + image = Image.fromarray(array) + with io.BytesIO() as fp: + image.save(fp, codec_name, **codec_kwargs) # type: ignore + fp.seek(0) + reencoded_frame = fp.read() + return reencoded_frame def _get_image_codec_parameters( self, - metadata: Dataset, transfer_syntax_uid: str, media_types: Optional[Tuple[Union[str, Tuple[str, str]], ...]] = None, params: Optional[Dict[str, str]] = None, + icc_profile: Optional[bytes] = None ) -> Tuple[Optional[str], Dict[str, Any]]: if media_types is not None: acceptable_media_types = list(set([ @@ -3065,7 +3438,8 @@ def _get_image_codec_parameters( if params is not None and image_type is not None: include_icc_profile = params.get('icc_profile', 'no') if include_icc_profile == 'yes': - icc_profile = metadata.OpticalPathSequence[0].ICCProfile + if icc_profile is None: + icc_profile = createProfile('sRGB') image_kwargs[image_type]['icc_profile'] = ImageCmsProfile( icc_profile ) @@ -3154,107 +3528,7 @@ def store_instances( message += f' of study "{study_instance_uid}"' logger.info(message) - # We first encode all data sets and temporarily store them in memory - # before inserting the metadata into the database and writing the data - # sets to files on disk. This will allow us to "roll back" in case of - # an error. We may want to consider implementing this in a more - # sophisticated way in case it becomes a performance bottleneck. 
- studies: Dict[ - str, - Tuple[ - str, - Optional[str], - Optional[str], - Optional[str], - Optional[str], - Optional[str], - Optional[str], - Optional[str], - Optional[str], - ] - ] = {} - series: Dict[ - str, - Tuple[ - str, - str, - str, - Optional[str], - Optional[int], - ] - ] = {} - instances: Dict[ - str, - Tuple[ - str, - str, - str, - str, - Optional[int], - Optional[int], - Optional[int], - Optional[int], - Optional[int], - str, - str, - ] - ] = {} - successes = [] - failures = [] - for ds in datasets: - logger.info( - f'store instance "{ds.SOPInstanceUID}" ' - f'of series "{ds.SeriesInstanceUID}" ' - f'of study "{ds.StudyInstanceUID}" ' - ) - - try: - if study_instance_uid is not None: - if ds.StudyInstanceUID != study_instance_uid: - continue - else: - study_instance_uid = ds.StudyInstanceUID - study_metadata = self._extract_study_metadata(ds) - studies[study_instance_uid] = study_metadata - - series_metadata = self._extract_series_metadata(ds) - series_instance_uid = ds.SeriesInstanceUID - series[series_instance_uid] = series_metadata - - sop_instance_uid = ds.SOPInstanceUID - rel_file_path = '/'.join([ - 'studies', - study_instance_uid, - 'series', - series_instance_uid, - 'instances', - sop_instance_uid - ]) - instance_metadata = self._extract_instance_metadata( - ds, - rel_file_path - ) - instances[sop_instance_uid] = instance_metadata - - with io.BytesIO() as b: - dcmwrite(b, ds, write_like_original=False) - file_content = b.getvalue() - - file_path = self.base_dir.joinpath(rel_file_path) - successes.append((ds, file_path, file_content)) - except Exception as error: - logger.error( - f'failed to store instance "{ds.SOPInstanceUID}" ' - f'of series "{ds.SeriesInstanceUID}" ' - f'of study "{ds.StudyInstanceUID}": {error}' - ) - failures.append(ds) - - self._insert_into_db( - studies.values(), - series.values(), - instances.values() - ) + successes, failures = self._db_manager.insert_instances(datasets) response = Dataset() response.RetrieveURL = None @@ -3297,7 +3571,7 @@ def delete_study(self, study_instance_uid: str) -> None: raise ValueError( 'Study Instance UID is required for deletion of a study.' ) - uids = self._get_instances(study_instance_uid) + uids = self._db_manager.get_instance_identifiers(study_instance_uid) for study_instance_uid, series_instance_uid, sop_instance_uid in uids: self.delete_instance( study_instance_uid=study_instance_uid, @@ -3328,11 +3602,15 @@ def delete_series( raise ValueError( 'Series Instance UID is required for deletion of a series.' ) - uids = self._get_instances( + instance_identifiers = self._db_manager.get_instance_identifiers( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, ) - for study_instance_uid, series_instance_uid, sop_instance_uid in uids: + for ( + study_instance_uid, + series_instance_uid, + sop_instance_uid, + ) in instance_identifiers: self.delete_instance( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, @@ -3369,12 +3647,12 @@ def delete_instance( raise ValueError( 'SOP Instance UID is required for deletion of an instance.' 
) - file_path = self._get_instance_file_path( + file_path = self._db_manager.get_instance_file_path( study_instance_uid=study_instance_uid, series_instance_uid=series_instance_uid, sop_instance_uid=sop_instance_uid, ) - self._delete_instances_from_db( + self._db_manager.delete_instances( uids=[ (study_instance_uid, series_instance_uid, sop_instance_uid) ] diff --git a/src/dicomweb_client/web.py b/src/dicomweb_client/web.py index a7a5733..38dc5ba 100644 --- a/src/dicomweb_client/web.py +++ b/src/dicomweb_client/web.py @@ -1670,8 +1670,10 @@ def _invoke_delete_request(url: str) -> requests.models.Response: response = _invoke_delete_request(url) if response.status_code == HTTPStatus.METHOD_NOT_ALLOWED: logger.error( - 'Resource could not be deleted. The origin server may not support' - 'deletion or you may not have the necessary permissions.') + 'Resource could not be deleted. ' + 'The origin server may not support deletion ' + 'or you may not have the necessary permissions.' + ) response.raise_for_status() return response diff --git a/tests/test_file.py b/tests/test_file.py index 24bf2fe..4b2824c 100644 --- a/tests/test_file.py +++ b/tests/test_file.py @@ -30,7 +30,6 @@ 'SOPClassUID', 'SOPInstanceUID', 'InstanceNumber', - 'TransferSyntaxUID', } From c5c2e9b8d94bfaaba9084184bdf5db490e92dcb5 Mon Sep 17 00:00:00 2001 From: hackermd Date: Tue, 9 Aug 2022 13:07:28 -0400 Subject: [PATCH 2/5] Fix coding style issues --- src/dicomweb_client/ext/gcp/uri.py | 42 ++++++++++++++++-------------- src/dicomweb_client/web.py | 10 +++---- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/dicomweb_client/ext/gcp/uri.py b/src/dicomweb_client/ext/gcp/uri.py index 55c9fdd..324e289 100644 --- a/src/dicomweb_client/ext/gcp/uri.py +++ b/src/dicomweb_client/ext/gcp/uri.py @@ -27,7 +27,7 @@ @dataclasses.dataclass(eq=True, frozen=True) class GoogleCloudHealthcareURL: - """Base URL container for DICOM Stores under the `Google Cloud Healthcare API`_. + """URL container for DICOM Stores under the `Google Cloud Healthcare API`_. This class facilitates the parsing and creation of :py:attr:`URI.base_url` corresponding to DICOMweb API Service URLs under the v1_ API. The URLs are @@ -37,24 +37,26 @@ class GoogleCloudHealthcareURL: .. _Google Cloud Healthcare API: https://cloud.google.com/healthcare .. _v1: https://cloud.google.com/healthcare/docs/how-tos/transition-guide - Attributes: - project_id: str - The ID of the `GCP Project - `_ - that contains the DICOM Store. - location: str - The `Region name - `_ of the - geographic location configured for the Dataset that contains the - DICOM Store. - dataset_id: str - The ID of the `Dataset - `_ - that contains the DICOM Store. - dicom_store_id: str - The ID of the `DICOM Store - `_. - """ + Attributes + ---------- + project_id: str + The ID of the `GCP Project + `_ + that contains the DICOM Store. + location: str + The `Region name + `_ of the + geographic location configured for the Dataset that contains the + DICOM Store. + dataset_id: str + The ID of the `Dataset + `_ + that contains the DICOM Store. + dicom_store_id: str + The ID of the `DICOM Store + `_. + + """ # noqa: E501 project_id: str location: str dataset_id: str @@ -84,7 +86,7 @@ def __str__(self) -> str: @classmethod def from_string(cls, base_url: str) -> 'GoogleCloudHealthcareURL': - """Creates an instance from ``base_url``. + """Create an instance from `base_url`. 
Parameters ---------- diff --git a/src/dicomweb_client/web.py b/src/dicomweb_client/web.py index 38dc5ba..6e08a44 100644 --- a/src/dicomweb_client/web.py +++ b/src/dicomweb_client/web.py @@ -331,19 +331,19 @@ def _parse_qido_query_parameters( """ params: Dict[str, Union[int, str, List[str]]] = {} if limit is not None: - if not(isinstance(limit, int)): + if not isinstance(limit, int): raise TypeError('Parameter "limit" must be an integer.') if limit < 0: raise ValueError('Parameter "limit" must not be negative.') params['limit'] = limit if offset is not None: - if not(isinstance(offset, int)): + if not isinstance(offset, int): raise TypeError('Parameter "offset" must be an integer.') if offset < 0: raise ValueError('Parameter "offset" must not be negative.') params['offset'] = offset if fuzzymatching is not None: - if not(isinstance(fuzzymatching, bool)): + if not isinstance(fuzzymatching, bool): raise TypeError('Parameter "fuzzymatching" must be boolean.') if fuzzymatching: params['fuzzymatching'] = 'true' @@ -352,13 +352,13 @@ def _parse_qido_query_parameters( if fields is not None: includefields = [] for field in set(fields): - if not(isinstance(field, str)): + if not isinstance(field, str): raise TypeError('Elements of "fields" must be a string.') includefields.append(field) params['includefield'] = includefields if search_filters is not None: for field, criterion in search_filters.items(): - if not(isinstance(field, str)): + if not isinstance(field, str): raise TypeError( 'Keys of "search_filters" must be strings.' ) From 8a6c8304bbd36f01676c653a22cd322cdcd4156f Mon Sep 17 00:00:00 2001 From: hackermd Date: Thu, 11 Aug 2022 12:58:24 -0400 Subject: [PATCH 3/5] Unify attributes of web and file clients --- src/dicomweb_client/file.py | 67 +++++++++++++++++++++++++-------- src/dicomweb_client/protocol.py | 10 ++++- src/dicomweb_client/web.py | 20 +++++----- 3 files changed, 71 insertions(+), 26 deletions(-) diff --git a/src/dicomweb_client/file.py b/src/dicomweb_client/file.py index e929a11..796ef33 100644 --- a/src/dicomweb_client/file.py +++ b/src/dicomweb_client/file.py @@ -22,6 +22,7 @@ Tuple, Union, ) +from urllib.parse import urlparse import numpy as np import requests @@ -396,7 +397,6 @@ def _build_bot( while True: frame_position = fp.tell() tag = TupleTag(fp.read_tag()) - print(tag) if int(tag) == SequenceDelimiterTag: break if int(tag) != ItemTag: @@ -512,7 +512,7 @@ class _DatabaseManager: def __init__( self, - base_dir: Path, + url: str, db_dir: Path, update_db: bool = False, recreate_db: bool = False, @@ -522,8 +522,8 @@ def __init__( Parameters ---------- - base_dir: pathlib.Path - Base path to the directory where DICOM files are stored + url: pathlib.Path + Location of the DICOM files. URL with either a ``file://`` scheme. db_dir: pathlib.Path Path to the directory where database files should be stored update_db: bool, optional @@ -541,7 +541,9 @@ def __init__( ``False``). 
""" - self.base_dir = base_dir + components = urlparse(url) + self.base_dir = Path(components.path) + if in_memory: filename = ':memory:' self._db_file_identifier = filename @@ -794,7 +796,6 @@ def _read_selected_metadata(self, fp: DicomFileLike) -> Dataset: @_enforce_standard_conformance def _update_db(self): """Update database.""" - indexed_file_paths = set(self._get_indexed_file_paths()) found_file_paths = set() @@ -807,8 +808,8 @@ def _update_db(self): continue rel_file_path = file_path.relative_to(self.base_dir) - found_file_paths.add(rel_file_path) - if file_path in indexed_file_paths: + found_file_paths.add(file_path) + if rel_file_path in indexed_file_paths: logger.debug(f'skip indexed file {file_path}') continue @@ -818,7 +819,12 @@ def _update_db(self): with open(file_path, 'rb') as f: try: ds = self._read_selected_metadata(f) - except (InvalidDicomError, AttributeError, ValueError): + except ( + InvalidDicomError, + AttributeError, + ValueError, + LookupError, + ): logger.warning(f'failed to read file "{file_path}"') continue @@ -2075,11 +2081,30 @@ class DICOMfileClient: This is **not** an implementation of the DICOM File Service and does not depend on the presence of ``DICOMDIR`` files. + Attributes + ---------- + base_url: str + Unique resource locator of the DICOMweb service + scheme: str + Name of the scheme (``file``) + protocol: str + Name of the protocol (``None``) + url_prefix: str + URL path prefix for DICOMweb services (part of `base_url`) + qido_url_prefix: Union[str, None] + URL path prefix for QIDO-RS (not part of `base_url`) + wado_url_prefix: Union[str, None] + URL path prefix for WADO-RS (not part of `base_url`) + stow_url_prefix: Union[str, None] + URL path prefix for STOW-RS (not part of `base_url`) + delete_url_prefix: Union[str, None] + URL path prefix for DELETE (not part of `base_url`) + """ def __init__( self, - base_dir: Union[Path, str], + url: str, update_db: bool = False, recreate_db: bool = False, in_memory: bool = False, @@ -2089,8 +2114,9 @@ def __init__( Parameters ---------- - base_dir: Union[pathlib.Path, str] - Path to base directory containing DICOM files + url: str + Unique resource locator of directory where data is stored + (must have ``file`` scheme) update_db: bool, optional Whether the database should be updated (default: ``False``). 
If ``True``, the client will search `base_dir` recursively for new @@ -2109,13 +2135,24 @@ def __init__( to `base_dir`) """ - self.base_dir = Path(base_dir).resolve() + self.base_url = url + components = urlparse(url) + self.scheme = components.scheme + if self.scheme != 'file': + raise ValueError(f'URL scheme "{self.scheme}" is not supported.') + self.protocol = None + self.url_prefix = components.path + self.qido_url_prefix = None + self.wado_url_prefix = None + self.stow_url_prefix = None + self.delete_url_prefix = None if db_dir is None: - db_dir = self.base_dir + base_dir = Path(components.path) + db_dir = base_dir else: db_dir = Path(db_dir).resolve() self._db_manager = _DatabaseManager( - base_dir=self.base_dir, + url=url, db_dir=db_dir, update_db=update_db, recreate_db=recreate_db, diff --git a/src/dicomweb_client/protocol.py b/src/dicomweb_client/protocol.py index bbed139..cec553d 100644 --- a/src/dicomweb_client/protocol.py +++ b/src/dicomweb_client/protocol.py @@ -19,9 +19,17 @@ @runtime_checkable class DICOMClient(Protocol): - """Protocol for DICOM clients based on DICOMweb interface.""" + base_url: str + scheme: str + protocol: Optional[str] + url_prefix: str + qido_url_prefix: Optional[str] = None + wado_url_prefix: Optional[str] = None + stow_url_prefix: Optional[str] = None + delete_url_prefix: Optional[str] = None + def search_for_studies( self, fuzzymatching: Optional[bool] = None, diff --git a/src/dicomweb_client/web.py b/src/dicomweb_client/web.py index 6e08a44..3ca14f9 100644 --- a/src/dicomweb_client/web.py +++ b/src/dicomweb_client/web.py @@ -88,6 +88,8 @@ class DICOMwebClient: ---------- base_url: str Unique resource locator of the DICOMweb service + scheme: str + Name of the scheme, e.g. ``"https"`` protocol: str Name of the protocol, e.g. ``"https"`` host: str @@ -104,11 +106,6 @@ class DICOMwebClient: URL path prefix for STOW-RS (not part of `base_url`) delete_url_prefix: Union[str, None] URL path prefix for DELETE (not part of `base_url`) - chunk_size: int - Maximum number of bytes that should be transferred per data chunk - when streaming data from the server using chunked transfer encoding - (used by ``iter_*()`` methods as well as the ``store_instances()`` - method) """ @@ -270,8 +267,12 @@ def __init__( match = re.match(pattern, self.base_url) if match is None: raise ValueError(f'Malformed URL: {self.base_url}') + url_components = urlparse(url) + self.scheme = url_components.scheme + self.protocol = self.scheme + if not self.scheme.startswith('http'): + raise ValueError(f'URL scheme "{self.scheme}" is not supported.') try: - self.protocol = match.group('scheme') self.host = match.group('host') port = match.group('port') except AttributeError: @@ -279,15 +280,14 @@ def __init__( if port: self.port = int(port) else: - if self.protocol == 'http': + if self.scheme == 'http': self.port = 80 - elif self.protocol == 'https': + elif self.scheme == 'https': self.port = 443 else: raise ValueError( - f'URL scheme "{self.protocol}" is not supported.' + f'URL scheme "{self.scheme}" is not supported.' 
                 )
-        url_components = urlparse(url)
         self.url_prefix = url_components.path
         if headers is not None:
             self._session.headers.update(headers)

From 0ca69552febd14d11cc9b3703f346be59fb14c08 Mon Sep 17 00:00:00 2001
From: hackermd
Date: Thu, 11 Aug 2022 13:02:11 -0400
Subject: [PATCH 4/5] Check scheme and protocol of client instances

---
 tests/conftest.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index bc1f27a..019b2b9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -34,5 +34,5 @@ def client(httpserver):
 @pytest.fixture
 def file_client():
-    '''Instance of `dicomweb_client.api.DICOMwebClient`.'''
-    base_dir = Path(DATA_ROOT)
-    return DICOMfileClient(base_dir, recreate_db=True, in_memory=True)
+    '''Instance of `dicomweb_client.api.DICOMfileClient`.'''
+    url = f'file://{DATA_ROOT}'
+    return DICOMfileClient(url, recreate_db=True, in_memory=True)

From 18ab4d1e5e141e2c2af39506cef33186db30d03d Mon Sep 17 00:00:00 2001
From: hackermd
Date: Thu, 18 Aug 2022 09:13:13 -0400
Subject: [PATCH 5/5] Reset inventories when inserting metadata into db

---
 src/dicomweb_client/file.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/dicomweb_client/file.py b/src/dicomweb_client/file.py
index 796ef33..43e5e44 100644
--- a/src/dicomweb_client/file.py
+++ b/src/dicomweb_client/file.py
@@ -888,6 +888,9 @@ def _update_db(self):
             series.values(),
             instances.values()
         )
+        studies = {}
+        series = {}
+        instances = {}
 
         self._insert_into_db(
             studies.values(),
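
Taken together, patches 3/5 and 4/5 change the public calling convention of
`DICOMfileClient`: the former `base_dir` path argument becomes a `url`
argument that must carry a ``file://`` scheme, and the client gains the same
`base_url`, `scheme`, `protocol`, and `url_prefix` attributes as
`DICOMwebClient`. A minimal sketch of the new convention, mirroring the
updated `file_client` fixture; the directory path is a hypothetical
placeholder:

    from dicomweb_client.api import DICOMfileClient

    # The directory containing DICOM files is now addressed by URL;
    # its path component becomes the base directory that is scanned.
    url = 'file:///tmp/dicom'
    client = DICOMfileClient(url, recreate_db=True, in_memory=True)

    # Attributes unified with DICOMwebClient by patch 3/5:
    assert client.base_url == url
    assert client.scheme == 'file'
    assert client.protocol is None

Note that passing a plain directory path now raises ``ValueError``, because
`urlparse` yields an empty scheme for it.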
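Patch 2/5 reformats the `GoogleCloudHealthcareURL` docstring into the
numpydoc style used elsewhere in the package. The four attributes it
documents fully determine the base URL of a DICOM store; a short sketch,
with all IDs as hypothetical placeholders:

    from dicomweb_client.ext.gcp.uri import GoogleCloudHealthcareURL

    url = GoogleCloudHealthcareURL(
        project_id='example-project',
        location='us-central1',
        dataset_id='example-dataset',
        dicom_store_id='example-store',
    )

    # __str__() composes the DICOMweb service URL and from_string()
    # parses it back; the dataclass is declared with eq=True and
    # frozen=True, so the round trip can be verified by equality.
    assert GoogleCloudHealthcareURL.from_string(str(url)) == url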
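The `not(isinstance(...))` cleanups in `_parse_qido_query_parameters`
concern the validation applied to every QIDO-RS search: `limit` and `offset`
must be non-negative integers, elements of `fields` must be strings, and
`search_filters` must be a mapping keyed by attribute keyword or tag. A
sketch of a search exercising these parameters against the `client`
constructed in the first sketch, assuming the public `search_for_instances`
method that wraps the instance query shown above; the study UID and filter
value are hypothetical:

    instances = client.search_for_instances(
        study_instance_uid='1.2.826.0.1.3680043.8.498.1',
        search_filters={'Modality': 'SM'},
        limit=10,
        offset=0,
    )

    # Each result is a DICOM JSON dictionary of instance attributes.
    print(len(instances))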