From 69e236dfe305e8ee87c509c41adf42d03043730f Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 8 Apr 2025 11:28:19 +0200 Subject: [PATCH 01/15] made initial backend functions for adapter subsetting, need to still make the squeeze function and link it to the front end --- bayesflow/adapters/transforms/subset_array.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 bayesflow/adapters/transforms/subset_array.py diff --git a/bayesflow/adapters/transforms/subset_array.py b/bayesflow/adapters/transforms/subset_array.py new file mode 100644 index 000000000..adeddcb5e --- /dev/null +++ b/bayesflow/adapters/transforms/subset_array.py @@ -0,0 +1,62 @@ +import numpy as np +from keras.saving import register_keras_serializable as serializable + +from .elementwise_transform import ElementwiseTransform + + + +@serializable(package="bayesflow.adapters") +class SubsetArray(ElementwiseTransform): + """ + A transform to reduce the dimensionality of arrays output by the summary network + Sometimes, the simulators may return larger arrays than we want to use in our networks, so it would be great to have some subsetting adapter transforms. + + We need: + + Subsetting within an axis (taking only some elements) + while keeping the number of axes the same. This is essentially + the np.take functionality so we might want to call this transform + take. + + In contrast to np.take, I would make axis a mandatory + arguments or default it to the last axis. + Example: adapter.take("x", 1:3, axis = -1) + + Subsetting using a random set of indices (of user-specified size) + within an axis. We might call this subsample. Internally it would + call take after sampling the indices. + Example: adapter.subsample("x", size = 3, axis = -1) + Removing an axis of length one. 
Following numpy, + I would call this transform squeeze: Example: adapter.squeeze("x", axis = 1) + + """ + + def __init__(self, forward = str, inverse = str ): + + super().__init__() + + + + + + def take(self, data, indices, axis = -1): + # ithink that indices needs to be a list or a slice, if its a list then + # we can have that subsample provides a list of random slices + # warn if no axis is provided + # my question is how does np.take deal with a list of numbers? + # it will gladly take a listof numbers even non consecutive ones , + # it also warns for out of bounds + return np.take(data, indices, axis) + + + def subsample(self, data, sample_size, axis): + + max_sample_size = data.shape[axis] + + sample_indices = np.random.permutation(max_sample_size)[0:sample_size-1] # random sample without replacement + + self.take(data, sample_indices, axis) + + + + From 9c0da4c2be039e389beb439a1bded45468528d07 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Fri, 11 Apr 2025 10:55:29 +0200 Subject: [PATCH 02/15] added subsample functionality, to do would be adding them to testing procedures --- .gitignore | 3 ++ bayesflow/adapters/adapter.py | 33 +++++++++++++ bayesflow/adapters/transforms/__init__.py | 1 + .../{subset_array.py => subsample_array.py} | 47 +++++++++++++------ 4 files changed, 70 insertions(+), 14 deletions(-) rename bayesflow/adapters/transforms/{subset_array.py => subsample_array.py} (54%) diff --git a/.gitignore b/.gitignore index bf64c1380..1ca9eaef6 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,6 @@ docs/ # MacOS .DS_Store + +# Rproj +.Rproj.user diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index e84e309fe..39409e585 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -27,6 +27,7 @@ Standardize, ToArray, Transform, + SubsampleArray ) from .transforms.filter_transform import Predicate @@ -631,3 +632,35 @@ def to_array( ) self.transforms.append(transform) return self + + def 
subsample_array(self, + *, + predicate: Predicate = None, + include: str | Sequence[str] = None, + exclude: str | Sequence[str] = None, + **kwargs, + ): + """ + Append a :py:class:`~transforms.SubsampleArray` transform to the adapter. + + Parameters + ---------- + predicate : Predicate, optional + Function that indicates which variables should be transformed. + include : str or Sequence of str, optional + Names of variables to include in the transform. + exclude : str or Sequence of str, optional + Names of variables to exclude from the transform. + **kwargs : dict + Additional keyword arguments passed to the transform. + + """ + transform = FilterTransform( + transform_constructor=SubsampleArray, + predicate=predicate, + include=include, + exclude=exclude, + **kwargs, + ) + self.transforms.append(transform) + return self diff --git a/bayesflow/adapters/transforms/__init__.py b/bayesflow/adapters/transforms/__init__.py index c0a1fce24..bc5d73f4f 100644 --- a/bayesflow/adapters/transforms/__init__.py +++ b/bayesflow/adapters/transforms/__init__.py @@ -18,6 +18,7 @@ from .standardize import Standardize from .to_array import ToArray from .transform import Transform +from .subsample_array import SubsampleArray from ...utils._docs import _add_imports_to_all diff --git a/bayesflow/adapters/transforms/subset_array.py b/bayesflow/adapters/transforms/subsample_array.py similarity index 54% rename from bayesflow/adapters/transforms/subset_array.py rename to bayesflow/adapters/transforms/subsample_array.py index adeddcb5e..bcf31d5aa 100644 --- a/bayesflow/adapters/transforms/subset_array.py +++ b/bayesflow/adapters/transforms/subsample_array.py @@ -6,8 +6,16 @@ @serializable(package="bayesflow.adapters") -class SubsetArray(ElementwiseTransform): +class SubsampleArray(ElementwiseTransform): """ + A transform that takes a random subsample of the data within an axis. 
+ + Example: adapter.subsample("x", sample_size = 3, axis = -1) + + """ + + """ + A transform to reduce the dimensionality of arrays output by the summary network Sometimes, the simulators may return larger arrays than we want to use in our networks, so it would be great to have some subsetting adapter transforms. @@ -31,31 +39,42 @@ class SubsetArray(ElementwiseTransform): """ - def __init__(self, forward = str, inverse = str ): + def __init__(self): super().__init__() + def forward(self, data: np.ndarray, sample_size: int, axis = -1): + + max_sample_size = data.shape[axis] + sample_indices = np.random.permutation(max_sample_size)[0:sample_size-1] # random sample without replacement + + return self.take(data, sample_indices, axis) + + def inverse(self, data, **kwargs): + return super().inverse(data, **kwargs) + - def take(self, data, indices, axis = -1): - # ithink that indices needs to be a list or a slice, if its a list then - # we can have that subsample provides a list of random slices - # warn if no axis is provided - # my question is how does np.take deal with a list of numbers? - # it will gladly take a listof numbers even non consecutive ones , - # it also warns for out of bounds - return np.take(data, indices, axis) + + # def take(self, data, indices, axis = -1): + # # ithink that indices needs to be a list or a slice, if its a list then + # # we can have that subsample provides a list of random slices + # # warn if no axis is provided + # # my question is how does np.take deal with a list of numbers? 
+ # # it will gladly take a listof numbers even non consecutive ones , + # # it also warns for out of bounds + # return np.take(data, indices, axis) - def subsample(self, data, sample_size, axis): + # def subsample(self, data, sample_size, axis): - max_sample_size = data.shape[axis] + # max_sample_size = data.shape[axis] - sample_indices = np.random.permutation(max_sample_size)[0:sample_size-1] # random sample without replacement + # sample_indices = np.random.permutation(max_sample_size)[0:sample_size-1] # random sample without replacement - self.take(data, sample_indices, axis) + # self.take(data, sample_indices, axis) From d57aee4639b62d53c04815c494c4a01543f43ba5 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Fri, 11 Apr 2025 11:34:09 +0200 Subject: [PATCH 03/15] made the take function and ran the linter --- bayesflow/adapters/adapter.py | 37 +++++++++- bayesflow/adapters/transforms/__init__.py | 1 + .../adapters/transforms/subsample_array.py | 71 +++---------------- bayesflow/adapters/transforms/take.py | 24 +++++++ tests/test_adapters/conftest.py | 2 + tests/test_links/test_links.py | 12 ++-- .../test_point_inference_network.py | 6 +- 7 files changed, 81 insertions(+), 72 deletions(-) create mode 100644 bayesflow/adapters/transforms/take.py diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index 39409e585..1ce25d7f0 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -27,7 +27,8 @@ Standardize, ToArray, Transform, - SubsampleArray + SubsampleArray, + Take ) from .transforms.filter_transform import Predicate @@ -640,7 +641,7 @@ def subsample_array(self, exclude: str | Sequence[str] = None, **kwargs, ): - """ + """ Append a :py:class:`~transforms.SubsampleArray` transform to the adapter. 
Parameters @@ -664,3 +665,35 @@ def subsample_array(self, ) self.transforms.append(transform) return self + + def take(self, + *, + predicate: Predicate = None, + include: str | Sequence[str] = None, + exclude: str | Sequence[str] = None, + **kwargs,): + """ + Append a :py:class:`~transforms.Take` transform to the adapter. + + Parameters + ---------- + predicate : Predicate, optional + Function that indicates which variables should be transformed. + include : str or Sequence of str, optional + Names of variables to include in the transform. + exclude : str or Sequence of str, optional + Names of variables to exclude from the transform. + **kwargs : dict + Additional keyword arguments passed to the transform. """ + transform = FilterTransform( + transform_constructor=Take, + predicate=predicate, + include=include, + exclude=exclude, + **kwargs, + ) + self.transforms.append(transform) + return self + + + diff --git a/bayesflow/adapters/transforms/__init__.py b/bayesflow/adapters/transforms/__init__.py index bc5d73f4f..4bacb0a8e 100644 --- a/bayesflow/adapters/transforms/__init__.py +++ b/bayesflow/adapters/transforms/__init__.py @@ -19,6 +19,7 @@ from .to_array import ToArray from .transform import Transform from .subsample_array import SubsampleArray +from .take import Take from ...utils._docs import _add_imports_to_all diff --git a/bayesflow/adapters/transforms/subsample_array.py b/bayesflow/adapters/transforms/subsample_array.py index bcf31d5aa..7c12554f0 100644 --- a/bayesflow/adapters/transforms/subsample_array.py +++ b/bayesflow/adapters/transforms/subsample_array.py @@ -4,78 +4,27 @@ from .elementwise_transform import ElementwiseTransform - @serializable(package="bayesflow.adapters") class SubsampleArray(ElementwiseTransform): """ - A transform that takes a random subsample of the data within an axis. + A transform that takes a random subsample of the data within an axis. 
Example: adapter.subsample("x", sample_size = 3, axis = -1) - """ - - """ - - A transform to reduce the dimensionality of arrays output by the summary network - Sometimes, the simulators may return larger arrays than we want to use in our networks, so it would be great to have some subsetting adapter transforms. - - We need: - - Subsetting within an axis (taking only some elements) - while keeping the number of axes the same. This is essentially - the np.take functionality so we might want to call this transform - take. - - In contrast to np.take, I would make axis a mandatory - arguments or default it to the last axis. - Example: adapter.take("x", 1:3, axis = -1) - - Subsetting using a random set of indices (of user-specified size) - within an axis. We might call this subsample. Internally it would - call take after sampling the indices. - Example: adapter.subsample("x", size = 3, axis = -1) - Removing an axis of length one. Following numpy, - I would call this transform squeeze: Example: adapter.squeeze("x", axis = 1) - """ def __init__(self): - super().__init__() - - def forward(self, data: np.ndarray, sample_size: int, axis = -1): - - max_sample_size = data.shape[axis] - - sample_indices = np.random.permutation(max_sample_size)[0:sample_size-1] # random sample without replacement - return self.take(data, sample_indices, axis) - - def inverse(self, data, **kwargs): - return super().inverse(data, **kwargs) - - - - - - # def take(self, data, indices, axis = -1): - # # ithink that indices needs to be a list or a slice, if its a list then - # # we can have that subsample provides a list of random slices - # # warn if no axis is provided - # # my question is how does np.take deal with a list of numbers? 
- # # it will gladly take a listof numbers even non consecutive ones , - # # it also warns for out of bounds - # return np.take(data, indices, axis) - - - # def subsample(self, data, sample_size, axis): - - # max_sample_size = data.shape[axis] - - # sample_indices = np.random.permutation(max_sample_size)[0:sample_size-1] # random sample without replacement - - # self.take(data, sample_indices, axis) + def forward(self, data: np.ndarray, sample_size: int, axis=-1): + max_sample_size = data.shape[axis] + sample_indices = np.random.permutation(max_sample_size)[ + 0 : sample_size - 1 + ] # random sample without replacement - + return np.take(data, sample_indices, axis) + def inverse(self, data, **kwargs): + # non invertible transform + return data diff --git a/bayesflow/adapters/transforms/take.py b/bayesflow/adapters/transforms/take.py new file mode 100644 index 000000000..094fac6d3 --- /dev/null +++ b/bayesflow/adapters/transforms/take.py @@ -0,0 +1,24 @@ +import numpy as np +from keras.saving import register_keras_serializable as serializable + +from .elementwise_transform import ElementwiseTransform + + +@serializable(package="bayesflow.adapters") +class Take(ElementwiseTransform): + """ + A transform to reduce the dimensionality of arrays output by the summary network + Axis is a mandatory argument and will default to the last axis. 
+ Example: adapter.take("x", 1:3, axis = -1) + + """ + + def __init__(self): + super().__init__() + + def forward(self, data, indices, axis=-1): + return np.take(data, indices, axis) + + def inverse(self, data): + # not a true invertible function + return data diff --git a/tests/test_adapters/conftest.py b/tests/test_adapters/conftest.py index b020523d9..0f3ac79df 100644 --- a/tests/test_adapters/conftest.py +++ b/tests/test_adapters/conftest.py @@ -25,6 +25,7 @@ def adapter(): .one_hot("o1", 10) .keep(["x", "y", "z1", "p1", "p2", "s1", "s2", "t1", "t2", "o1"]) .rename("o1", "o2") + .subsample_array("s3", sample_size = 3, axis = 0) ) return d @@ -47,4 +48,5 @@ def random_data(): "d1": np.random.standard_normal(size=(32, 2)), "d2": np.random.standard_normal(size=(32, 2)), "o1": np.random.randint(0, 9, size=(32, 2)), + "s3": np.random.standard_normal(size=(32,2)) } diff --git a/tests/test_links/test_links.py b/tests/test_links/test_links.py index b0ea22242..42389673b 100644 --- a/tests/test_links/test_links.py +++ b/tests/test_links/test_links.py @@ -30,9 +30,9 @@ def check_ordering(output, axis): assert np.all(np.diff(output, axis=axis) > 0), f"is not ordered along specified axis: {axis}." for i in range(output.ndim): if i != axis % output.ndim: - assert not np.all(np.diff(output, axis=i) > 0), ( - f"is ordered along axis which is not meant to be ordered: {i}." - ) + assert not np.all( + np.diff(output, axis=i) > 0 + ), f"is ordered along axis which is not meant to be ordered: {i}." 
@pytest.mark.parametrize("axis", [0, 1, 2]) @@ -69,6 +69,6 @@ def test_positive_semi_definite(random_matrix_batch): output = keras.ops.convert_to_numpy(output) eigenvalues = np.linalg.eig(output).eigenvalues - assert np.all(eigenvalues.real > 0) and np.all(np.isclose(eigenvalues.imag, 0)), ( - f"output is not positive semi-definite: real={eigenvalues.real}, imag={eigenvalues.imag}" - ) + assert np.all(eigenvalues.real > 0) and np.all( + np.isclose(eigenvalues.imag, 0) + ), f"output is not positive semi-definite: real={eigenvalues.real}, imag={eigenvalues.imag}" diff --git a/tests/test_networks/test_point_inference_network/test_point_inference_network.py b/tests/test_networks/test_point_inference_network/test_point_inference_network.py index 38ba8ea4e..8992e923a 100644 --- a/tests/test_networks/test_point_inference_network/test_point_inference_network.py +++ b/tests/test_networks/test_point_inference_network/test_point_inference_network.py @@ -44,9 +44,9 @@ def test_save_and_load(tmp_path, point_inference_network, random_samples, random for key_outer in out1.keys(): for key_inner in out1[key_outer].keys(): - assert keras.ops.all(keras.ops.isclose(out1[key_outer][key_inner], out2[key_outer][key_inner])), ( - "Output of original and loaded model differs significantly." - ) + assert keras.ops.all( + keras.ops.isclose(out1[key_outer][key_inner], out2[key_outer][key_inner]) + ), "Output of original and loaded model differs significantly." 
def test_copy_unequal(point_inference_network, random_samples, random_conditions): From 8d834da7f54b8e0672f74f2749b7d856a38110e1 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 14:38:47 +0200 Subject: [PATCH 04/15] changed name of subsampling function --- bayesflow/adapters/adapter.py | 2 +- tests/test_adapters/conftest.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index 1ce25d7f0..d820e7286 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -634,7 +634,7 @@ def to_array( self.transforms.append(transform) return self - def subsample_array(self, + def random_subsample(self, *, predicate: Predicate = None, include: str | Sequence[str] = None, diff --git a/tests/test_adapters/conftest.py b/tests/test_adapters/conftest.py index 0f3ac79df..1f6e83df2 100644 --- a/tests/test_adapters/conftest.py +++ b/tests/test_adapters/conftest.py @@ -25,7 +25,8 @@ def adapter(): .one_hot("o1", 10) .keep(["x", "y", "z1", "p1", "p2", "s1", "s2", "t1", "t2", "o1"]) .rename("o1", "o2") - .subsample_array("s3", sample_size = 3, axis = 0) + .random_subsample("s3", sample_size = 33, axis = 0) + .take("s3", indices = np.arange(0,32), axis = 0) ) return d @@ -48,5 +49,5 @@ def random_data(): "d1": np.random.standard_normal(size=(32, 2)), "d2": np.random.standard_normal(size=(32, 2)), "o1": np.random.randint(0, 9, size=(32, 2)), - "s3": np.random.standard_normal(size=(32,2)) + "s3": np.random.standard_normal(size=(35,2)) } From 6c1d503caca6aa543bfd86782bdea0431590fb8d Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 14:39:53 +0200 Subject: [PATCH 05/15] changed documentation, to be consistent with external notation, rather than internal shorthand --- bayesflow/adapters/transforms/take.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bayesflow/adapters/transforms/take.py b/bayesflow/adapters/transforms/take.py index 
094fac6d3..643abae0f 100644 --- a/bayesflow/adapters/transforms/take.py +++ b/bayesflow/adapters/transforms/take.py @@ -9,7 +9,7 @@ class Take(ElementwiseTransform): """ A transform to reduce the dimensionality of arrays output by the summary network Axis is a mandatory argument and will default to the last axis. - Example: adapter.take("x", 1:3, axis = -1) + Example: adapter.take("x", np.arange(0,3), axis = -1) """ From 2e83846872decb85ccf5d32a8e2895a916cd92e7 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 14:40:40 +0200 Subject: [PATCH 06/15] small formatting change to documentation --- bayesflow/adapters/transforms/take.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bayesflow/adapters/transforms/take.py b/bayesflow/adapters/transforms/take.py index 643abae0f..aa9328e91 100644 --- a/bayesflow/adapters/transforms/take.py +++ b/bayesflow/adapters/transforms/take.py @@ -9,7 +9,7 @@ class Take(ElementwiseTransform): """ A transform to reduce the dimensionality of arrays output by the summary network Axis is a mandatory argument and will default to the last axis. 
- Example: adapter.take("x", np.arange(0,3), axis = -1) + Example: adapter.take("x", np.arange(0,3), axis=-1) """ From dee45340c7174f7aab9997f4e225861bac9a0af1 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 15:02:39 +0200 Subject: [PATCH 07/15] changed subsample to have sample size and axis in the constructor --- bayesflow/adapters/transforms/subsample_array.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/bayesflow/adapters/transforms/subsample_array.py b/bayesflow/adapters/transforms/subsample_array.py index 7c12554f0..c7d905964 100644 --- a/bayesflow/adapters/transforms/subsample_array.py +++ b/bayesflow/adapters/transforms/subsample_array.py @@ -13,10 +13,19 @@ class SubsampleArray(ElementwiseTransform): """ - def __init__(self): + def __init__( + self, + sample_size: int, + axis: int = -1, + ): super().__init__() + self.sample_size = sample_size + self.axis = axis - def forward(self, data: np.ndarray, sample_size: int, axis=-1): + def forward(self, data: np.ndarray): + sample_size = self.sample_size + axis = self.axis + max_sample_size = data.shape[axis] sample_indices = np.random.permutation(max_sample_size)[ From 71dc35afeba60e06f18034a21780c3e3071673ac Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 15:06:03 +0200 Subject: [PATCH 08/15] moved transforms in the adapter.py so they're in alphabetical order like the other transforms --- bayesflow/adapters/adapter.py | 88 +++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index d820e7286..7105fd2ec 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -543,6 +543,40 @@ def one_hot(self, keys: str | Sequence[str], num_classes: int): transform = MapTransform({key: OneHot(num_classes=num_classes) for key in keys}) self.transforms.append(transform) return self + + def random_subsample(self, + sample_size: int, 
+ axis: int=-1, + *, + predicate: Predicate = None, + include: str | Sequence[str] = None, + exclude: str | Sequence[str] = None, + **kwargs, + ): + """ + Append a :py:class:`~transforms.SubsampleArray` transform to the adapter. + + Parameters + ---------- + predicate : Predicate, optional + Function that indicates which variables should be transformed. + include : str or Sequence of str, optional + Names of variables to include in the transform. + exclude : str or Sequence of str, optional + Names of variables to exclude from the transform. + **kwargs : dict + Additional keyword arguments passed to the transform. + + """ + transform = MapTransform( + transform_constructor=SubsampleArray(sample_size=sample_size, axis=axis), + predicate=predicate, + include=include, + exclude=exclude, + **kwargs, + ) + self.transforms.append(transform) + return self def rename(self, from_key: str, to_key: str): """Append a :py:class:`~transforms.Rename` transform to the adapter. @@ -603,15 +637,14 @@ def standardize( self.transforms.append(transform) return self - def to_array( - self, + def take(self, *, predicate: Predicate = None, include: str | Sequence[str] = None, exclude: str | Sequence[str] = None, - **kwargs, - ): - """Append a :py:class:`~transforms.ToArray` transform to the adapter. + **kwargs,): + """ + Append a :py:class:`~transforms.Take` transform to the adapter. Parameters ---------- @@ -622,10 +655,9 @@ def to_array( exclude : str or Sequence of str, optional Names of variables to exclude from the transform. **kwargs : dict - Additional keyword arguments passed to the transform. - """ + Additional keyword arguments passed to the transform. 
""" transform = FilterTransform( - transform_constructor=ToArray, + transform_constructor=Take, predicate=predicate, include=include, exclude=exclude, @@ -634,15 +666,16 @@ def to_array( self.transforms.append(transform) return self - def random_subsample(self, + + def to_array( + self, *, predicate: Predicate = None, include: str | Sequence[str] = None, exclude: str | Sequence[str] = None, **kwargs, ): - """ - Append a :py:class:`~transforms.SubsampleArray` transform to the adapter. + """Append a :py:class:`~transforms.ToArray` transform to the adapter. Parameters ---------- @@ -654,10 +687,9 @@ def random_subsample(self, Names of variables to exclude from the transform. **kwargs : dict Additional keyword arguments passed to the transform. - """ transform = FilterTransform( - transform_constructor=SubsampleArray, + transform_constructor=ToArray, predicate=predicate, include=include, exclude=exclude, @@ -666,34 +698,8 @@ def random_subsample(self, self.transforms.append(transform) return self - def take(self, - *, - predicate: Predicate = None, - include: str | Sequence[str] = None, - exclude: str | Sequence[str] = None, - **kwargs,): - """ - Append a :py:class:`~transforms.Take` transform to the adapter. - - Parameters - ---------- - predicate : Predicate, optional - Function that indicates which variables should be transformed. - include : str or Sequence of str, optional - Names of variables to include in the transform. - exclude : str or Sequence of str, optional - Names of variables to exclude from the transform. - **kwargs : dict - Additional keyword arguments passed to the transform. 
""" - transform = FilterTransform( - transform_constructor=Take, - predicate=predicate, - include=include, - exclude=exclude, - **kwargs, - ) - self.transforms.append(transform) - return self + + From 6c34a5d87870bbcff589535d9bd26ef63434323d Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 15:18:53 +0200 Subject: [PATCH 09/15] changed random_subsample to maptransform rather than filter transform --- bayesflow/adapters/adapter.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index 7105fd2ec..b1d103b9c 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -545,12 +545,10 @@ def one_hot(self, keys: str | Sequence[str], num_classes: int): return self def random_subsample(self, + keys: str | Sequence[str], + *, sample_size: int, axis: int=-1, - *, - predicate: Predicate = None, - include: str | Sequence[str] = None, - exclude: str | Sequence[str] = None, **kwargs, ): """ @@ -568,13 +566,17 @@ def random_subsample(self, Additional keyword arguments passed to the transform. 
""" + if isinstance(keys, str): + keys = [keys] + transform = MapTransform( - transform_constructor=SubsampleArray(sample_size=sample_size, axis=axis), - predicate=predicate, - include=include, - exclude=exclude, - **kwargs, + transform_map={ + key: SubsampleArray(sample_size=sample_size, axis=axis) + for key in keys + } + ) + self.transforms.append(transform) return self From c3640cb8623ca9792cc37324835072cfcdae50b0 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 15:24:45 +0200 Subject: [PATCH 10/15] updated documentation with new naming convention --- bayesflow/adapters/transforms/subsample_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bayesflow/adapters/transforms/subsample_array.py b/bayesflow/adapters/transforms/subsample_array.py index c7d905964..8fa4304dd 100644 --- a/bayesflow/adapters/transforms/subsample_array.py +++ b/bayesflow/adapters/transforms/subsample_array.py @@ -9,7 +9,7 @@ class SubsampleArray(ElementwiseTransform): """ A transform that takes a random subsample of the data within an axis. 
- Example: adapter.subsample("x", sample_size = 3, axis = -1) + Example: adapter.random_subsample("x", sample_size = 3, axis = -1) """ From f17322f14a482057255e538846e488d28ed05698 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 15:28:28 +0200 Subject: [PATCH 11/15] added arguments of take to the constructor --- bayesflow/adapters/adapter.py | 6 ++++-- bayesflow/adapters/transforms/take.py | 9 ++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index b1d103b9c..9394c336a 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -576,7 +576,7 @@ def random_subsample(self, } ) - + self.transforms.append(transform) return self @@ -640,6 +640,8 @@ def standardize( return self def take(self, + indices, + axis, *, predicate: Predicate = None, include: str | Sequence[str] = None, @@ -659,7 +661,7 @@ def take(self, **kwargs : dict Additional keyword arguments passed to the transform. 
""" transform = FilterTransform( - transform_constructor=Take, + transform_constructor=Take(indices=indices, axis=axis), predicate=predicate, include=include, exclude=exclude, diff --git a/bayesflow/adapters/transforms/take.py b/bayesflow/adapters/transforms/take.py index aa9328e91..b2b478ba6 100644 --- a/bayesflow/adapters/transforms/take.py +++ b/bayesflow/adapters/transforms/take.py @@ -13,11 +13,14 @@ class Take(ElementwiseTransform): """ - def __init__(self): + def __init__(self,indices, axis=-1): super().__init__() + self.indices = indices + self.axis = axis - def forward(self, data, indices, axis=-1): - return np.take(data, indices, axis) + + def forward(self, data): + return np.take(data, self.indices, self.axis) def inverse(self, data): # not a true invertible function From 5312c5f52ed576ea1569e7fe8027403997b0ace3 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 15:55:23 +0200 Subject: [PATCH 12/15] added feature to specify a percentage of the data to subsample rather than only integer input --- bayesflow/adapters/transforms/subsample_array.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/bayesflow/adapters/transforms/subsample_array.py b/bayesflow/adapters/transforms/subsample_array.py index 8fa4304dd..7e1ec42aa 100644 --- a/bayesflow/adapters/transforms/subsample_array.py +++ b/bayesflow/adapters/transforms/subsample_array.py @@ -15,18 +15,26 @@ class SubsampleArray(ElementwiseTransform): def __init__( self, - sample_size: int, + sample_size: int | float, axis: int = -1, ): super().__init__() + if isinstance(sample_size, float): + if sample_size <= 0 or sample_size >= 1: + ValueError("Sample size as a percentage must be a float between 0 and 1 exclustive. 
") self.sample_size = sample_size self.axis = axis + def forward(self, data: np.ndarray): - sample_size = self.sample_size - axis = self.axis + axis = self.axis max_sample_size = data.shape[axis] + + if isinstance(self.sample_size, int): + sample_size = self.sample_size + else: + sample_size = np.round(self.sample_size * max_sample_size) sample_indices = np.random.permutation(max_sample_size)[ 0 : sample_size - 1 From 5361c04a0a30c34a3a0f73032a454a2489f1d09a Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 15:56:29 +0200 Subject: [PATCH 13/15] changed subsample in adapter.py to allow float as an input for the sample size --- bayesflow/adapters/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index 9394c336a..754aad8e5 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -547,7 +547,7 @@ def one_hot(self, keys: str | Sequence[str], num_classes: int): def random_subsample(self, keys: str | Sequence[str], *, - sample_size: int, + sample_size: int | float, axis: int=-1, **kwargs, ): From 504344b40b6950fcad50fb0ce54c699eddb90d27 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 17:47:38 +0200 Subject: [PATCH 14/15] renamed subsample_array and associated classes/functions to RandomSubsample and random_subsample respectively --- bayesflow/adapters/adapter.py | 4 ++-- bayesflow/adapters/transforms/__init__.py | 2 +- .../transforms/{subsample_array.py => random_subsample.py} | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename bayesflow/adapters/transforms/{subsample_array.py => random_subsample.py} (96%) diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index 754aad8e5..0072858cb 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -27,7 +27,7 @@ Standardize, ToArray, Transform, - SubsampleArray, + RandomSubsample, Take ) from .transforms.filter_transform 
import Predicate @@ -571,7 +571,7 @@ def random_subsample(self, transform = MapTransform( transform_map={ - key: SubsampleArray(sample_size=sample_size, axis=axis) + key:RandomSubsample(sample_size=sample_size, axis=axis) for key in keys } diff --git a/bayesflow/adapters/transforms/__init__.py b/bayesflow/adapters/transforms/__init__.py index 4bacb0a8e..7e82714c4 100644 --- a/bayesflow/adapters/transforms/__init__.py +++ b/bayesflow/adapters/transforms/__init__.py @@ -18,7 +18,7 @@ from .standardize import Standardize from .to_array import ToArray from .transform import Transform -from .subsample_array import SubsampleArray +from .random_subsample import RandomSubsample from .take import Take from ...utils._docs import _add_imports_to_all diff --git a/bayesflow/adapters/transforms/subsample_array.py b/bayesflow/adapters/transforms/random_subsample.py similarity index 96% rename from bayesflow/adapters/transforms/subsample_array.py rename to bayesflow/adapters/transforms/random_subsample.py index 7e1ec42aa..4998169fd 100644 --- a/bayesflow/adapters/transforms/subsample_array.py +++ b/bayesflow/adapters/transforms/random_subsample.py @@ -5,7 +5,7 @@ @serializable(package="bayesflow.adapters") -class SubsampleArray(ElementwiseTransform): +class RandomSubsample(ElementwiseTransform): """ A transform that takes a random subsample of the data within an axis. 
From 4218b703d069c7c1b5c5c92c8344ecf0e9fd0ae1 Mon Sep 17 00:00:00 2001 From: Leona Odole Date: Tue, 22 Apr 2025 18:11:05 +0200 Subject: [PATCH 15/15] included TypeError to force users to only subsample one dataset at a time --- bayesflow/adapters/adapter.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bayesflow/adapters/adapter.py b/bayesflow/adapters/adapter.py index 0072858cb..b67e12c87 100644 --- a/bayesflow/adapters/adapter.py +++ b/bayesflow/adapters/adapter.py @@ -545,7 +545,7 @@ def one_hot(self, keys: str | Sequence[str], num_classes: int): return self def random_subsample(self, - keys: str | Sequence[str], + key: str | Sequence[str], *, sample_size: int | float, axis: int=-1, @@ -566,8 +566,13 @@ def random_subsample(self, Additional keyword arguments passed to the transform. """ - if isinstance(keys, str): - keys = [keys] + + + if isinstance(key, Sequence[str]) and len(keys) >1: + TypeError("`key` should be either a string or a list of length one. Only one dataset may be modified at a time.") + + if isinstance(key, str): + keys = [key] transform = MapTransform( transform_map={ @@ -688,7 +693,7 @@ def to_array( include : str or Sequence of str, optional Names of variables to include in the transform. exclude : str or Sequence of str, optional - Names of variables to exclude from the transform. + Names of variables to exclude from the transform. **kwargs : dict Additional keyword arguments passed to the transform. """