From c3825ea59c60c8780aaacaadedc9c3f7ad09b2de Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Sun, 31 Jul 2022 22:40:48 -0400
Subject: [PATCH 1/9] support for Tensorflow Keras

---
 eli5/keras/explain_prediction.py | 6 +++++-
 eli5/keras/gradcam.py            | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/eli5/keras/explain_prediction.py b/eli5/keras/explain_prediction.py
index 73deb25b..c2274e65 100644
--- a/eli5/keras/explain_prediction.py
+++ b/eli5/keras/explain_prediction.py
@@ -5,7 +5,11 @@
 import PIL
 import numpy as np
 
-import keras
+import os
+if 'TF_KERAS' in os.environ and os.environ['TF_KERAS'] == '1':
+    from tensorflow import keras
+else:
+    import keras
 import keras.backend as K
 from keras.models import Model
 from keras.layers import Layer
diff --git a/eli5/keras/gradcam.py b/eli5/keras/gradcam.py
index c8bb5922..1953faa8 100644
--- a/eli5/keras/gradcam.py
+++ b/eli5/keras/gradcam.py
@@ -3,7 +3,11 @@
 from typing import Union, Optional, Tuple, List
 
 import numpy as np
-import keras
+import os
+if 'TF_KERAS' in os.environ and os.environ['TF_KERAS'] == '1':
+    from tensorflow import keras
+else:
+    import keras
 import keras.backend as K
 from keras.models import Model
 from keras.layers import Layer

From e69c31211173ae10c234ad8db1a4b3b62413ecb7 Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Sun, 31 Jul 2022 22:51:05 -0400
Subject: [PATCH 2/9] changes due to breaking changes in TF 2.9 and scipy 1.9

---
 eli5/keras/explain_prediction.py | 9 ++++++++-
 eli5/lime/samplers.py            | 4 ++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/eli5/keras/explain_prediction.py b/eli5/keras/explain_prediction.py
index c2274e65..12afc31c 100644
--- a/eli5/keras/explain_prediction.py
+++ b/eli5/keras/explain_prediction.py
@@ -20,7 +20,14 @@
     GlobalMaxPooling2D,
     GlobalAveragePooling2D,
 )
-from keras.preprocessing.image import array_to_img
+try:
+    # tensorflow<2.9
+    from keras.preprocessing.image import array_to_img
+except:
+    # tensorflow>=2.9
+    # reference: https://www.tensorflow.org/api_docs/python/tf/keras/utils/array_to_img
+    from tensorflow.keras.utils import array_to_img
+
 
 from eli5.base import Explanation, TargetExplanation
 from eli5.explain import explain_prediction
diff --git a/eli5/lime/samplers.py b/eli5/lime/samplers.py
index d079b471..7680a438 100644
--- a/eli5/lime/samplers.py
+++ b/eli5/lime/samplers.py
@@ -6,7 +6,6 @@
 import six
 
 import numpy as np
-from scipy.stats import itemfreq
 from sklearn.base import BaseEstimator, clone
 from sklearn.neighbors import KernelDensity
 from sklearn.metrics import pairwise_distances
@@ -188,7 +187,8 @@ def _sampler_n_samples(self, n_samples):
                                            p=self.weights)
         return [
             (self.samplers[idx], freq)
-            for idx, freq in itemfreq(sampler_indices)
+            # use np.unique due to removal of scipy.stats.itemfreq
+            for idx, freq in np.vstack(np.unique(sampler_indices, return_counts=True)).transpose()
         ]
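For reference, the itemfreq replacement in patch 2 can be checked in isolation. A minimal standalone sketch (the sample index array is invented for illustration, it is not eli5 test data):

    import numpy as np

    sampler_indices = np.array([0, 1, 1, 2, 2, 2])

    # scipy.stats.itemfreq(x) returned a 2-column array of (value, count) rows;
    # np.unique(..., return_counts=True) yields the same data as two parallel
    # arrays, and vstack().transpose() restores the row-per-item shape the loop expects.
    values, counts = np.unique(sampler_indices, return_counts=True)
    pairs = np.vstack((values, counts)).transpose()
    print(pairs.tolist())  # [[0, 1], [1, 2], [2, 3]]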
From 05e61b7dabc0ea689c0bf52def2144db3066c21e Mon Sep 17 00:00:00 2001
From: Arun Maiya <arun@maiya.net>
Date: Mon, 1 Aug 2022 13:46:48 -0400
Subject: [PATCH 3/9] fixes for imports

---
 eli5/keras/explain_prediction.py | 21 ++++++++++-----------
 eli5/keras/gradcam.py            |  6 +++---
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/eli5/keras/explain_prediction.py b/eli5/keras/explain_prediction.py
index 12afc31c..726b79c2 100644
--- a/eli5/keras/explain_prediction.py
+++ b/eli5/keras/explain_prediction.py
@@ -10,19 +10,18 @@
     from tensorflow import keras
 else:
     import keras
-import keras.backend as K
-from keras.models import Model
-from keras.layers import Layer
-from keras.layers import (
-    Conv2D,
-    MaxPooling2D,
-    AveragePooling2D,
-    GlobalMaxPooling2D,
-    GlobalAveragePooling2D,
-)
+K = keras.backend
+Model = keras.models.Model
+Layer = keras.layers.Layer
+Conv2D = keras.layers.Conv2D
+MaxPooling2D = keras.layers.MaxPooling2D
+AveragePooling2D = keras.layers.AveragePooling2D
+GlobalMaxPooling2D = keras.layers.GlobalMaxPooling2D
+GlobalAveragePooling2D = keras.layers.GlobalAveragePooling2D
+
 try:
     # tensorflow<2.9
-    from keras.preprocessing.image import array_to_img
+    array_to_img = keras.preprocessing.image.array_to_img
 except:
     # tensorflow>=2.9
     # reference: https://www.tensorflow.org/api_docs/python/tf/keras/utils/array_to_img
diff --git a/eli5/keras/gradcam.py b/eli5/keras/gradcam.py
index 1953faa8..df533a4b 100644
--- a/eli5/keras/gradcam.py
+++ b/eli5/keras/gradcam.py
@@ -8,9 +8,9 @@
     from tensorflow import keras
 else:
     import keras
-import keras.backend as K
-from keras.models import Model
-from keras.layers import Layer
+K = keras.backend
+Model = keras.models.Model
+Layer = keras.layers.Layer
 
 
 def gradcam(weights, activations):

From dff9a573bd4fcb8a2b7def54ff37455a1673ee7b Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Wed, 22 Mar 2023 13:25:29 -0400
Subject: [PATCH 4/9] replace distutils LooseVersion with packaging.version.parse

---
 eli5/lime/lime.py  | 4 +++-
 eli5/lime/utils.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/eli5/lime/lime.py b/eli5/lime/lime.py
index 2968da04..3914cdce 100644
--- a/eli5/lime/lime.py
+++ b/eli5/lime/lime.py
@@ -27,6 +27,8 @@
 )
 from eli5.lime._vectorizer import SingleDocumentVectorizer
 
+from packaging.version import parse
+
 
 class TextExplainer(BaseEstimator):
     """
@@ -325,7 +327,7 @@ def _default_clf(self):
             alpha=1e-3,
             random_state=self.rng_
         )
-        if sklearn_version() >= '0.19':
+        if sklearn_version() >= parse('0.19'):
             kwargs['tol'] = 1e-3
         return SGDClassifier(**kwargs)
diff --git a/eli5/lime/utils.py b/eli5/lime/utils.py
index 120dbfbd..cec7fdb2 100644
--- a/eli5/lime/utils.py
+++ b/eli5/lime/utils.py
@@ -12,6 +12,8 @@
 from eli5.utils import vstack
 from eli5.sklearn.utils import sklearn_version
 
+from packaging.version import parse
+
 
 def fit_proba(clf, X, y_proba, expand_factor=10,
               sample_weight=None, shuffle=True, random_state=None,
@@ -83,7 +85,7 @@ def score(self, X, y=None, **score_params):
 
 
 def score_with_sample_weight(estimator, X, y=None, sample_weight=None):
-    if sklearn_version() < '0.19':
+    if sklearn_version() < parse('0.19'):
         if isinstance(estimator, Pipeline) and sample_weight is not None:
             estimator = _PipelinePatched(estimator.steps)
     if sample_weight is None:
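A minimal sketch of why patch 4 swaps plain string comparison for packaging.version.parse (the version strings below are invented for illustration):

    from packaging.version import parse

    # String comparison is lexicographic, so it misorders versions once a
    # component grows past one digit: '9' > '1' makes '0.9' look newer than '0.19'.
    print('0.9' >= '0.19')                # True  -- wrong as a version comparison
    print(parse('0.9') >= parse('0.19'))  # False -- parse() compares release tuples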
From 7b2a5a28b493020379ae6f239ec21ba1bfa66fd0 Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Wed, 22 Mar 2023 13:42:07 -0400
Subject: [PATCH 5/9] use get_feature_names_out to ensure text is highlighted properly

---
 eli5/sklearn/utils.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/eli5/sklearn/utils.py b/eli5/sklearn/utils.py
index 286d078a..fe7205ad 100644
--- a/eli5/sklearn/utils.py
+++ b/eli5/sklearn/utils.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
-from distutils.version import LooseVersion
+#from distutils.version import LooseVersion  # deprecated
+from packaging.version import parse as LooseVersion
 from typing import Any, Optional, List, Tuple
 
 import numpy as np
@@ -80,7 +81,9 @@ def get_feature_names(clf, vec=None, bias_name='<BIAS>', feature_names=None,
         bias_name = None
 
     if feature_names is None:
-        if vec and hasattr(vec, 'get_feature_names'):
+        if vec and hasattr(vec, 'get_feature_names_out'):
+            return FeatureNames(vec.get_feature_names_out(), bias_name=bias_name)
+        elif vec and hasattr(vec, 'get_feature_names'):
             return FeatureNames(vec.get_feature_names(), bias_name=bias_name)
         else:
             if estimator_feature_names is None:
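A standalone sketch of the fallback probed in patch 5 (the toy corpus is invented; scikit-learn >= 1.0 exposes get_feature_names_out, and 1.2 removed the old get_feature_names):

    from sklearn.feature_extraction.text import CountVectorizer

    vec = CountVectorizer().fit(['good movie', 'bad movie'])

    # Prefer the new accessor, fall back to the old one on older scikit-learn.
    if hasattr(vec, 'get_feature_names_out'):
        names = vec.get_feature_names_out()
    else:
        names = vec.get_feature_names()
    print(list(names))  # ['bad', 'good', 'movie']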
From 840695d869e47b8e6cc05baca428d24881113fb6 Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Sat, 22 Jul 2023 16:20:28 -0400
Subject: [PATCH 6/9] changes to support scikit-learn>=1.3

---
 eli5/lime/lime.py                      |  2 +-
 eli5/lime/utils.py                     | 12 ++++++++++--
 eli5/sklearn/permutation_importance.py | 19 +++++++++++++------
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/eli5/lime/lime.py b/eli5/lime/lime.py
index 3914cdce..ad21e4f0 100644
--- a/eli5/lime/lime.py
+++ b/eli5/lime/lime.py
@@ -322,7 +322,7 @@ def _fix_target_names(self, kwargs):
 
     def _default_clf(self):
         kwargs = dict(
-            loss='log',
+            loss='log_loss',
             penalty='elasticnet',
             alpha=1e-3,
             random_state=self.rng_
diff --git a/eli5/lime/utils.py b/eli5/lime/utils.py
index cec7fdb2..3f78e0eb 100644
--- a/eli5/lime/utils.py
+++ b/eli5/lime/utils.py
@@ -6,7 +6,7 @@
 from scipy.stats import entropy
 from sklearn.pipeline import Pipeline
 from sklearn.utils import check_random_state, issparse
-from sklearn.utils.metaestimators import if_delegate_has_method
+from sklearn.utils.metaestimators import available_if
 from sklearn.utils import shuffle as _shuffle
 
 from eli5.utils import vstack
@@ -75,7 +75,15 @@ def fix_multiclass_predict_proba(y_proba,  # type: np.ndarray
 class _PipelinePatched(Pipeline):
     # Patch from https://github.com/scikit-learn/scikit-learn/pull/7723;
     # only needed for scikit-learn < 0.19.
-    @if_delegate_has_method(delegate='_final_estimator')
+
+    # Reference: https://github.com/scikit-learn/scikit-learn/issues/20506
+    def _estimator_has(attr):
+        def check(self):
+            return hasattr(self.estimator, attr)
+
+        return check
+
+    @available_if(_estimator_has('_final_estimator'))
     def score(self, X, y=None, **score_params):
         Xt = X
         for name, transform in self.steps[:-1]:
diff --git a/eli5/sklearn/permutation_importance.py b/eli5/sklearn/permutation_importance.py
index ca73d769..293e4985 100644
--- a/eli5/sklearn/permutation_importance.py
+++ b/eli5/sklearn/permutation_importance.py
@@ -4,7 +4,7 @@
 import numpy as np
 
 from sklearn.model_selection import check_cv
-from sklearn.utils.metaestimators import if_delegate_has_method
+from sklearn.utils.metaestimators import available_if
 from sklearn.utils import check_array, check_random_state
 from sklearn.base import (
     BaseEstimator,
@@ -247,23 +247,30 @@ def caveats_(self):
 
     # ============= Exposed methods of a wrapped estimator:
 
-    @if_delegate_has_method(delegate='wrapped_estimator_')
+    # Reference: https://github.com/scikit-learn/scikit-learn/issues/20506
+    def _estimator_has(attr):
+        def check(self):
+            return hasattr(self.estimator, attr)
+
+        return check
+
+    @available_if(_estimator_has('wrapped_estimator_'))
     def score(self, X, y=None, *args, **kwargs):
         return self.wrapped_estimator_.score(X, y, *args, **kwargs)
 
-    @if_delegate_has_method(delegate='wrapped_estimator_')
+    @available_if(_estimator_has('wrapped_estimator_'))
     def predict(self, X):
         return self.wrapped_estimator_.predict(X)
 
-    @if_delegate_has_method(delegate='wrapped_estimator_')
+    @available_if(_estimator_has('wrapped_estimator_'))
     def predict_proba(self, X):
         return self.wrapped_estimator_.predict_proba(X)
 
-    @if_delegate_has_method(delegate='wrapped_estimator_')
+    @available_if(_estimator_has('wrapped_estimator_'))
     def predict_log_proba(self, X):
         return self.wrapped_estimator_.predict_log_proba(X)
 
-    @if_delegate_has_method(delegate='wrapped_estimator_')
+    @available_if(_estimator_has('wrapped_estimator_'))
     def decision_function(self, X):
         return self.wrapped_estimator_.decision_function(X)
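A small standalone sketch of the available_if pattern used in patch 6 (the Wrapper class and wrapped_ attribute here are illustrative, not eli5 code):

    from sklearn.utils.metaestimators import available_if

    def _delegate_has(attr):
        # available_if takes a callable; the decorated method is only exposed on
        # an instance (hasattr() is True) when that callable returns True for it.
        def check(self):
            return hasattr(self.wrapped_, attr)
        return check

    class Wrapper:
        def __init__(self, wrapped):
            self.wrapped_ = wrapped

        @available_if(_delegate_has('predict'))
        def predict(self, X):
            return self.wrapped_.predict(X)

    print(hasattr(Wrapper(wrapped=[]), 'predict'))  # False: a list has no predict()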
From 54639eb3c7a53208ed819a1e40518e18f6d3d6ac Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Sat, 22 Jul 2023 17:02:13 -0400
Subject: [PATCH 7/9] change deprecated Image.LANCZOS calls

---
 eli5/formatters/image.py        | 10 +++++-----
 tests/test_keras_integration.py |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/eli5/formatters/image.py b/eli5/formatters/image.py
index f776b2c2..de0abe43 100644
--- a/eli5/formatters/image.py
+++ b/eli5/formatters/image.py
@@ -10,12 +10,12 @@
 def format_as_image(expl, # type: Explanation
-    resampling_filter=Image.LANCZOS, # type: int
+    resampling_filter=Image.Resampling.LANCZOS, # type: int
     colormap=matplotlib.cm.viridis, # type: Callable[[np.ndarray], np.ndarray]
     alpha_limit=0.65, # type: Optional[Union[float, int]]
     ):
     # type: (...) -> Image
-    """format_as_image(expl, resampling_filter=Image.LANCZOS, colormap=matplotlib.cm.viridis, alpha_limit=0.65)
+    """format_as_image(expl, resampling_filter=Image.Resampling.LANCZOS, colormap=matplotlib.cm.viridis, alpha_limit=0.65)
 
     Format a :class:`eli5.base.Explanation` object as an image.
@@ -50,7 +50,7 @@ def format_as_image(expl, # type: Explanation
         *Note that these attributes are integer values*.
 
-        Default is ``PIL.Image.LANCZOS``.
+        Default is ``PIL.Image.Resampling.LANCZOS``.
     :type resampling_filter: int, optional
 
     :param colormap:
@@ -239,7 +239,7 @@ def _cap_alpha(alpha_arr, alpha_limit):
                          'got: {}'.format(alpha_limit))
 
 
-def expand_heatmap(heatmap, image, resampling_filter=Image.LANCZOS):
+def expand_heatmap(heatmap, image, resampling_filter=Image.Resampling.LANCZOS):
     # type: (np.ndarray, Image, Union[None, int]) -> Image
     """
     Resize the ``heatmap`` image array to fit over the original ``image``,
@@ -286,4 +286,4 @@ def _overlay_heatmap(heatmap, image):
     """
     # note that the order of alpha_composite arguments matters
     overlayed_image = Image.alpha_composite(image, heatmap)
-    return overlayed_image
\ No newline at end of file
+    return overlayed_image
diff --git a/tests/test_keras_integration.py b/tests/test_keras_integration.py
index d1e1292f..4c9872f0 100644
--- a/tests/test_keras_integration.py
+++ b/tests/test_keras_integration.py
@@ -81,7 +81,7 @@ def assert_attention_over_area(expl, area):
     heatmap = expl.targets[0].heatmap
 
     # fit heatmap over image
-    heatmap = expand_heatmap(heatmap, image, Image.LANCZOS)
+    heatmap = expand_heatmap(heatmap, image, Image.Resampling.LANCZOS)
     heatmap = np.array(heatmap)
 
     # get a slice of the area
@@ -159,4 +159,4 @@ def test_show_prediction_nodeps(show_nodeps, keras_clf, cat_dog_image):
 ])
 def test_explain_prediction_not_supported(model, doc):
     res = eli5.explain_prediction(model, doc)
-    assert 'supported' in res.error
\ No newline at end of file
+    assert 'supported' in res.error

From 708f4bd884c3a79bc96e0f4263b103decd21dc00 Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Sat, 18 Nov 2023 11:16:39 -0500
Subject: [PATCH 8/9] fix re.error: global flags not at the start of the expression

---
 eli5/lime/textutils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/eli5/lime/textutils.py b/eli5/lime/textutils.py
index 98da0428..08ff2dae 100644
--- a/eli5/lime/textutils.py
+++ b/eli5/lime/textutils.py
@@ -14,7 +14,7 @@
 # the same as scikit-learn token pattern, but allows single-char tokens
-DEFAULT_TOKEN_PATTERN = r'(?u)\b\w+\b'
+DEFAULT_TOKEN_PATTERN = r'\b\w+\b'
 
 # non-whitespace chars
 CHAR_TOKEN_PATTERN = r'[^\s]'
@@ -183,7 +183,7 @@ def __init__(self, parts):
     def fromtext(cls, text, token_pattern=DEFAULT_TOKEN_PATTERN):
         # type: (str, str) -> SplitResult
         token_pattern = u"(%s)" % token_pattern
-        parts = re.split(token_pattern, text)
+        parts = re.split(token_pattern, text, flags=re.UNICODE)
         return cls(parts)
 
     @property

From 9e124c6476032633dd321ce16837fb86f714a5ee Mon Sep 17 00:00:00 2001
From: "Arun S. Maiya" <arun@maiya.net>
Date: Thu, 13 Jun 2024 21:29:38 -0400
Subject: [PATCH 9/9] import issparse from scipy instead of sklearn.utils

---
 eli5/lime/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/eli5/lime/utils.py b/eli5/lime/utils.py
index 3f78e0eb..429647c6 100644
--- a/eli5/lime/utils.py
+++ b/eli5/lime/utils.py
@@ -5,7 +5,8 @@
 import numpy as np
 from scipy.stats import entropy
 from sklearn.pipeline import Pipeline
-from sklearn.utils import check_random_state, issparse
+from sklearn.utils import check_random_state
+from scipy.sparse import issparse
 from sklearn.utils.metaestimators import available_if
 from sklearn.utils import shuffle as _shuffle
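For the LANCZOS change in patch 7, a minimal sketch of the new constant location (the image itself is a throwaway placeholder): Pillow 9.1 introduced the Image.Resampling enum, and Pillow 10 removed the old module-level aliases such as Image.LANCZOS.

    from PIL import Image

    img = Image.new('RGB', (32, 32))

    # Image.Resampling.LANCZOS works on Pillow >= 9.1; the old Image.LANCZOS
    # alias is gone in Pillow >= 10, which is what the patch accounts for.
    resized = img.resize((64, 64), resample=Image.Resampling.LANCZOS)
    print(resized.size)  # (64, 64)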
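And for patch 8, a standalone sketch of the re behaviour it works around (sample text invented): since Python 3.11, a global inline flag such as (?u) anywhere but the very start of the pattern is an error, which is exactly what happens once the token pattern is wrapped in a capturing group.

    import re

    text = 'Hello, world'

    # Old behaviour: u"(%s)" % r'(?u)\b\w+\b' -> r'((?u)\b\w+\b)', which raises
    # "re.error: global flags not at the start of the expression" on Python >= 3.11.
    # New behaviour: the flag is passed explicitly instead of being embedded.
    parts = re.split(r'(\b\w+\b)', text, flags=re.UNICODE)
    print(parts)  # ['', 'Hello', ', ', 'world', '']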