diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b13a2c5a..fefa50e2 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -17,26 +17,24 @@ jobs: fail-fast: false matrix: include: - - python-version: '3.6' + - python-version: '3.10' tox-env: 'mypy' - - python-version: '3.9' + - python-version: '3.10' tox-env: 'docs' - - python-version: '3.6' - tox-env: 'py36' - - python-version: '3.6' - tox-env: 'py36-nodeps' - - python-version: '3.6' - tox-env: 'py36-extra' - - python-version: '3.7' - tox-env: 'py37' - - python-version: '3.8' - tox-env: 'py38' - - python-version: '3.8' - tox-env: 'py38-nodeps' - python-version: '3.9' tox-env: 'py39' - - python-version: '3.9' - tox-env: 'py39-nodeps' + - python-version: '3.10' + tox-env: 'py310' + - python-version: '3.10' + tox-env: 'py310-nodeps' + - python-version: '3.10' + tox-env: 'py310-extra' + - python-version: '3.11' + tox-env: 'py311' + - python-version: '3.12' + tox-env: 'py312' + - python-version: '3.13' + tox-env: 'py313' steps: - uses: actions/checkout@v2 diff --git a/CHANGES.rst b/CHANGES.rst index a733d762..856644ce 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changelog ========= +0.14.0 (?) +------------------- + +* drop support for python 3.6, 3.7, 3.8 +* add support for python 3.11, 3.12, 3.13 + 0.13.0 (2022-05-11) ------------------- diff --git a/_ci/runtests_default.sh b/_ci/runtests_default.sh deleted file mode 100644 index d6dbcc6d..00000000 --- a/_ci/runtests_default.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -py.test --doctest-modules \ - --ignore eli5/xgboost.py \ - --ignore eli5/lightgbm.py \ - --ignore eli5/catboost.py \ - --ignore eli5/keras \ - --ignore eli5/sklearn_crfsuite \ - --ignore eli5/formatters/image.py \ - --ignore tests/utils_image.py \ - --cov=eli5 --cov-report=html --cov-report=term "$@" diff --git a/_ci/runtests_default_with_crfsuite.sh b/_ci/runtests_default_with_crfsuite.sh deleted file mode 100644 index 48bfa4b1..00000000 --- a/_ci/runtests_default_with_crfsuite.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -py.test --doctest-modules \ - --ignore eli5/xgboost.py \ - --ignore eli5/lightgbm.py \ - --ignore eli5/catboost.py \ - --ignore eli5/keras \ - --ignore eli5/formatters/image.py \ - --ignore tests/utils_image.py \ - --cov=eli5 --cov-report=html --cov-report=term "$@" diff --git a/_ci/runtests_extra.sh b/_ci/runtests_extra.sh deleted file mode 100644 index 6bfddbc5..00000000 --- a/_ci/runtests_extra.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash -py.test --doctest-modules \ - --ignore tests/test_lime.py \ - --ignore tests/test_formatters.py \ - --ignore tests/test_samplers.py \ - --ignore tests/test_sklearn_explain_prediction.py \ - --ignore tests/test_sklearn_explain_weights.py \ - --ignore tests/test_sklearn_vectorizers.py \ - --ignore tests/test_utils.py \ - --ignore eli5/lightning.py \ - --ignore eli5/sklearn_crfsuite \ - --cov=eli5 --cov-report=html --cov-report=term "$@" diff --git a/_ci/runtests_nodeps.sh b/_ci/runtests_nodeps.sh deleted file mode 100644 index 36074072..00000000 --- a/_ci/runtests_nodeps.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash -py.test --doctest-modules \ - --ignore eli5/lightning.py \ - --ignore eli5/sklearn_crfsuite \ - --ignore eli5/ipython.py \ - --ignore eli5/xgboost.py \ - --ignore eli5/lightgbm.py \ - --ignore eli5/catboost.py \ - --ignore eli5/keras \ - --ignore eli5/formatters/as_dataframe.py \ - --ignore eli5/formatters/image.py 
\ - --ignore tests/utils_image.py \ - --cov=eli5 --cov-report=html --cov-report=term "$@" diff --git a/docs/requirements.txt b/docs/requirements.txt index ea3b328e..940c8e0e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,5 +4,5 @@ sphinx_rtd_theme ipython scipy numpy > 1.9.0 -scikit-learn >= 0.20 -typing +pandas +scikit-learn >= 1.6.0 diff --git a/docs/source/conf.py b/docs/source/conf.py index 8a6c5723..c4ffee72 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -54,7 +54,7 @@ def __getattr__(cls, name): 'keras.models', 'keras.layers', 'keras.preprocessing.image', - 'pandas', + # 'pandas', 'PIL', 'matplotlib', 'matplotlib.pyplot', @@ -69,7 +69,7 @@ def __getattr__(cls, name): def setup(app): # see https://github.com/snide/sphinx_rtd_theme/issues/117 - app.add_stylesheet("rtfd_overrides.css") + app.add_css_file("rtfd_overrides.css") suppress_warnings = ['image.nonlocal_uri'] @@ -123,7 +123,7 @@ def setup(app): # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/docs/source/libraries/keras.rst b/docs/source/libraries/keras.rst index c835606a..690d2863 100644 --- a/docs/source/libraries/keras.rst +++ b/docs/source/libraries/keras.rst @@ -8,7 +8,7 @@ Keras_ is "a high-level neural networks API, written in Python and capable of ru Keras can be used for many Machine Learning tasks, and it has support for both popular and experimental neural network architectures. -Note: only TensorFlow 1.x is supported, recommended Keras version is 2.3.1 or earlier. +Note: only TensorFlow 1.x is supported, recommended Keras version is 2.3.1 or earlier, and eli5 version 0.13 or earlier, as you can't install TensorFlow 1.x on Python 3.9+ which is required for eli5 0.14+ .. _Keras: https://keras.io/ diff --git a/docs/source/libraries/xgboost.rst b/docs/source/libraries/xgboost.rst index ad4384f1..c1897716 100644 --- a/docs/source/libraries/xgboost.rst +++ b/docs/source/libraries/xgboost.rst @@ -6,7 +6,9 @@ XGBoost XGBoost_ is a popular Gradient Boosting library with Python interface. eli5 supports :func:`eli5.explain_weights` and :func:`eli5.explain_prediction` for XGBClassifer_, XGBRegressor_ and Booster_ estimators. It is tested for -xgboost >= 0.6a2. +xgboost >= 0.6a2 and < 2.0.0. +Versions starting from 2.0.0 likely produce incorrect results in +:func:`eli5.explain_prediction`, and will issue a warning. .. _XGBoost: https://github.com/dmlc/xgboost .. _XGBClassifer: https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.XGBClassifier diff --git a/eli5/_feature_names.py b/eli5/_feature_names.py index ff1fd80c..c4808db3 100644 --- a/eli5/_feature_names.py +++ b/eli5/_feature_names.py @@ -1,9 +1,6 @@ import re -import six from typing import ( - Any, Iterable, Iterator, Tuple, Sized, List, Optional, Dict, - Union, Callable, Pattern -) + Any, Iterable, Iterator, Sized, Optional, Union, Callable, Pattern) import numpy as np import scipy.sparse as sp @@ -14,15 +11,14 @@ class FeatureNames(Sized, Iterable): A list-like object with feature names. It allows feature names for unknown features to be generated using a provided template, and to avoid making copies of large objects - in get_feature_names. + in get_feature_names_out. 
""" def __init__(self, feature_names=None, - bias_name=None, # type: str - unkn_template=None, # type: str - n_features=None, # type: int + bias_name: Optional[str] = None, + unkn_template: Optional[str] = None, + n_features: Optional[int] = None, ): - # type: (...) -> None if not (feature_names is not None or (unkn_template is not None and n_features)): raise ValueError( @@ -39,20 +35,17 @@ def __init__(self, 'unkn_template should be set for sparse features') self.feature_names = feature_names self.unkn_template = unkn_template - self.n_features = n_features or len(feature_names) # type: int + self.n_features: int = n_features or len(feature_names) self.bias_name = bias_name - def __repr__(self): - # type: () -> str + def __repr__(self) -> str: return ''.format( self.n_features, 'with' if self.has_bias else 'without') - def __len__(self): - # type: () -> int + def __len__(self) -> int: return self.n_features + int(self.has_bias) - def __iter__(self): - # type: () -> Iterator[str] + def __iter__(self) -> Iterator[str]: return (self[i] for i in range(len(self))) def __getitem__(self, idx): @@ -69,10 +62,10 @@ def __getitem__(self, idx): return self.unkn_template % idx raise IndexError('Feature index out of range') - def _slice(self, aslice): - # type: (slice) -> Any + def _slice(self, aslice: slice): if isinstance(self.feature_names, (list, np.ndarray)): # Fast path without going through __getitem__ + lst: Union[list, np.ndarray] if self.has_bias: lst = list(self.feature_names) lst.append(self.bias_name) @@ -84,29 +77,26 @@ def _slice(self, aslice): return [self[idx] for idx in indices] @property - def has_bias(self): - # type: () -> bool + def has_bias(self) -> bool: return self.bias_name is not None @property - def bias_idx(self): - # type: () -> Optional[int] + def bias_idx(self) -> Optional[int]: if self.has_bias: return self.n_features return None - def filtered(self, feature_filter, x=None): - # type: (Callable, Any) -> Tuple[FeatureNames, List[int]] + def filtered(self, feature_filter: Callable, x=None) -> tuple['FeatureNames', list[int]]: """ Return feature names filtered by a regular expression ``feature_re``, and indices of filtered elements. """ indices = [] filtered_feature_names = [] - indexed_names = None # type: Optional[Iterable[Tuple[int, Any]]] + indexed_names: Optional[Iterable[tuple[int, Any]]] = None if isinstance(self.feature_names, (np.ndarray, list)): indexed_names = enumerate(self.feature_names) elif isinstance(self.feature_names, dict): - indexed_names = six.iteritems(self.feature_names) + indexed_names = self.feature_names.items() elif self.feature_names is None: indexed_names = [] assert indexed_names is not None @@ -116,8 +106,7 @@ def filtered(self, feature_filter, x=None): assert x.shape[0] == 1 flt = lambda nm, i: feature_filter(nm, x[0, i]) else: - # FIXME: mypy warns about x[i] because it thinks x can be None - flt = lambda nm, i: feature_filter(nm, x[i]) # type: ignore + flt = lambda nm, i: feature_filter(nm, x[i]) else: flt = lambda nm, i: feature_filter(nm) @@ -141,10 +130,9 @@ def filtered(self, feature_filter, x=None): def handle_filter(self, feature_filter, - feature_re, # type: Pattern[str] - x=None, # type: Any - ): - # type: (...) 
-> Tuple[FeatureNames, Union[List[int], None]] + feature_re: Pattern[str], + x=None, + ) -> tuple['FeatureNames', Union[list[int], None]]: if feature_re is not None and feature_filter: raise ValueError('pass either feature_filter or feature_re') if feature_re is not None: @@ -158,8 +146,7 @@ def handle_filter(self, else: return self, None - def add_feature(self, feature): - # type: (Any) -> int + def add_feature(self, feature) -> int: """ Add a new feature name, return it's index. """ # A copy of self.feature_names is always made, because it might be @@ -179,8 +166,7 @@ def add_feature(self, feature): return idx -def _all_feature_names(name): - # type: (Union[str, bytes, List[Dict]]) -> List[str] +def _all_feature_names(name: Union[str, bytes, list[dict]]) -> list[str]: """ All feature names for a feature: usually just the feature itself, but can be several features for unhashed features with collisions. """ diff --git a/eli5/base.py b/eli5/base.py index 3bac3b5b..bb072499 100644 --- a/eli5/base.py +++ b/eli5/base.py @@ -1,5 +1,4 @@ -# -*- coding: utf-8 -*- -from typing import Any, List, Tuple, Union, Optional +from typing import Union, Optional import numpy as np @@ -17,19 +16,18 @@ class Explanation(object): it can either explain weights or a single prediction. """ def __init__(self, - estimator, # type: str - description=None, # type: Optional[str] - error=None, # type: Optional[str] - method=None, # type: Optional[str] - is_regression=False, # type: bool - targets=None, # type: Optional[List[TargetExplanation]] - feature_importances=None, # type: Optional[FeatureImportances] - decision_tree=None, # type: Optional[TreeInfo] - highlight_spaces=None, # type: Optional[bool] - transition_features=None, # type: Optional[TransitionFeatureWeights] - image=None, # type: Any + estimator: str, + description: Optional[str] = None, + error: Optional[str] = None, + method: Optional[str] = None, + is_regression: bool = False, + targets: Optional[list['TargetExplanation']] = None, + feature_importances: Optional['FeatureImportances'] = None, + decision_tree: Optional['TreeInfo'] = None, + highlight_spaces: Optional[bool] = None, + transition_features: Optional['TransitionFeatureWeights'] = None, + image=None, ): - # type: (...) -> None self.estimator = estimator self.description = description self.error = error @@ -55,9 +53,8 @@ class FeatureImportances(object): """ Feature importances with number of remaining non-zero features. """ def __init__(self, importances, remaining): - # type: (...) -> None - self.importances = importances # type: List[FeatureWeight] - self.remaining = remaining # type: int + self.importances: list[FeatureWeight] = importances + self.remaining: int = remaining @classmethod def from_names_values(cls, names, values, std=None, **kwargs): @@ -75,14 +72,13 @@ class TargetExplanation(object): Spatial values are stored in the :heatmap: attribute. """ def __init__(self, - target, # type: Union[str, int] - feature_weights=None, # type: Optional[FeatureWeights] - proba=None, # type: Optional[float] - score=None, # type: Optional[float] - weighted_spans=None, # type: Optional[WeightedSpans] - heatmap=None, # type: Optional[np.ndarray] + target: Union[str, int], + feature_weights: Optional['FeatureWeights'] = None, + proba: Optional[float] = None, + score: Optional[float] = None, + weighted_spans: Optional['WeightedSpans'] = None, + heatmap: Optional[np.ndarray] = None, ): - # type: (...) 
-> None self.target = target self.feature_weights = feature_weights self.proba = proba @@ -92,7 +88,7 @@ def __init__(self, # List is currently used for unhashed features -Feature = Union[str, List, FormattedFeatureName] +Feature = Union[str, list, FormattedFeatureName] @attrs @@ -103,12 +99,11 @@ class FeatureWeights(object): :pos_remaining: and :neg_remaining: attributes. """ def __init__(self, - pos, # type: List[FeatureWeight] - neg, # type: List[FeatureWeight] - pos_remaining=0, # type: int - neg_remaining=0, # type: int + pos: list['FeatureWeight'], + neg: list['FeatureWeight'], + pos_remaining: int = 0, + neg_remaining: int = 0, ): - # type: (...) -> None self.pos = pos self.neg = neg self.pos_remaining = pos_remaining @@ -117,13 +112,7 @@ def __init__(self, @attrs class FeatureWeight(object): - def __init__(self, - feature, # type: Feature - weight, # type: float - std=None, # type: float - value=None, # type: Any - ): - # type: (...) -> None + def __init__(self, feature: Feature, weight: float, std: Optional[float] = None, value=None): self.feature = feature self.weight = weight self.std = std @@ -136,17 +125,16 @@ class WeightedSpans(object): object for each vectorizer, and other features not highlighted anywhere. """ def __init__(self, - docs_weighted_spans, # type: List[DocWeightedSpans] - other=None, # type: FeatureWeights + docs_weighted_spans: list['DocWeightedSpans'], + other: Optional[FeatureWeights] = None, ): - # type: (...) -> None self.docs_weighted_spans = docs_weighted_spans self.other = other -WeightedSpan = Tuple[ +WeightedSpan = tuple[ Feature, - List[Tuple[int, int]], # list of spans (start, end) for this feature + list[tuple[int, int]], # list of spans (start, end) for this feature float, # feature weight ] @@ -161,12 +149,11 @@ class DocWeightedSpans(object): and to False for word features. """ def __init__(self, - document, # type: str - spans, # type: List[WeightedSpan] - preserve_density=None, # type: bool - vec_name=None, # type: str + document: str, + spans: list[WeightedSpan], + preserve_density: Optional[bool] = None, + vec_name: Optional[str] = None, ): - # type: (...) -> None self.document = document self.spans = spans self.preserve_density = preserve_density @@ -176,11 +163,7 @@ def __init__(self, @attrs class TransitionFeatureWeights(object): """ Weights matrix for transition features. """ - def __init__(self, - class_names, # type: List[str] - coef, - ): - # type: (...) -> None + def __init__(self, class_names: list[str], coef): self.class_names = class_names self.coef = coef @@ -191,13 +174,7 @@ class TreeInfo(object): the function to measure the quality of a split, :tree: holds all nodes of the tree, and :graphviz: is the tree rendered in graphviz .dot format. """ - def __init__(self, - criterion, # type: str - tree, # type: NodeInfo - graphviz, # type: str - is_classification, # type: bool - ): - # type: (...) -> None + def __init__(self, criterion: str, tree: 'NodeInfo', graphviz: str, is_classification: bool): self.criterion = criterion self.tree = tree self.graphviz = graphviz @@ -210,20 +187,19 @@ class NodeInfo(object): Pointers to left and right children are in :left: and :right: attributes. 
""" def __init__(self, - id, # type: int - is_leaf, # type: bool + id: int, + is_leaf: bool, value, value_ratio, - impurity, # type: float - samples, # type: int - sample_ratio, # type: float - feature_name=None, # type: str - feature_id=None, # type: int - threshold=None, # type: float - left=None, # type: NodeInfo - right=None, # type: NodeInfo + impurity: float, + samples: int, + sample_ratio: float, + feature_name: Optional[str] = None, + feature_id: Optional[int] = None, + threshold: Optional[float] = None, + left: Optional['NodeInfo'] = None, + right: Optional['NodeInfo'] = None, ): - # type: (...) -> None self.id = id self.is_leaf = is_leaf self.value = value diff --git a/eli5/formatters/as_dataframe.py b/eli5/formatters/as_dataframe.py index 5b801e75..4d30dcf7 100644 --- a/eli5/formatters/as_dataframe.py +++ b/eli5/formatters/as_dataframe.py @@ -1,5 +1,5 @@ from itertools import chain -from typing import Any, Dict, List, Optional +from typing import Any, Optional import warnings import pandas as pd @@ -12,8 +12,7 @@ from eli5.base_utils import singledispatch -def explain_weights_df(estimator, **kwargs): - # type: (...) -> pd.DataFrame +def explain_weights_df(estimator, **kwargs) -> pd.DataFrame: """ Explain weights and export them to ``pandas.DataFrame``. All keyword arguments are passed to :func:`eli5.explain_weights`. Weights of all features are exported by default. @@ -23,8 +22,7 @@ def explain_weights_df(estimator, **kwargs): eli5.explain_weights(estimator, **kwargs)) -def explain_weights_dfs(estimator, **kwargs): - # type: (...) -> Dict[str, pd.DataFrame] +def explain_weights_dfs(estimator, **kwargs) -> dict[str, pd.DataFrame]: """ Explain weights and export them to a dict with ``pandas.DataFrame`` values (as :func:`eli5.formatters.as_dataframe.format_as_dataframes` does). All keyword arguments are passed to :func:`eli5.explain_weights`. @@ -35,8 +33,7 @@ def explain_weights_dfs(estimator, **kwargs): eli5.explain_weights(estimator, **kwargs)) -def explain_prediction_df(estimator, doc, **kwargs): - # type: (...) -> pd.DataFrame +def explain_prediction_df(estimator, doc, **kwargs) -> pd.DataFrame: """ Explain prediction and export explanation to ``pandas.DataFrame`` All keyword arguments are passed to :func:`eli5.explain_prediction`. Weights of all features are exported by default. @@ -46,8 +43,7 @@ def explain_prediction_df(estimator, doc, **kwargs): eli5.explain_prediction(estimator, doc, **kwargs)) -def explain_prediction_dfs(estimator, doc, **kwargs): - # type: (...) -> Dict[str, pd.DataFrame] +def explain_prediction_dfs(estimator, doc, **kwargs) -> dict[str, pd.DataFrame]: """ Explain prediction and export explanation to a dict with ``pandas.DataFrame`` values (as :func:`eli5.formatters.as_dataframe.format_as_dataframes` does). @@ -69,8 +65,7 @@ def _set_defaults(kwargs): _EXPORTED_ATTRIBUTES = ['transition_features', 'targets', 'feature_importances'] -def format_as_dataframes(explanation): - # type: (Explanation) -> Dict[str, pd.DataFrame] +def format_as_dataframes(explanation: Explanation) -> dict[str, pd.DataFrame]: """ Export an explanation to a dictionary with ``pandas.DataFrame`` values and string keys that correspond to explanation attributes. 
Use this method if several dataframes can be exported from a single @@ -90,8 +85,7 @@ def format_as_dataframes(explanation): @singledispatch -def format_as_dataframe(explanation): - # type: (Explanation) -> Optional[pd.DataFrame] +def format_as_dataframe(explanation) -> Optional[pd.DataFrame]: """ Export an explanation to a single ``pandas.DataFrame``. In case several dataframes could be exported by :func:`eli5.formatters.as_dataframe.format_as_dataframes`, @@ -117,8 +111,7 @@ def format_as_dataframe(explanation): @format_as_dataframe.register(FeatureImportances) -def _feature_importances_to_df(feature_importances): - # type: (FeatureImportances) -> pd.DataFrame +def _feature_importances_to_df(feature_importances: FeatureImportances) -> pd.DataFrame: weights = feature_importances.importances df = pd.DataFrame( {'feature': [fw.feature for fw in weights], @@ -133,12 +126,11 @@ def _feature_importances_to_df(feature_importances): @format_as_dataframe.register(list) -def _targets_to_df(targets): - # type: (List[TargetExplanation]) -> pd.DataFrame +def _targets_to_df(targets: list[TargetExplanation]) -> pd.DataFrame: if targets and not isinstance(targets[0], TargetExplanation): raise ValueError('Only lists of TargetExplanation are supported') columns = ['target', 'feature', 'weight', 'std', 'value'] - df_data = {f: [] for f in columns} # type: Dict[str, List[Any]] + df_data: dict[str, list[Any]] = {f: [] for f in columns} for target in targets: assert target.feature_weights is not None for fw in chain(target.feature_weights.pos, @@ -156,8 +148,7 @@ def _targets_to_df(targets): @format_as_dataframe.register(TransitionFeatureWeights) -def _transition_features_to_df(transition_features): - # type: (TransitionFeatureWeights) -> pd.DataFrame +def _transition_features_to_df(transition_features: TransitionFeatureWeights) -> pd.DataFrame: class_names = list(transition_features.class_names) return pd.DataFrame( {'from': [f for f in class_names for _ in class_names], diff --git a/eli5/formatters/as_dict.py b/eli5/formatters/as_dict.py index fbad5ee5..880c457f 100644 --- a/eli5/formatters/as_dict.py +++ b/eli5/formatters/as_dict.py @@ -1,38 +1,12 @@ -import six - import attr -import numpy as np -from .features import FormattedFeatureName +from .utils import numpy_to_python def format_as_dict(explanation): """ Return a dictionary representing the explanation that can be JSON-encoded. It accepts parts of explanation (for example feature weights) as well. """ - return _numpy_to_python(attr.asdict(explanation)) - + return numpy_to_python(attr.asdict(explanation)) -_numpy_string_types = (np.string_, np.unicode_) if six.PY2 else np.str_ - -def _numpy_to_python(obj): - """ Convert an nested dict/list/tuple that might contain numpy objects - to their python equivalents. Return converted object. 
- """ - if isinstance(obj, dict): - return {k: _numpy_to_python(v) for k, v in obj.items()} - elif isinstance(obj, (list, tuple, np.ndarray)): - return [_numpy_to_python(x) for x in obj] - elif isinstance(obj, FormattedFeatureName): - return obj.value - elif isinstance(obj, _numpy_string_types): - return six.text_type(obj) - elif hasattr(obj, 'dtype') and np.isscalar(obj): - if np.issubdtype(obj, np.floating): - return float(obj) - elif np.issubdtype(obj, np.integer): - return int(obj) - elif np.issubdtype(obj, np.bool_): - return bool(obj) - return obj diff --git a/eli5/formatters/features.py b/eli5/formatters/features.py index 021428c1..6750489e 100644 --- a/eli5/formatters/features.py +++ b/eli5/formatters/features.py @@ -1,13 +1,8 @@ -# -*- coding: utf-8 -*- - -import six - - class FormattedFeatureName(object): """ Feature name that does not need any additional escaping. """ def __init__(self, value): - if not isinstance(value, six.string_types): + if not isinstance(value, str): raise TypeError('"value" must be a string, got {} instead' .format(type(value))) self.value = value diff --git a/eli5/formatters/image.py b/eli5/formatters/image.py index f776b2c2..07fa2113 100644 --- a/eli5/formatters/image.py +++ b/eli5/formatters/image.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import from typing import Union, Optional, Callable import numpy as np @@ -9,12 +7,12 @@ from eli5.base import Explanation -def format_as_image(expl, # type: Explanation - resampling_filter=Image.LANCZOS, # type: int - colormap=matplotlib.cm.viridis, # type: Callable[[np.ndarray], np.ndarray] - alpha_limit=0.65, # type: Optional[Union[float, int]] - ): - # type: (...) -> Image +def format_as_image( + expl: Explanation, + resampling_filter: int = Image.LANCZOS, # type: ignore + colormap: Callable[[np.ndarray], np.ndarray] = matplotlib.cm.viridis, + alpha_limit: Optional[Union[float, int]] = 0.65, + ) -> Image.Image: """format_as_image(expl, resampling_filter=Image.LANCZOS, colormap=matplotlib.cm.viridis, alpha_limit=0.65) Format a :class:`eli5.base.Explanation` object as an image. @@ -106,6 +104,7 @@ def format_as_image(expl, # type: Explanation else: assert len(expl.targets) == 1 heatmap = expl.targets[0].heatmap + assert heatmap is not None _validate_heatmap(heatmap) # The order of our operations is: 1. colorize 2. resize @@ -120,13 +119,12 @@ def format_as_image(expl, # type: Explanation # cap the intensity so that it's not too opaque when near maximum value _update_alpha(heatmap, starting_array=heatvals, alpha_limit=alpha_limit) - heatmap = expand_heatmap(heatmap, image, resampling_filter=resampling_filter) - overlay = _overlay_heatmap(heatmap, image) + heatmap_image = expand_heatmap(heatmap, image, resampling_filter=resampling_filter) + overlay = _overlay_heatmap(heatmap_image, image) return overlay -def heatmap_to_image(heatmap): - # type: (np.ndarray) -> Image +def heatmap_to_image(heatmap: np.ndarray) -> Image.Image: """ Convert the numpy array ``heatmap`` to a Pillow image. @@ -185,8 +183,7 @@ def _validate_heatmap(heatmap): 'and maximum: {}'.format(mi, ma)) -def _colorize(heatmap, colormap): - # type: (np.ndarray, Callable[[np.ndarray], np.ndarray]) -> np.ndarray +def _colorize(heatmap: np.ndarray, colormap: Callable[[np.ndarray], np.ndarray]) -> np.ndarray: """ Apply the ``colormap`` function to a grayscale rank 2 ``heatmap`` array (with float values in interval [0, 1]). 
@@ -196,8 +193,10 @@ def _colorize(heatmap, colormap): return heatmap -def _update_alpha(image_array, starting_array=None, alpha_limit=None): - # type: (np.ndarray, Optional[np.ndarray], Optional[Union[float, int]]) -> None +def _update_alpha( + image_array: np.ndarray, + starting_array: Optional[np.ndarray] = None, + alpha_limit: Optional[Union[float, int]] = None) -> None: """ Update the alpha channel values of an RGBA rank 3 ndarray ``image_array``, optionally creating the alpha channel from rank 2 ``starting_array``, @@ -218,8 +217,7 @@ def _update_alpha(image_array, starting_array=None, alpha_limit=None): image_array[:,:,3] = alpha -def _cap_alpha(alpha_arr, alpha_limit): - # type: (np.ndarray, Union[None, float, int]) -> np.ndarray +def _cap_alpha(alpha_arr: np.ndarray, alpha_limit: Union[None, float, int]) -> np.ndarray: """ Limit the alpha values in ``alpha_arr`` by setting the maximum alpha value to ``alpha_limit``. @@ -239,8 +237,10 @@ def _cap_alpha(alpha_arr, alpha_limit): 'got: {}'.format(alpha_limit)) -def expand_heatmap(heatmap, image, resampling_filter=Image.LANCZOS): - # type: (np.ndarray, Image, Union[None, int]) -> Image +def expand_heatmap( + heatmap: np.ndarray, image: Image.Image, + resampling_filter: Optional[int] = Image.LANCZOS, # type: ignore + ) -> Image.Image: """ Resize the ``heatmap`` image array to fit over the original ``image``, using the specified ``resampling_filter`` method. @@ -271,14 +271,11 @@ def expand_heatmap(heatmap, image, resampling_filter=Image.LANCZOS): if not isinstance(image, Image.Image): raise TypeError('image must be a PIL.Image.Image instance. ' 'Got: {}'.format(image)) - heatmap = heatmap_to_image(heatmap) spatial_dimensions = (image.width, image.height) - heatmap = heatmap.resize(spatial_dimensions, resample=resampling_filter) - return heatmap + return heatmap_to_image(heatmap).resize(spatial_dimensions, resample=resampling_filter) -def _overlay_heatmap(heatmap, image): - # type: (Image, Image) -> Image +def _overlay_heatmap(heatmap: Image.Image, image: Image.Image) -> Image.Image: """ Blend (combine) ``heatmap`` over ``image``, using alpha channel values appropriately (must have mode `RGBA`). @@ -286,4 +283,4 @@ def _overlay_heatmap(heatmap, image): """ # note that the order of alpha_composite arguments matters overlayed_image = Image.alpha_composite(image, heatmap) - return overlayed_image \ No newline at end of file + return overlayed_image diff --git a/eli5/formatters/text.py b/eli5/formatters/text.py index e6abb286..902b7a8d 100644 --- a/eli5/formatters/text.py +++ b/eli5/formatters/text.py @@ -1,9 +1,6 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import from itertools import chain -import six from tabulate import tabulate -from typing import List, Optional, Iterator +from typing import Optional, Iterator from eli5.base import Explanation, FeatureImportances from . import fields @@ -11,21 +8,20 @@ from .utils import ( format_signed, format_value, format_weight, has_any_values_for_weights, replace_spaces, should_highlight_spaces) -from .utils import tabulate as eli5_tabulate +from .utils import tabulate as eli5_tabulate, numpy_to_python from .trees import tree2text -_PLUS_MINUS = "+-" if six.PY2 else "±" -_ELLIPSIS = '...' 
if six.PY2 else '…' -_SPACE = '_' if six.PY2 else '░' +_PLUS_MINUS = "±" +_ELLIPSIS = '…' +_SPACE = '░' -def format_as_text(expl, # type: Explanation +def format_as_text(expl: Explanation, show=fields.ALL, - highlight_spaces=None, # type: Optional[bool] - show_feature_values=False, # type: bool - ): - # type: (...) -> str + highlight_spaces: Optional[bool] = None, + show_feature_values: bool = False, + ) -> str: """ Format explanation as text. Parameters @@ -44,7 +40,7 @@ def format_as_text(expl, # type: Explanation When True, feature values are shown along with feature contributions. Default is False. - show : List[str], optional + show : list[str], optional List of sections to show. Allowed values: * 'targets' - per-target feature weights; @@ -59,7 +55,7 @@ def format_as_text(expl, # type: Explanation ``INFO`` (method and description), ``WEIGHTS`` (all the rest), and ``ALL`` (all). """ - lines = [] # type: List[str] + lines: list[str] = [] if highlight_spaces is None: highlight_spaces = should_highlight_spaces(expl) @@ -101,23 +97,20 @@ def format_as_text(expl, # type: Explanation return '\n'.join(lines) -def _method_lines(explanation): - # type: (Explanation) -> List[str] +def _method_lines(explanation: Explanation) -> list[str]: return ['Explained as: {}'.format(explanation.method)] -def _description_lines(explanation): - # type: (Explanation) -> List[str] +def _description_lines(explanation: Explanation) -> list[str]: return [explanation.description or ''] -def _error_lines(explanation): - # type: (Explanation) -> List[str] +def _error_lines(explanation: Explanation) -> list[str]: return ['Error: {}'.format(explanation.error)] -def _feature_importances_lines(explanation, hl_spaces): - # type: (Explanation, Optional[bool]) -> Iterator[str] +def _feature_importances_lines( + explanation: Explanation, hl_spaces: Optional[bool]) -> Iterator[str]: max_width = 0 assert explanation.feature_importances is not None for line in _fi_lines(explanation.feature_importances, hl_spaces): @@ -128,8 +121,9 @@ def _feature_importances_lines(explanation, hl_spaces): explanation.feature_importances.remaining, kind='', width=max_width) -def _fi_lines(feature_importances, hl_spaces): - # type: (FeatureImportances, Optional[bool]) -> Iterator[str] +def _fi_lines( + feature_importances: FeatureImportances, hl_spaces: Optional[bool], + ) -> Iterator[str]: for fw in feature_importances.importances: featname = _format_feature(fw.feature, hl_spaces) if fw.std or fw.weight: @@ -147,14 +141,12 @@ def _fi_lines(feature_importances, hl_spaces): ) -def _decision_tree_lines(explanation): - # type: (Explanation) -> List[str] +def _decision_tree_lines(explanation: Explanation) -> list[str]: assert explanation.decision_tree is not None return ["", tree2text(explanation.decision_tree)] -def _transition_features_lines(explanation): - # type: (Explanation) -> List[str] +def _transition_features_lines(explanation: Explanation) -> list[str]: tf = explanation.transition_features assert tf is not None return [ @@ -166,12 +158,11 @@ def _transition_features_lines(explanation): ] -def _targets_lines(explanation, # type: Explanation - hl_spaces, # type: Optional[bool] - show_feature_values, # type: bool - explaining_prediction, # type: bool - ): - # type: (...) 
-> List[str] +def _targets_lines(explanation: Explanation, + hl_spaces: Optional[bool], + show_feature_values: bool, + explaining_prediction: bool, + ) -> list[str]: lines = [] assert explanation.targets is not None for target in explanation.targets: @@ -181,7 +172,7 @@ def _targets_lines(explanation, # type: Explanation header = "%s%r%s top features" % ( 'y=' if not explanation.is_regression else '', - target.target, + numpy_to_python(target.target), scores) lines.append(header) @@ -228,8 +219,7 @@ def _targets_lines(explanation, # type: Explanation return lines -def _format_scores(proba, score): - # type: (Optional[float], Optional[float]) -> str +def _format_scores(proba: Optional[float], score: Optional[float]) -> str: scores = [] if proba is not None: scores.append("probability=%0.3f" % proba) @@ -238,8 +228,7 @@ def _format_scores(proba, score): return ", ".join(scores) -def _format_remaining(remaining, kind, width): - # type: (int, str, int) -> str +def _format_remaining(remaining: int, kind: str, width: int) -> str: s = '{ellipsis} {remaining} more {kind}{ellipsis}'.format( ellipsis=_ELLIPSIS, remaining=remaining, @@ -248,8 +237,7 @@ def _format_remaining(remaining, kind, width): return ('{:^%d}' % width).format(s) -def _format_feature(name, hl_spaces): - # type: (...) -> str +def _format_feature(name, hl_spaces) -> str: if isinstance(name, bytes): name = name.decode('utf8') if isinstance(name, FormattedFeatureName): @@ -261,16 +249,14 @@ def _format_feature(name, hl_spaces): return _format_single_feature(name, hl_spaces=hl_spaces) -def _format_single_feature(feature, hl_spaces): - # type: (str, bool) -> str +def _format_single_feature(feature: str, hl_spaces: bool) -> str: if hl_spaces: return replace_spaces(feature, lambda n, _: _SPACE * n) else: return feature -def _format_unhashed_feature(name, hl_spaces, sep=' | '): - # type: (List, bool, str) -> str +def _format_unhashed_feature(name: list, hl_spaces: bool, sep=' | ') -> str: """ Format feature name for hashed features. """ diff --git a/eli5/formatters/utils.py b/eli5/formatters/utils.py index 2e6d2d39..7ade569b 100644 --- a/eli5/formatters/utils.py +++ b/eli5/formatters/utils.py @@ -1,8 +1,6 @@ -from __future__ import absolute_import from itertools import chain import re -import six -from typing import Any, Union, List, Dict, Callable, Match, Optional +from typing import Any, Union, Callable, Match, Optional import numpy as np @@ -10,8 +8,7 @@ from .features import FormattedFeatureName -def replace_spaces(s, replacer): - # type: (str, Callable[[int, str], str]) -> str +def replace_spaces(s: str, replacer: Callable[[int, str], str]) -> str: """ >>> replace_spaces('ab', lambda n, l: '_' * n) 'ab' @@ -24,8 +21,7 @@ def replace_spaces(s, replacer): >>> replace_spaces(' a b ', lambda n, _: '0 0' * n) '0 0a0 0b0 00 0' """ - def replace(m): - # type: (Match[str]) -> str + def replace(m: Match[str]) -> str: if m.start() == 0: side = 'left' elif m.end() == len(s): @@ -37,11 +33,10 @@ def replace(m): return re.sub(r'[ ]+', replace, s) -def format_signed(feature, # type: Dict[str, Any] - formatter=None, # type: Callable[..., str] +def format_signed(feature: dict[str, Any], + formatter: Optional[Callable[..., str]]=None, **kwargs - ): - # type: (...) -> str + ) -> str: """ Format unhashed feature with sign. 
@@ -53,14 +48,13 @@ def format_signed(feature, # type: Dict[str, Any] '(-)" foo"' """ txt = '' if feature['sign'] > 0 else '(-)' - name = feature['name'] # type: str + name: str = feature['name'] if formatter is not None: name = formatter(name, **kwargs) return '{}{}'.format(txt, name) -def should_highlight_spaces(explanation): - # type: (Explanation) -> bool +def should_highlight_spaces(explanation: Explanation) -> bool: hl_spaces = bool(explanation.highlight_spaces) if explanation.feature_importances: hl_spaces = hl_spaces or any( @@ -75,8 +69,7 @@ def should_highlight_spaces(explanation): return hl_spaces -def _has_invisible_spaces(name): - # type: (Union[str, List[Dict], FormattedFeatureName]) -> bool +def _has_invisible_spaces(name: Union[str, list[dict], FormattedFeatureName]) -> bool: if isinstance(name, FormattedFeatureName): return False elif isinstance(name, list): @@ -85,8 +78,7 @@ def _has_invisible_spaces(name): return name.startswith(' ') or name.endswith(' ') -def has_any_values_for_weights(explanation): - # type: (Explanation) -> bool +def has_any_values_for_weights(explanation: Explanation) -> bool: if explanation.targets: return any(fw.value is not None for t in explanation.targets @@ -97,11 +89,10 @@ def has_any_values_for_weights(explanation): return False -def tabulate(data, # type: List[List[Any]] - header=None, # type: Optional[List[Any]] - col_align=None, # type: Union[str, List[str]] - ): - # type: (...) -> List[str] +def tabulate(data: list[list[Any]], + header: Optional[list[Any]] = None, + col_align: Optional[Union[str, list[str]]] = None, + ) -> list[str]: """ Format data as a table without any fancy features. col_align: l/r/c or a list/string of l/r/c. l = left, r = right, c = center Return a list of strings (lines of the table). @@ -118,7 +109,7 @@ def tabulate(data, # type: List[List[Any]] if col_align is None: col_align = ['l'] * n_cols - elif isinstance(col_align, six.string_types) and len(col_align) == 1: + elif isinstance(col_align, str) and len(col_align) == 1: col_align = [col_align] * n_cols else: col_align = list(col_align) @@ -130,7 +121,7 @@ def tabulate(data, # type: List[List[Any]] if header: data = [header] + data - data = [[six.text_type(x) for x in row] for row in data] + data = [[str(x) for x in row] for row in data] col_width = [max(len(row[col_i]) for row in data) for col_i in range(n_cols)] if header: data.insert(1, ['-' * width for width in col_width]) @@ -141,16 +132,33 @@ def tabulate(data, # type: List[List[Any]] return [line_tpl.format(*row) for row in data] -def format_weight(value): - # type: (float) -> str +def format_weight(value: float) -> str: return '{:+.3f}'.format(value) -def format_value(value): - # type: (Optional[float]) -> str +def format_value(value: Optional[float]) -> str: if value is None: return '' elif np.isnan(value): return 'Missing' else: return '{:.3f}'.format(value) + + +def numpy_to_python(obj): + """ Convert an nested dict/list/tuple that might contain numpy objects + to their python equivalents. Return converted object. 
+ """ + if isinstance(obj, dict): + return {k: numpy_to_python(v) for k, v in obj.items()} + elif isinstance(obj, (list, tuple, np.ndarray)): + return [numpy_to_python(x) for x in obj] + elif isinstance(obj, FormattedFeatureName): + return obj.value + elif isinstance(obj, np.str_): + return str(obj) + elif hasattr(obj, 'dtype') and np.isscalar(obj): + if np.issubdtype(obj, np.floating): return float(obj) # type: ignore + elif np.issubdtype(obj, np.integer): return int(obj) # type: ignore + elif np.issubdtype(obj, np.bool_): return bool(obj) # type: ignore + return obj diff --git a/eli5/lightgbm.py b/eli5/lightgbm.py index d3b886b5..0e6e72dd 100644 --- a/eli5/lightgbm.py +++ b/eli5/lightgbm.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, division from collections import defaultdict -from typing import DefaultDict, Any, Tuple, Optional +from typing import DefaultDict, Optional, Union import numpy as np import lightgbm @@ -166,6 +164,7 @@ def explain_prediction_lightgbm( if is_regression is None: raise ValueError('Please specify is_regression argument') + names: Union[list[str], np.ndarray] if is_regression: names = ['y'] elif isinstance(lgb, lightgbm.Booster): @@ -204,8 +203,7 @@ def get_score_weights(_label_id): ) -def _check_booster_args(lgb, is_regression=None): - # type: (Any, Optional[bool]) -> Tuple[lightgbm.Booster, Optional[bool]] +def _check_booster_args(lgb, is_regression: Optional[bool] = None) -> tuple[lightgbm.Booster, Optional[bool]]: if isinstance(lgb, lightgbm.Booster): booster = lgb if is_regression is None: @@ -332,7 +330,7 @@ def _get_prediction_feature_weights(booster, X, n_targets): res = [] for target in range(n_targets): - feature_weights = defaultdict(float) # type: DefaultDict[Optional[str], float] + feature_weights: DefaultDict[Optional[str], float] = defaultdict(float) for info, leaf_id in zip(tree_info[:, target], pred_leafs[:, target]): leaf_index, split_index = _get_leaf_split_indices( info['tree_structure'] diff --git a/eli5/lightning.py b/eli5/lightning.py index 2f648064..643c4695 100644 --- a/eli5/lightning.py +++ b/eli5/lightning.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - from lightning.impl.base import BaseEstimator from lightning import classification, regression from sklearn.multiclass import OneVsRestClassifier diff --git a/eli5/lime/_vectorizer.py b/eli5/lime/_vectorizer.py index 5356d6cd..ce1aca0b 100644 --- a/eli5/lime/_vectorizer.py +++ b/eli5/lime/_vectorizer.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from typing import Tuple, Callable, Dict, Optional, List +from typing import Callable import numpy as np from sklearn.base import BaseEstimator, TransformerMixin @@ -29,11 +27,10 @@ def transform(self, X): return np.ones(len(self.text_.tokens)).reshape((1, -1)) def get_doc_weighted_spans(self, - doc, # type: str - feature_weights, # type: FeatureWeights - feature_fn # type: Callable[[str], str] - ): - # type: (...) 
-> Tuple[Dict[Tuple[str, int], float], DocWeightedSpans] + doc: str, + feature_weights: FeatureWeights, + feature_fn: Callable[[str], str], + ) -> tuple[dict[tuple[str, int], float], DocWeightedSpans]: feature_weights_dict = _get_feature_weights_dict(feature_weights, feature_fn) spans = [] @@ -53,11 +50,9 @@ def get_doc_weighted_spans(self, ) return found_features, doc_weighted_spans - def _featname(self, idx, token): - # type: (int, str) -> str + def _featname(self, idx: int, token: str) -> str: return "[{}] {}".format(idx, token) - def get_feature_names(self): - # type: () -> List[str] + def get_feature_names_out(self) -> list[str]: return [self._featname(idx, token) for idx, token in enumerate(self.text_.tokens)] diff --git a/eli5/lime/lime.py b/eli5/lime/lime.py index 2968da04..97cfe1ca 100644 --- a/eli5/lime/lime.py +++ b/eli5/lime/lime.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- """ An impementation of LIME (http://arxiv.org/abs/1602.04938), an algorithm to explain predictions of black-box models. """ -from __future__ import absolute_import -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Optional import numpy as np from sklearn.feature_extraction.text import CountVectorizer @@ -14,7 +12,6 @@ from sklearn.base import clone, BaseEstimator import eli5 -from eli5.sklearn.utils import sklearn_version from eli5.lime.samplers import BaseSampler from eli5.lime.textutils import DEFAULT_TOKEN_PATTERN, CHAR_TOKEN_PATTERN from eli5.lime.samplers import MaskingTextSamplers @@ -139,18 +136,17 @@ class TextExplainer(BaseEstimator): Only available after :func:`fit`. """ def __init__(self, - n_samples=5000, # type: int - char_based=None, # type: bool + n_samples: int = 5000, + char_based: Optional[bool] = None, clf=None, vec=None, - sampler=None, # type: BaseSampler - position_dependent=False, # type: bool - rbf_sigma=None, # type: float + sampler: Optional[BaseSampler] = None, + position_dependent: bool = False, + rbf_sigma: Optional[float] = None, random_state=None, - expand_factor=10, # type: Optional[int] - token_pattern=None, # type: Optional[str] - ): - # type: (...) -> None + expand_factor: Optional[int] = 10, + token_pattern: Optional[str] = None, + ) -> None: self.n_samples = n_samples self.random_state = random_state self.expand_factor = expand_factor @@ -161,8 +157,8 @@ def __init__(self, if char_based is None: if token_pattern is None: - self.char_based = False # type: Optional[bool] - self.token_pattern = DEFAULT_TOKEN_PATTERN # type: str + self.char_based: Optional[bool] = False + self.token_pattern: str = DEFAULT_TOKEN_PATTERN else: self.char_based = None self.token_pattern = token_pattern @@ -203,11 +199,7 @@ def __init__(self, ) self.vec = vec - def fit(self, - doc, # type: str - predict_proba, # type: Callable[[Any], Any] - ): - # type: (...) -> TextExplainer + def fit(self, doc: str, predict_proba: Callable[[Any], Any]) -> 'TextExplainer': """ Explain ``predict_proba`` probabilistic classification function for the ``doc`` example. 
This method fits a local classification @@ -320,26 +312,23 @@ def _fix_target_names(self, kwargs): def _default_clf(self): kwargs = dict( - loss='log', + loss='log_loss', penalty='elasticnet', alpha=1e-3, - random_state=self.rng_ + random_state=self.rng_, + tol=1e-3, ) - if sklearn_version() >= '0.19': - kwargs['tol'] = 1e-3 return SGDClassifier(**kwargs) - def _train_local_classifier(estimator, samples, - similarity, # type: np.ndarray - y_proba, # type: np.ndarray - expand_factor=10, # type: Optional[int] - test_size=0.3, # type: float + similarity: np.ndarray, + y_proba: np.ndarray, + expand_factor: Optional[int] = 10, + test_size: float = 0.3, random_state=None, - ): - # type: (...) -> Dict[str, float] + ) -> dict[str, float]: rng = check_random_state(random_state) (X_train, X_test, diff --git a/eli5/lime/samplers.py b/eli5/lime/samplers.py index 2475f883..5de546da 100644 --- a/eli5/lime/samplers.py +++ b/eli5/lime/samplers.py @@ -1,9 +1,6 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import import abc from functools import partial -from typing import List, Tuple, Any, Union, Dict, Optional -import six +from typing import Any, Union, Optional import numpy as np from sklearn.base import BaseEstimator, clone @@ -17,8 +14,7 @@ from .textutils import generate_samples, DEFAULT_TOKEN_PATTERN, TokenizedText -@six.add_metaclass(abc.ABCMeta) -class BaseSampler(BaseEstimator): +class BaseSampler(BaseEstimator, metaclass=abc.ABCMeta): """ Base sampler class. Sampler is an object which generates examples similar to a given example. @@ -67,15 +63,14 @@ class MaskingTextSampler(BaseSampler): Default is 1, meaning individual tokens are replaced. """ def __init__(self, - token_pattern=None, # type: Optional[str] - bow=True, # type: bool + token_pattern: Optional[str] = None, + bow: bool = True, random_state=None, - replacement='', # type: str - min_replace=1, # type: Union[int, float] - max_replace=1.0, # type: Union[int, float] - group_size=1, # type: int + replacement: str = '', + min_replace: Union[int, float] = 1, + max_replace: Union[int, float] = 1.0, + group_size: int = 1, ): - # type: (...) -> None self.token_pattern = token_pattern or DEFAULT_TOKEN_PATTERN self.bow = bow self.random_state = random_state @@ -85,18 +80,17 @@ def __init__(self, self.group_size = group_size self.rng_ = check_random_state(self.random_state) - def sample_near(self, doc, n_samples=1): - # type: (str, int) -> Tuple[List[str], np.ndarray] + def sample_near(self, doc: str, n_samples: int = 1) -> tuple[list[str], np.ndarray]: docs, similarities, mask, text = self.sample_near_with_mask( doc=doc, n_samples=n_samples ) return docs, similarities - def sample_near_with_mask(self, - doc, # type: Union[TokenizedText, str] - n_samples=1 # type: int - ): - # type: (...) -> Tuple[List[str], np.ndarray, np.ndarray, TokenizedText] + def sample_near_with_mask( + self, + doc: Union[TokenizedText, str], + n_samples: int = 1, + ) -> tuple[list[str], np.ndarray, np.ndarray, TokenizedText]: if not isinstance(doc, TokenizedText): doc = TokenizedText(doc, token_pattern=self.token_pattern) @@ -125,35 +119,33 @@ class MaskingTextSamplers(BaseSampler): with :class:`MaskingTextSampler` paremeters. 
""" def __init__(self, - sampler_params, # type: List[Dict[str, Any]] - token_pattern=None, # type: Optional[str] + sampler_params: list[dict[str, Any]], + token_pattern: Optional[str] = None, random_state=None, - weights=None, # type: Union[np.ndarray, List[float]] + weights: Optional[Union[np.ndarray, list[float]]] = None, ): - # type: (...) -> None self.random_state = random_state self.rng_ = check_random_state(random_state) self.token_pattern = token_pattern self.samplers = list(map(self._create_sampler, sampler_params)) + self.weights: np.ndarray if weights is None: self.weights = np.ones(len(self.samplers)) else: self.weights = np.array(weights) self.weights /= self.weights.sum() - def _create_sampler(self, extra): - # type: (Dict) -> MaskingTextSampler - params = dict( + def _create_sampler(self, extra: dict) -> MaskingTextSampler: + params: dict[str, Any] = dict( token_pattern=self.token_pattern, random_state=self.rng_, - ) # type: Dict[str, Any] + ) params.update(extra) return MaskingTextSampler(**params) - def sample_near(self, doc, n_samples=1): - # type: (str, int) -> Tuple[List[str], np.ndarray] + def sample_near(self, doc: str, n_samples: int = 1) -> tuple[list[str], np.ndarray]: assert n_samples >= 1 - all_docs = [] # type: List[str] + all_docs: list[str] = [] # type similarities = [] for sampler, freq in self._sampler_n_samples(n_samples): docs, sims = sampler.sample_near(doc, n_samples=freq) @@ -161,15 +153,13 @@ def sample_near(self, doc, n_samples=1): similarities.append(sims) return all_docs, np.hstack(similarities) - def sample_near_with_mask(self, - doc, # type: str - n_samples=1 # type: int - ): - # type: (...) -> Tuple[List[str], np.ndarray, np.ndarray, TokenizedText] + def sample_near_with_mask( + self, doc: str, n_samples: int = 1, + ) -> tuple[list[str], np.ndarray, np.ndarray, TokenizedText]: assert n_samples >= 1 assert self.token_pattern is not None text = TokenizedText(doc, token_pattern=self.token_pattern) - all_docs = [] # type: List[str] + all_docs: list[str] = [] similarities = [] masks = [] for sampler, freq in self._sampler_n_samples(n_samples): @@ -222,8 +212,7 @@ def _get_grid(self): return GridSearchCV(self.kde, param_grid=param_grid, n_jobs=self.n_jobs, cv=cv) - def _fit_kde(self, kde, X): - # type: (KernelDensity, np.ndarray) -> Tuple[GridSearchCV, KernelDensity] + def _fit_kde(self, kde: KernelDensity, X: np.ndarray) -> tuple[GridSearchCV, KernelDensity]: if self.fit_bandwidth: grid = self._get_grid() grid.fit(X) @@ -253,7 +242,7 @@ class MultivariateKernelDensitySampler(_BaseKernelDensitySampler): It is a problem e.g. when features have different variances (e.g. some of them are one-hot encoded and other are continuous). """ - def fit(self, X, y=None): + def fit(self, X=None, y=None): self.grid_, self.kde_ = self._fit_kde(self.kde, X) self._set_sigma(self.kde_.bandwidth) return self @@ -280,9 +269,9 @@ class UnivariateKernelDensitySampler(_BaseKernelDensitySampler): Also, at sampling time it replaces only random subsets of the features instead of generating totally new examples. 
""" - def fit(self, X, y=None): - self.kdes_ = [] # type: List[KernelDensity] - self.grids_ = [] # type: List[GridSearchCV] + def fit(self, X=None, y=None): + self.kdes_: list[KernelDensity] = [] + self.grids_: list[GridSearchCV] = [] num_features = X.shape[-1] for i in range(num_features): grid, kde = self._fit_kde(self.kde, X[:, i].reshape(-1, 1)) @@ -307,8 +296,8 @@ def sample_near(self, doc, n_samples=1): kde = self.kdes_[i] new_doc[i] = kde.sample(random_state=self.rng_).ravel() samples.append(new_doc) - samples = np.asarray(samples) - return samples, self._similarity(doc, samples) + samples_array = np.asarray(samples) + return samples_array, self._similarity(doc, samples_array) def _distances(doc, samples, metric): diff --git a/eli5/lime/textutils.py b/eli5/lime/textutils.py index 98da0428..362e23b6 100644 --- a/eli5/lime/textutils.py +++ b/eli5/lime/textutils.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- """ Utilities for text generation. """ -from __future__ import absolute_import import re import math from typing import List, Tuple, Union, Optional @@ -14,22 +12,21 @@ # the same as scikit-learn token pattern, but allows single-char tokens -DEFAULT_TOKEN_PATTERN = r'(?u)\b\w+\b' +DEFAULT_TOKEN_PATTERN = r'\b\w+\b' # non-whitespace chars CHAR_TOKEN_PATTERN = r'[^\s]' -def generate_samples(text, # type: TokenizedText - n_samples=500, # type: int - bow=True, # type: bool +def generate_samples(text: 'TokenizedText', + n_samples=500, + bow=True, random_state=None, - replacement='', # type: str - min_replace=1, # type: Union[int, float] - max_replace=1.0, # type: Union[int, float] - group_size=1, # type: int - ): - # type: (...) -> Tuple[List[str], np.ndarray, np.ndarray] + replacement='', + min_replace=1.0, + max_replace=1.0, + group_size=1, + ) -> Tuple[List[str], np.ndarray, np.ndarray]: """ Return ``n_samples`` changed versions of text (with some words removed), along with distances between the original text and a generated @@ -66,21 +63,19 @@ def cosine_similarity_vec(num_tokens, num_removed_vec): class TokenizedText(object): - def __init__(self, text, token_pattern=DEFAULT_TOKEN_PATTERN): - # type: (str, str) -> None + def __init__(self, text: str, token_pattern=DEFAULT_TOKEN_PATTERN): self.text = text self.split = SplitResult.fromtext(text, token_pattern) - self._vocab = None # type: Optional[List[str]] + self._vocab: Optional[list[str]] = None def replace_random_tokens(self, - n_samples, # type: int - replacement='', # type: str + n_samples: int, + replacement='', random_state=None, - min_replace=1, # type: Union[int, float] - max_replace=1.0, # type: Union[int, float] - group_size=1 # type: int - ): - # type: (...) -> List[Tuple[str, int, np.ndarray]] + min_replace=1.0, + max_replace=1.0, + group_size=1, + ) -> list[tuple[str, int, np.ndarray]]: """ Return a list of ``(text, replaced_count, mask)`` tuples with n_samples versions of text with some words replaced. @@ -110,13 +105,12 @@ def replace_random_tokens(self, return res def replace_random_tokens_bow(self, - n_samples, # type: int - replacement='', # type: str + n_samples: int, + replacement='', random_state=None, - min_replace=1, # type: Union[int, float] - max_replace=1.0, # type: Union[int, float] - ): - # type: (...) -> List[Tuple[str, int, np.ndarray]] + min_replace=1.0, + max_replace=1.0, + ) -> list[tuple[str, int, np.ndarray]]: """ Return a list of ``(text, replaced_words_count, mask)`` tuples with n_samples versions of text with some words replaced. 
@@ -144,11 +138,10 @@ def replace_random_tokens_bow(self, return res def _get_min_max(self, - min_replace, # type: Union[int, float] - max_replace, # type: Union[int, float] - hard_maximum # type: int - ): - # type: (...) -> Tuple[int, int] + min_replace: Union[int, float], + max_replace: Union[int, float], + hard_maximum: int, + ) -> tuple[int, int]: if isinstance(min_replace, float): min_replace = int(math.floor(hard_maximum * min_replace)) or 1 if isinstance(max_replace, float): @@ -158,8 +151,7 @@ def _get_min_max(self, return min_replace, max_replace @property - def vocab(self): - # type: () -> List[str] + def vocab(self) -> list[str]: if self._vocab is None: self._vocab = sorted(set(self.tokens)) return self._vocab @@ -180,8 +172,7 @@ def __init__(self, parts): self.starts = self.lenghts.cumsum() @classmethod - def fromtext(cls, text, token_pattern=DEFAULT_TOKEN_PATTERN): - # type: (str, str) -> SplitResult + def fromtext(cls, text: str, token_pattern=DEFAULT_TOKEN_PATTERN) -> 'SplitResult': token_pattern = u"(%s)" % token_pattern parts = re.split(token_pattern, text) return cls(parts) @@ -195,21 +186,17 @@ def tokens(self): return self.parts[1::2] @property - def token_spans(self): - # type: () -> List[Tuple[int, int]] + def token_spans(self) -> list[tuple[int, int]]: return list(zip(self.starts[::2], self.starts[1::2])) - def copy(self): - # type: () -> SplitResult + def copy(self) -> 'SplitResult': return self.__class__(self.parts.copy()) - def masked(self, invmask, replacement=''): - # type: (Union[np.ndarray, List[int]], str) -> SplitResult + def masked(self, invmask: Union[np.ndarray, list[int]], replacement='') -> 'SplitResult': s = self.copy() s.tokens[invmask] = replacement return s @property - def text(self): - # type: () -> str + def text(self) -> str: return "".join(self.parts) diff --git a/eli5/lime/utils.py b/eli5/lime/utils.py index 120dbfbd..3e21722c 100644 --- a/eli5/lime/utils.py +++ b/eli5/lime/utils.py @@ -1,16 +1,11 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from typing import List, Any - import numpy as np from scipy.stats import entropy +from scipy.sparse import issparse from sklearn.pipeline import Pipeline -from sklearn.utils import check_random_state, issparse -from sklearn.utils.metaestimators import if_delegate_has_method +from sklearn.utils import check_random_state from sklearn.utils import shuffle as _shuffle from eli5.utils import vstack -from eli5.sklearn.utils import sklearn_version def fit_proba(clf, X, y_proba, expand_factor=10, sample_weight=None, @@ -48,11 +43,8 @@ def with_sample_weight(clf, sample_weight, fit_params): return params -def fix_multiclass_predict_proba(y_proba, # type: np.ndarray - seen_classes, - complete_classes - ): - # type: (...) -> np.ndarray +def fix_multiclass_predict_proba( + y_proba: np.ndarray, seen_classes, complete_classes) -> np.ndarray: """ Add missing columns to predict_proba result. @@ -70,22 +62,7 @@ def fix_multiclass_predict_proba(y_proba, # type: np.ndarray return y_proba_fixed -class _PipelinePatched(Pipeline): - # Patch from https://github.com/scikit-learn/scikit-learn/pull/7723; - # only needed for scikit-learn < 0.19. 
- @if_delegate_has_method(delegate='_final_estimator') - def score(self, X, y=None, **score_params): - Xt = X - for name, transform in self.steps[:-1]: - if transform is not None: - Xt = transform.transform(Xt) - return self.steps[-1][-1].score(Xt, y, **score_params) - - def score_with_sample_weight(estimator, X, y=None, sample_weight=None): - if sklearn_version() < '0.19': - if isinstance(estimator, Pipeline) and sample_weight is not None: - estimator = _PipelinePatched(estimator.steps) if sample_weight is None: return estimator.score(X, y) return estimator.score(X, y, sample_weight=sample_weight) diff --git a/eli5/sklearn/_span_analyzers.py b/eli5/sklearn/_span_analyzers.py index 75673440..2e4a594b 100644 --- a/eli5/sklearn/_span_analyzers.py +++ b/eli5/sklearn/_span_analyzers.py @@ -1,7 +1,4 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import import re -from six.moves import xrange def build_span_analyzer(document, vec): @@ -49,9 +46,9 @@ def _word_ngrams(vec, tokens, stop_words=None): original_tokens = tokens tokens = [] n_original_tokens = len(original_tokens) - for n in xrange(min_n, + for n in range(min_n, min(max_n + 1, n_original_tokens + 1)): - for i in xrange(n_original_tokens - n + 1): + for i in range(n_original_tokens - n + 1): ngram_tokens = original_tokens[i: i + n] tokens.append(( [s for s, _ in ngram_tokens], @@ -65,8 +62,8 @@ def _char_ngrams(vec, text_document): text_len = len(text_document) ngrams = [] min_n, max_n = vec.ngram_range - for n in xrange(min_n, min(max_n + 1, text_len + 1)): - for i in xrange(text_len - n + 1): + for n in range(min_n, min(max_n + 1, text_len + 1)): + for i in range(text_len - n + 1): ngrams.append(([(i, i + n)], text_document[i: i + n])) return ngrams @@ -81,7 +78,7 @@ def _char_wb_ngrams(vec, text_document): w = m.group(0) w = ' ' + w + ' ' w_len = len(w) - for n in xrange(min_n, max_n + 1): + for n in range(min_n, max_n + 1): offset = 0 ngrams.append(( [(w_start + offset - 1, w_start + offset + n - 1)], diff --git a/eli5/sklearn/explain_prediction.py b/eli5/sklearn/explain_prediction.py index 18dcc36f..ec530dda 100644 --- a/eli5/sklearn/explain_prediction.py +++ b/eli5/sklearn/explain_prediction.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from functools import partial import numpy as np @@ -12,6 +11,7 @@ RandomForestClassifier, RandomForestRegressor, ) +from sklearn.ensemble._gb import _init_raw_predictions from sklearn.linear_model import ( ElasticNet, # includes Lasso, MultiTaskElasticNet, etc. 
ElasticNetCV, @@ -53,7 +53,7 @@ from eli5.base_utils import singledispatch from eli5.utils import ( get_target_display_names, - get_binary_target_scale_label_id + get_binary_target_scale_label_id, ) from eli5.sklearn.utils import ( add_intercept, @@ -61,6 +61,7 @@ get_default_target_names, get_X, get_X0, + is_classifier, is_multiclass_classifier, is_multitarget_regressor, predict_proba, @@ -581,9 +582,11 @@ def _trees_feature_weights(clf, X, feature_names, num_targets): if hasattr(clf, 'init_'): if clf.init_ == 'zero': bias_init = 0 - elif is_grad_boost and hasattr(clf.loss_, 'get_init_raw_predictions'): - bias_init = clf.loss_.get_init_raw_predictions( - X, clf.init_).astype(np.float64)[0] + elif is_grad_boost: + bias_init_arr = _init_raw_predictions( + X, clf.init_, clf._loss, is_classifier(clf) + ) + bias_init = bias_init_arr.astype(np.float64)[0] else: bias_init = clf.init_.predict(X)[0] feature_weights[feature_names.bias_idx] += bias_init diff --git a/eli5/sklearn/explain_weights.py b/eli5/sklearn/explain_weights.py index 49010fb4..099110ad 100644 --- a/eli5/sklearn/explain_weights.py +++ b/eli5/sklearn/explain_weights.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - import numpy as np from sklearn.base import BaseEstimator, RegressorMixin @@ -38,7 +35,7 @@ OneClassSVM, ) # TODO: see https://github.com/scikit-learn/scikit-learn/pull/2250 -from sklearn.naive_bayes import BernoulliNB, MultinomialNB +# from sklearn.naive_bayes import BernoulliNB, MultinomialNB from sklearn.ensemble import ( GradientBoostingClassifier, GradientBoostingRegressor, @@ -54,11 +51,10 @@ DecisionTreeRegressor, ) -from eli5.base import ( - Explanation, TargetExplanation, FeatureImportances) +from eli5.base import Explanation, TargetExplanation from eli5.base_utils import singledispatch from eli5._feature_weights import get_top_features -from eli5.utils import argsort_k_largest_positive, get_target_display_names +from eli5.utils import get_target_display_names from eli5.sklearn.unhashing import handle_hashing_vec, is_invhashing from eli5.sklearn.treeinspect import get_tree_info from eli5.sklearn.utils import ( diff --git a/eli5/sklearn/permutation_importance.py b/eli5/sklearn/permutation_importance.py index 0c68cca4..55f841d7 100644 --- a/eli5/sklearn/permutation_importance.py +++ b/eli5/sklearn/permutation_importance.py @@ -1,31 +1,30 @@ -# -*- coding: utf-8 -*- from functools import partial -from typing import List import numpy as np from sklearn.model_selection import check_cv from sklearn.utils.metaestimators import available_if from sklearn.utils import check_array, check_random_state -from sklearn.base import ( - BaseEstimator, - MetaEstimatorMixin, - clone, - is_classifier -) +from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone from sklearn.metrics import check_scoring from eli5.permutation_importance import get_score_importances -from eli5.sklearn.utils import pandas_available +from eli5.sklearn.utils import pandas_available, is_classifier if pandas_available: import pandas as pd -def _estimator_has(attr): +def _wrapped_estimator_has(attr): def check(self): return hasattr(self.wrapped_estimator_, attr) return check +def _estimator_has(attr): + def check(self): + return hasattr(self.estimator, attr) + + return check + CAVEATS_CV_NONE = """ Feature importances are computed on the same data as used for training, i.e. 
feature importances don't reflect importance of features for @@ -151,7 +150,6 @@ class PermutationImportance(BaseEstimator, MetaEstimatorMixin): """ def __init__(self, estimator, scoring=None, n_iter=5, random_state=None, cv='prefit', refit=True): - # type: (...) -> None if isinstance(cv, str) and cv != "prefit": raise ValueError("Invalid cv value: {!r}".format(cv)) self.refit = refit @@ -168,8 +166,7 @@ def pd_scorer(model, X, y): return base_scorer(model, X, y) return pd_scorer - def fit(self, X, y, groups=None, **fit_params): - # type: (...) -> PermutationImportance + def fit(self, X, y, groups=None, **fit_params) -> 'PermutationImportance': """Compute ``feature_importances_`` attribute and optionally fit the base estimator. @@ -202,7 +199,7 @@ def fit(self, X, y, groups=None, **fit_params): self.estimator_ = clone(self.estimator) self.estimator_.fit(X, y, **fit_params) - X = check_array(X, force_all_finite='allow-nan') + X = check_array(X, ensure_all_finite='allow-nan') if self.cv not in (None, "prefit"): si = self._cv_scores_importances(X, y, groups=groups, **fit_params) @@ -218,8 +215,8 @@ def fit(self, X, y, groups=None, **fit_params): def _cv_scores_importances(self, X, y, groups=None, **fit_params): assert self.cv is not None cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator)) - feature_importances = [] # type: List - base_scores = [] # type: List[float] + feature_importances: list = [] + base_scores: list[float] = [] weights = fit_params.pop('sample_weight', None) fold_fit_params = fit_params.copy() for train, test in cv.split(X, y, groups): @@ -243,8 +240,7 @@ def _get_score_importances(self, score_func, X, y): random_state=self.rng_) @property - def caveats_(self): - # type: () -> str + def caveats_(self) -> str: if self.cv == 'prefit': return CAVEATS_PREFIT elif self.cv is None: @@ -253,26 +249,30 @@ def caveats_(self): # ============= Exposed methods of a wrapped estimator: - @available_if(_estimator_has('score')) + @available_if(_wrapped_estimator_has('score')) def score(self, X, y=None, *args, **kwargs): return self.wrapped_estimator_.score(X, y, *args, **kwargs) - @available_if(_estimator_has('predict')) + @available_if(_wrapped_estimator_has('predict')) def predict(self, X): return self.wrapped_estimator_.predict(X) - @available_if(_estimator_has('predict_proba')) + @available_if(_wrapped_estimator_has('predict_proba')) def predict_proba(self, X): return self.wrapped_estimator_.predict_proba(X) - @available_if(_estimator_has('predict_log_proba')) + @available_if(_wrapped_estimator_has('predict_log_proba')) def predict_log_proba(self, X): return self.wrapped_estimator_.predict_log_proba(X) - @available_if(_estimator_has('decision_function')) + @available_if(_wrapped_estimator_has('decision_function')) def decision_function(self, X): return self.wrapped_estimator_.decision_function(X) + @available_if(_estimator_has('__sklearn_tags__')) + def __sklearn_tags__(self): + return self.estimator.__sklearn_tags__() + @property def wrapped_estimator_(self): if self.cv == "prefit" or not self.refit: diff --git a/eli5/sklearn/unhashing.py b/eli5/sklearn/unhashing.py index f4f79b8d..4c08827c 100644 --- a/eli5/sklearn/unhashing.py +++ b/eli5/sklearn/unhashing.py @@ -1,14 +1,11 @@ -# -*- coding: utf-8 -*- """ Utilities to reverse transformation done by FeatureHasher or HashingVectorizer. 
""" -from __future__ import absolute_import from collections import defaultdict, Counter from itertools import chain -from typing import List, Iterable, Any, Dict, Tuple, Union +from typing import Iterable, Union import numpy as np -import six from sklearn.base import BaseEstimator, TransformerMixin from sklearn.feature_extraction.text import ( HashingVectorizer, @@ -30,7 +27,7 @@ class InvertableHashingVectorizer(BaseEstimator, TransformerMixin): Unlike HashingVectorizer it can be fit. During fitting :class:`~.InvertableHashingVectorizer` learns which input terms map to which feature columns/signs; this allows to provide more meaningful - :meth:`get_feature_names`. The cost is that it is no longer stateless. + :meth:`get_feature_names_out`. The cost is that it is no longer stateless. You can fit :class:`~.InvertableHashingVectorizer` on a random sample of documents (not necessarily on the whole training and testing data), @@ -41,16 +38,14 @@ class InvertableHashingVectorizer(BaseEstimator, TransformerMixin): :meth:`transform` works the same as HashingVectorizer.transform. """ - def __init__(self, vec, - unkn_template="FEATURE[%d]"): - # type: (HashingVectorizer, str) -> None + def __init__(self, vec: HashingVectorizer, unkn_template="FEATURE[%d]"): self.vec = vec self.unkn_template = unkn_template self.unhasher = FeatureUnhasher( hasher=vec._get_hasher(), unkn_template=unkn_template, ) - self.n_features = vec.n_features # type: int + self.n_features: int = vec.n_features def fit(self, X, y=None): """ Extract possible terms from documents """ @@ -64,8 +59,7 @@ def partial_fit(self, X): def transform(self, X): return self.vec.transform(X) - def get_feature_names(self, always_signed=True): - # type: (bool) -> FeatureNames + def get_feature_names_out(self, always_signed=True) -> FeatureNames: """ Return feature names. This is a best-effort function which tries to reconstruct feature @@ -79,7 +73,7 @@ def get_feature_names(self, always_signed=True): unprocessed classifier coefficients, and always_signed=False if you've taken care of :attr:`column_signs_`. """ - return self.unhasher.get_feature_names( + return self.unhasher.get_feature_names_out( always_signed=always_signed, always_positive=self._always_positive(), ) @@ -105,8 +99,7 @@ def column_signs_(self): self.unhasher.recalculate_attributes() return self.unhasher.column_signs_ - def _always_positive(self): - # type: () -> bool + def _always_positive(self) -> bool: return ( self.vec.binary or getattr(self.vec, 'non_negative', False) @@ -118,32 +111,28 @@ class FeatureUnhasher(BaseEstimator): """ Class for recovering a mapping used by FeatureHasher. """ - def __init__(self, hasher, unkn_template="FEATURE[%d]"): - # type: (FeatureHasher, str) -> None + def __init__(self, hasher: FeatureHasher, unkn_template="FEATURE[%d]"): if hasher.input_type != 'string': raise ValueError("FeatureUnhasher only supports hashers with " "input_type 'string', got %r." 
% hasher.input_type) self.hasher = hasher - self.n_features = self.hasher.n_features # type: int + self.n_features: int = self.hasher.n_features self.unkn_template = unkn_template self._attributes_dirty = True - self._term_counts = Counter() # type: Counter + self._term_counts: Counter[str] = Counter() - def fit(self, X, y=None): - # type: (Iterable[str], Any) -> FeatureUnhasher + def fit(self, X: Iterable[str], y=None) -> 'FeatureUnhasher': self._term_counts.clear() self.partial_fit(X, y) self.recalculate_attributes(force=True) return self - def partial_fit(self, X, y=None): - # type: (Iterable[str], Any) -> FeatureUnhasher + def partial_fit(self, X: Iterable[str], y=None) -> 'FeatureUnhasher': self._term_counts.update(X) self._attributes_dirty = True return self - def get_feature_names(self, always_signed=True, always_positive=False): - # type: (bool, bool) -> FeatureNames + def get_feature_names_out(self, always_signed=True, always_positive=False) -> FeatureNames: self.recalculate_attributes() # lists of names with signs of known features @@ -155,7 +144,7 @@ def get_feature_names(self, always_signed=True, always_positive=False): for name in names] else: if not always_signed and _invert_signs(signs): - signs = [-sign for sign in signs] + signs = -signs feature_names[col_id] = [{'name': name, 'sign': sign} for name, sign in zip(names, signs)] return FeatureNames( @@ -164,23 +153,18 @@ def get_feature_names(self, always_signed=True, always_positive=False): unkn_template=self.unkn_template) def recalculate_attributes(self, force=False): - # type: (bool) -> None """ Update all computed attributes. It is only needed if you need to access computed attributes after :meth:`patrial_fit` was called. """ if not self._attributes_dirty and not force: return - terms = [term for term, _ in self._term_counts.most_common()] - if six.PY2: - terms = np.array(terms, dtype=np.object) - else: - terms = np.array(terms) + terms = np.array([term for term, _ in self._term_counts.most_common()]) if len(terms): indices, signs = _get_indices_and_signs(self.hasher, terms) else: indices, signs = np.array([]), np.array([]) - self.terms_ = terms # type: np.ndarray + self.terms_: np.ndarray = terms self.term_columns_ = indices self.term_signs_ = signs self.collisions_ = _get_collisions(indices) @@ -197,8 +181,7 @@ def _get_column_signs(self): colums_signs[hash_id] = 1 return colums_signs - def _get_collision_info(self): - # type: () -> Tuple[List[int], List[np.ndarray], List[np.ndarray]] + def _get_collision_info(self) -> tuple[list[int], list[np.ndarray], list[np.ndarray]]: column_ids, term_names, term_signs = [], [], [] for column_id, _term_ids in self.collisions_.items(): column_ids.append(column_id) @@ -207,13 +190,12 @@ def _get_collision_info(self): return column_ids, term_names, term_signs -def _get_collisions(indices): - # type: (...) -> Dict[int, List[int]] +def _get_collisions(indices) -> dict[int, list[int]]: """ Return a dict ``{column_id: [possible term ids]}`` with collision information. """ - collisions = defaultdict(list) # type: Dict[int, List[int]] + collisions: dict[int, list[int]] = defaultdict(list) for term_id, hash_id in enumerate(indices): collisions[hash_id].append(term_id) return dict(collisions) @@ -247,12 +229,12 @@ def is_invhashing(vec): def handle_hashing_vec(vec, feature_names, coef_scale, with_coef_scale=True): """ Return feature_names and coef_scale (if with_coef_scale is True), - calling .get_feature_names for invhashing vectorizers. 
+ calling .get_feature_names_out for invhashing vectorizers. """ needs_coef_scale = with_coef_scale and coef_scale is None if is_invhashing(vec): if feature_names is None: - feature_names = vec.get_feature_names(always_signed=False) + feature_names = vec.get_feature_names_out(always_signed=False) if needs_coef_scale: coef_scale = vec.column_signs_ elif (isinstance(vec, FeatureUnion) and @@ -266,15 +248,15 @@ def handle_hashing_vec(vec, feature_names, coef_scale, with_coef_scale=True): return (feature_names, coef_scale) if with_coef_scale else feature_names -def _invhashing_union_feature_names_scale(vec_union): - # type: (FeatureUnion) -> Tuple[FeatureNames, np.ndarray] - feature_names_store = {} # type: Dict[int, Union[str, List]] +def _invhashing_union_feature_names_scale( + vec_union: FeatureUnion) -> tuple[FeatureNames, np.ndarray]: + feature_names_store: dict[int, Union[str, list]] = {} unkn_template = None shift = 0 coef_scale_values = [] for vec_name, vec in vec_union.transformer_list: if isinstance(vec, InvertableHashingVectorizer): - vec_feature_names = vec.get_feature_names(always_signed=False) + vec_feature_names = vec.get_feature_names_out(always_signed=False) unkn_template = vec_feature_names.unkn_template for idx, fs in vec_feature_names.feature_names.items(): new_fs = [] @@ -286,7 +268,7 @@ def _invhashing_union_feature_names_scale(vec_union): coef_scale_values.append((shift, vec.column_signs_)) shift += vec_feature_names.n_features else: - vec_feature_names = vec.get_feature_names() + vec_feature_names = vec.get_feature_names_out() feature_names_store.update( (shift + idx, '{}__{}'.format(vec_name, fname)) for idx, fname in enumerate(vec_feature_names)) @@ -303,10 +285,8 @@ def _invhashing_union_feature_names_scale(vec_union): def invert_hashing_and_fit( - vec, # type: Union[FeatureUnion, HashingVectorizer] - docs - ): - # type: (...) -> Union[FeatureUnion, InvertableHashingVectorizer] + vec: Union[FeatureUnion, HashingVectorizer], docs, + ) -> Union[FeatureUnion, InvertableHashingVectorizer]: """ Create an :class:`~.InvertableHashingVectorizer` from hashing vectorizer vec and fit it on docs. If vec is a FeatureUnion, do it for all hashing vectorizers in the union. @@ -323,8 +303,7 @@ def invert_hashing_and_fit( return vec -def _fit_invhashing_union(vec_union, docs): - # type: (FeatureUnion, Any) -> FeatureUnion +def _fit_invhashing_union(vec_union: FeatureUnion, docs) -> FeatureUnion: """ Fit InvertableHashingVectorizer on doc inside a FeatureUnion. 
""" return FeatureUnion( diff --git a/eli5/sklearn/utils.py b/eli5/sklearn/utils.py index 286d078a..7493b974 100644 --- a/eli5/sklearn/utils.py +++ b/eli5/sklearn/utils.py @@ -1,26 +1,41 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from distutils.version import LooseVersion -from typing import Any, Optional, List, Tuple +from typing import Any, Optional, Union import numpy as np import scipy.sparse as sp +import sklearn.base from sklearn.multiclass import OneVsRestClassifier from eli5.sklearn.unhashing import invert_hashing_and_fit, handle_hashing_vec from eli5._feature_names import FeatureNames -def is_multiclass_classifier(clf): - # type: (Any) -> bool +def is_classifier(estimator): + try: + return sklearn.base.is_classifier(estimator) + except AttributeError: + # old xgboost < 2.0.0 is not compatible with new sklean here + try: + import xgboost + except ImportError: + pass + else: + if isinstance(estimator, xgboost.XGBClassifier): + return True + elif isinstance(estimator, (xgboost.XGBRanker, xgboost.XGBRegressor)): + return False + raise + + +def is_multiclass_classifier(clf) -> bool: """ Return True if a classifier is multiclass or False if it is binary. """ + if isinstance(clf, OneVsRestClassifier): + return len(clf.estimators_) > 1 return clf.coef_.shape[0] > 1 -def is_multitarget_regressor(clf): - # type: (Any) -> bool +def is_multitarget_regressor(clf) -> bool: """ Return True if a regressor is multitarget or False if it predicts a single target. @@ -28,8 +43,7 @@ def is_multitarget_regressor(clf): return len(clf.coef_.shape) > 1 and clf.coef_.shape[0] > 1 -def is_probabilistic_classifier(clf): - # type: (Any) -> bool +def is_probabilistic_classifier(clf) -> bool: """ Return True if a classifier can return probabilities """ if not hasattr(clf, 'predict_proba'): return False @@ -40,8 +54,7 @@ def is_probabilistic_classifier(clf): return True -def predict_proba(estimator, X): - # type: (Any, Any) -> Optional[np.ndarray] +def predict_proba(estimator, X) -> Optional[np.ndarray]: """ Return result of predict_proba, if an estimator supports it, or None. """ if is_probabilistic_classifier(estimator): @@ -54,34 +67,34 @@ def predict_proba(estimator, X): return None -def has_intercept(estimator): - # type: (Any) -> bool +def has_intercept(estimator) -> bool: """ Return True if an estimator has intercept fit. """ + if isinstance(estimator, OneVsRestClassifier): + estimator = estimator.estimator if hasattr(estimator, 'fit_intercept'): return estimator.fit_intercept if hasattr(estimator, 'intercept_'): if estimator.intercept_ is None: return False # scikit-learn sets intercept to zero vector if it is not fit - return np.any(estimator.intercept_) + return bool(np.any(estimator.intercept_)) return False def get_feature_names(clf, vec=None, bias_name='', feature_names=None, - num_features=None, estimator_feature_names=None): - # type: (Any, Any, Optional[str], Any, int, Any) -> FeatureNames + num_features=None, estimator_feature_names=None) -> FeatureNames: """ Return a FeatureNames instance that holds all feature names and a bias feature. - If vec is None or doesn't have get_feature_names() method, + If vec is None or doesn't have get_feature_names_out() method, features are named x0, x1, x2, etc. 
""" if not has_intercept(clf): bias_name = None if feature_names is None: - if vec and hasattr(vec, 'get_feature_names'): - return FeatureNames(vec.get_feature_names(), bias_name=bias_name) + if vec and hasattr(vec, 'get_feature_names_out'): + return FeatureNames(vec.get_feature_names_out(), bias_name=bias_name) else: if estimator_feature_names is None: num_features = num_features or get_num_features(clf) @@ -112,11 +125,11 @@ def get_feature_names(clf, vec=None, bias_name='', feature_names=None, return FeatureNames(feature_names, bias_name=bias_name) -def get_feature_names_filtered(clf, vec=None, bias_name='', - feature_names=None, num_features=None, - feature_filter=None, feature_re=None, - estimator_feature_names=None): - # type: (...) -> Tuple[FeatureNames, List[int]] +def get_feature_names_filtered( + clf, vec=None, bias_name='', + feature_names=None, num_features=None, + feature_filter=None, feature_re=None, + estimator_feature_names=None) -> tuple[FeatureNames, list[int]]: feature_names = get_feature_names( clf=clf, vec=vec, @@ -153,7 +166,13 @@ def get_coef(clf, label_id, scale=None): ``scale`` (optional) is a scaling vector; coef_[i] => coef[i] * scale[i] if scale[i] is not nan. Intercept is not scaled. """ - if len(clf.coef_.shape) == 2: + if isinstance(clf, OneVsRestClassifier): + coef = clf.estimators_[label_id].coef_ + if len(coef.shape) == 2 and coef.shape[0] == 1: + coef = coef[0] + if len(coef.shape) != 1: + raise ValueError(f'Unexpected coef shape: {coef.shape}') + elif len(clf.coef_.shape) == 2: # Most classifiers (even in binary case) and regressors coef = _dense_1d(clf.coef_[label_id]) elif len(clf.coef_.shape) == 1: @@ -166,7 +185,7 @@ def get_coef(clf, label_id, scale=None): # Lasso with one feature: 0D array coef = np.array([clf.coef_]) else: - raise ValueError('Unexpected clf.coef_ shape: %s' % clf.coef_.shape) + raise ValueError(f'Unexpected coef shape: {clf.coef_.shape}') if scale is not None: if coef.shape != scale.shape: @@ -180,7 +199,9 @@ def get_coef(clf, label_id, scale=None): if not has_intercept(clf): return coef - if label_id == 0 and not isinstance(clf.intercept_, np.ndarray): + if isinstance(clf, OneVsRestClassifier): + bias = clf.estimators_[label_id].intercept_ + elif label_id == 0 and not isinstance(clf.intercept_, np.ndarray): bias = clf.intercept_ else: bias = clf.intercept_[label_id] @@ -221,6 +242,7 @@ def get_num_features(estimator): def get_X(doc, vec=None, vectorized=False, to_dense=False): + X: Union[np.ndarray, sp._base._spbase] if vec is None or vectorized: if isinstance(doc, np.ndarray): X = np.array([doc]) @@ -232,6 +254,7 @@ def get_X(doc, vec=None, vectorized=False, to_dense=False): else: X = vec.transform([doc]) if to_dense and sp.issparse(X): + assert isinstance(X, sp._base._spbase) X = X.toarray() return X @@ -247,8 +270,9 @@ def get_X0(X): return x -def handle_vec(clf, doc, vec, vectorized, feature_names, num_features=None): - # type: (...) -> Tuple[Any, FeatureNames] +def handle_vec( + clf, doc, vec, vectorized, feature_names, num_features=None, + ) -> tuple[Any, FeatureNames]: if not vectorized: vec = invert_hashing_and_fit(vec, [doc]) if (vec is None and feature_names is None and @@ -270,12 +294,3 @@ def add_intercept(X): return sp.hstack([X, intercept]).tocsr() else: return np.hstack([X, intercept]) - - -def sklearn_version(): - """Return sklearn version object which can be used for comparison. 
Usage: - >>> sklearn_version() > '0.17' - True - """ - from sklearn import __version__ - return LooseVersion(__version__) diff --git a/eli5/sklearn_crfsuite/explain_weights.py b/eli5/sklearn_crfsuite/explain_weights.py index 6007efd7..702854ee 100644 --- a/eli5/sklearn_crfsuite/explain_weights.py +++ b/eli5/sklearn_crfsuite/explain_weights.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - import numpy as np from scipy import sparse as sp from sklearn_crfsuite import CRF @@ -30,9 +27,9 @@ def explain_weights_sklearn_crfsuite(crf, transition_coef = crf_transition_coef(crf) if feature_filter is not None or feature_re is not None: - state_feature_names, flt_indices = ( + state_feature_names_obj, flt_indices = ( FeatureNames(feature_names).handle_filter(feature_filter, feature_re)) - state_feature_names = np.array(state_feature_names.feature_names) + state_feature_names = np.array(state_feature_names_obj.feature_names) state_coef = state_coef[:, flt_indices] else: state_feature_names = feature_names @@ -57,7 +54,7 @@ def _features(label_id): for label_id, label in zip(indices, names) ], transition_features=TransitionFeatureWeights( - class_names=names, + class_names=list(names), coef=transition_coef, ), estimator=repr(crf), diff --git a/eli5/transform.py b/eli5/transform.py index d79082a8..d89b52b2 100644 --- a/eli5/transform.py +++ b/eli5/transform.py @@ -12,7 +12,7 @@ def transform_feature_names(transformer, in_names=None): transformations for each class of transformer. If there is no ``singledispatch`` handler registered for a transformer - class, ``transformer.get_feature_names()`` method is called; if there is + class, ``transformer.get_feature_names_out()`` method is called; if there is no such method then feature names are not supported and this function raises an exception. @@ -28,7 +28,7 @@ def transform_feature_names(transformer, in_names=None): ------- feature_names : list of str """ - if hasattr(transformer, 'get_feature_names'): - return transformer.get_feature_names() + if hasattr(transformer, 'get_feature_names_out'): + return transformer.get_feature_names_out() raise NotImplementedError('transform_feature_names not available for ' '{}'.format(transformer)) diff --git a/eli5/utils.py b/eli5/utils.py index e5f669f8..2ff37566 100644 --- a/eli5/utils.py +++ b/eli5/utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy as np from scipy import sparse as sp @@ -56,10 +55,10 @@ def is_sparse_vector(x): def indices_to_bool_mask(indices, size): """ Convert indices to a boolean (integer) mask. 
-    >>> list(indices_to_bool_mask(np.array([2, 3]), 4))
+    >>> list(map(bool, indices_to_bool_mask(np.array([2, 3]), 4)))
     [False, False, True, True]
 
-    >>> list(indices_to_bool_mask([2, 3], 4))
+    >>> list(map(bool, indices_to_bool_mask([2, 3], 4)))
     [False, False, True, True]
 
     >>> indices_to_bool_mask(np.array([5]), 2)
diff --git a/eli5/xgboost.py b/eli5/xgboost.py
index 969cccf2..283ac228 100644
--- a/eli5/xgboost.py
+++ b/eli5/xgboost.py
@@ -1,11 +1,11 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from functools import partial
 import re
-from typing import Any, Dict, List, Tuple, Optional, Pattern
+import warnings
+from functools import partial
+from typing import Any, Optional, Pattern, Union
 
 import numpy as np
 import scipy.sparse as sp
+import xgboost
 from xgboost import (
     XGBClassifier,
     XGBRegressor,
@@ -41,7 +41,7 @@ def explain_weights_xgboost(xgb,
                             target_names=None,  # ignored
                             targets=None,  # ignored
                             feature_names=None,
-                            feature_re=None,  # type: Pattern[str]
+                            feature_re: Optional[Pattern[str]] = None,
                             feature_filter=None,
                             importance_type='gain',
                             ):
@@ -98,11 +98,11 @@ def explain_prediction_xgboost(
         target_names=None,
         targets=None,
         feature_names=None,
-        feature_re=None,  # type: Pattern[str]
+        feature_re: Optional[Pattern[str]] = None,
         feature_filter=None,
-        vectorized=False,  # type: bool
-        is_regression=None,  # type: bool
-        missing=None,  # type: bool
+        vectorized: bool = False,
+        is_regression: Optional[bool] = None,
+        missing: Optional[Any] = None,
         ):
     """ Return an explanation of XGBoost prediction (via scikit-learn wrapper
     XGBClassifier or XGBRegressor, or via xgboost.Booster) as feature weights.
@@ -149,13 +149,18 @@ def explain_prediction_xgboost(
     changes from parent to child. Weights of all features sum to the
     output score of the estimator.
     """
+    if not xgboost.__version__.startswith(('0.', '1.')):
+        warnings.warn(
+            'This explanation might be incorrect, '
+            'only xgboost < 2.0.0 is known to work correctly')
+
     booster, is_regression = _check_booster_args(xgb, is_regression)
     xgb_feature_names = _get_booster_feature_names(booster)
     vec, feature_names = handle_vec(
         xgb, doc, vec, vectorized, feature_names,
         num_features=len(xgb_feature_names))
     if feature_names.bias_name is None:
-        # XGBoost estimators do not have an intercept, but here we interpret
+        # Some XGBoost estimators do not have an intercept, but here we interpret
         # them as having an intercept
         feature_names.bias_name = ''
 
@@ -171,7 +176,7 @@ def explain_prediction_xgboost(
 
     if isinstance(xgb, Booster):
         prediction = xgb.predict(dmatrix)
-        n_targets = prediction.shape[-1]  # type: int
+        n_targets: int = prediction.shape[-1]
         if is_regression is None:
             # When n_targets is 1, this can be classification too,
             # but it's safer to assume regression.
@@ -189,6 +194,7 @@ def explain_prediction_xgboost(
         proba = predict_proba(xgb, X)
         n_targets = _xgb_n_targets(xgb)
 
+    names: Union[list[str], np.ndarray]
     if is_regression:
         names = ['y']
     elif isinstance(xgb, Booster):
@@ -221,8 +227,7 @@ def explain_prediction_xgboost(
     )
 
 
-def _check_booster_args(xgb, is_regression=None):
-    # type: (Any, Optional[bool]) -> Tuple[Booster, Optional[bool]]
+def _check_booster_args(xgb, is_regression: Optional[bool] = None) -> tuple[Booster, Optional[bool]]:
     if isinstance(xgb, Booster):
         booster = xgb
     else:
@@ -309,8 +314,7 @@ def _indexed_leafs(parent):
     return indexed
 
 
-def _parent_value(children):
-    # type: (...) -> int
+def _parent_value(children) -> int:
     """ Value of the parent node: a weighted sum of child values.
""" covers = np.array([child['cover'] for child in children]) @@ -319,8 +323,7 @@ def _parent_value(children): return np.sum(leafs * covers) -def _xgb_n_targets(xgb): - # type: (...) -> int +def _xgb_n_targets(xgb) -> int: if isinstance(xgb, XGBClassifier): return 1 if xgb.n_classes_ == 2 else xgb.n_classes_ elif isinstance(xgb, XGBRegressor): @@ -344,13 +347,12 @@ def _xgb_feature_importances(booster, importance_type, feature_names): return all_features / all_features.sum() -def _parse_tree_dump(text_dump): - # type: (str) -> Optional[Dict[str, Any]] +def _parse_tree_dump(text_dump: str) -> Optional[dict[str, Any]]: """ Parse text tree dump (one item of a list returned by Booster.get_dump()) into json format that will be used by next XGBoost release. """ result = None - stack = [] # type: List[Dict] + stack: list[dict] = [] for line in text_dump.split('\n'): if line: depth, node = _parse_dump_line(line) @@ -368,8 +370,7 @@ def _parse_tree_dump(text_dump): return result -def _parse_dump_line(line): - # type: (str) -> Tuple[int, Dict[str, Any]] +def _parse_dump_line(line: str) -> tuple[int, dict[str, Any]]: branch_match = re.match( r'^(\t*)(\d+):\[([^<]+)<([^\]]+)\] ' r'yes=(\d+),no=(\d+),missing=(\d+),' diff --git a/requirements.txt b/requirements.txt index a66f621d..6b1f7ff4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy >= 1.9.0 scipy singledispatch >= 3.4.0.3 -scikit-learn >= 0.20 +scikit-learn >= 1.0 attrs > 16.0.0 jinja2 >= 3.0.0 pip >= 8.1 diff --git a/setup.py b/setup.py index ee0b26fa..bbdd2877 100755 --- a/setup.py +++ b/setup.py @@ -35,12 +35,11 @@ def get_long_description(): 'jinja2 >= 3.0.0', 'numpy >= 1.9.0', 'scipy', - 'six', - 'scikit-learn >= 0.20', + 'scikit-learn >= 1.6.0', 'graphviz', 'tabulate>=0.7.7', ], - python_requires=">=3.6", + python_requires=">=3.9", classifiers=[ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: MIT License', @@ -48,10 +47,10 @@ def get_long_description(): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', ], ) diff --git a/tests/test_formatters_as_dict.py b/tests/test_formatters_as_dict.py index 3658719b..c5ce6fca 100644 --- a/tests/test_formatters_as_dict.py +++ b/tests/test_formatters_as_dict.py @@ -1,29 +1,13 @@ -import json - import numpy as np from eli5.base import ( Explanation, TargetExplanation, FeatureWeights, FeatureWeight) -from eli5.formatters.as_dict import format_as_dict, _numpy_to_python +from eli5.formatters.as_dict import format_as_dict # format_as_dict is called in eli5.tests.utils.format_as_all -def test_numpy_to_python(): - x = _numpy_to_python({ - 'x': np.int32(12), - 'y': [np.ones(2)], - 'z': {'inner': np.bool_(False)}, - }) - assert x == { - 'x': 12, - 'y': [[1.0, 1.0]], - 'z': {'inner': False}, - } - json.dumps(x) - - def test_format_as_dict(): assert format_as_dict(Explanation( estimator='some estimator', diff --git a/tests/test_formatters_utils.py b/tests/test_formatters_utils.py index 2a974e8a..f9e466ea 100644 --- a/tests/test_formatters_utils.py +++ b/tests/test_formatters_utils.py @@ -1,6 +1,9 @@ +import json + +import numpy as np import pytest -from eli5.formatters.utils import 
tabulate, format_value +from eli5.formatters.utils import tabulate, format_value, numpy_to_python def test_tabulate(): @@ -53,3 +56,17 @@ def test_format_value(): assert format_value(float('nan')) == 'Missing' assert format_value(12.23333334) == '12.233' assert format_value(-12.23333334) == '-12.233' + + +def test_numpy_to_python(): + x = numpy_to_python({ + 'x': np.int32(12), + 'y': [np.ones(2)], + 'z': {'inner': np.bool_(False)}, + }) + assert x == { + 'x': 12, + 'y': [[1.0, 1.0]], + 'z': {'inner': False}, + } + json.dumps(x) diff --git a/tests/test_lightgbm.py b/tests/test_lightgbm.py index da98951e..94091692 100644 --- a/tests/test_lightgbm.py +++ b/tests/test_lightgbm.py @@ -92,7 +92,7 @@ def test_explain_prediction_clf_multitarget(newsgroups_train): docs, ys, target_names = newsgroups_train from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS stop_words = set(ENGLISH_STOP_WORDS) | {'does', 'just'} - vec = CountVectorizer(stop_words=stop_words, dtype=np.float64) + vec = CountVectorizer(stop_words=list(stop_words), dtype=np.float64) xs = vec.fit_transform(docs) clf = LGBMClassifier(n_estimators=100, max_depth=2, min_child_samples=1, min_child_weight=1, diff --git a/tests/test_lightning.py b/tests/test_lightning.py index cebbb7a8..56c5a0fa 100644 --- a/tests/test_lightning.py +++ b/tests/test_lightning.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - import pytest pytest.importorskip('lightning') @@ -53,11 +50,10 @@ def test_explain_predition_classifiers_binary(newsgroups_train_binary, clf): @pytest.mark.parametrize(['clf'], _instances(_CLASSIFIERS)) def test_explain_weights_classifiers(newsgroups_train, clf): clf = OneVsRestClassifier(clf) - assert_explained_weights_linear_classifier(newsgroups_train, clf, - add_bias=True) + assert_explained_weights_linear_classifier(newsgroups_train, clf) if _CLASSIFIERS.index(type(clf.estimator)) == 0: assert_explained_weights_linear_classifier( - newsgroups_train, clf, add_bias=True, + newsgroups_train, clf, explain_weights=explain_weights_lightning) diff --git a/tests/test_lime.py b/tests/test_lime.py index 92d3d7f2..137e4048 100644 --- a/tests/test_lime.py +++ b/tests/test_lime.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - import numpy as np from sklearn.feature_extraction.text import HashingVectorizer, CountVectorizer from sklearn.naive_bayes import MultinomialNB @@ -15,11 +12,7 @@ def test_lime_explain_probabilistic(newsgroups_train): docs, y, target_names = newsgroups_train - try: - vec = HashingVectorizer(alternate_sign=False) - except TypeError: - # sklearn < 0.19 - vec = HashingVectorizer(non_negative=True) + vec = HashingVectorizer(alternate_sign=False) clf = MultinomialNB() X = vec.fit_transform(docs) @@ -154,7 +147,7 @@ def test_text_explainer_token_pattern(): predict_proba = substring_presence_predict_proba('bar') # a different token_pattern - te = TextExplainer(token_pattern=r'(?u)\b[-\w]+\b') + te = TextExplainer(token_pattern=r'\b[-\w]+\b') te.fit(text, predict_proba) print(te.metrics_) assert te.metrics_['score'] > 0.95 diff --git a/tests/test_sklearn_explain_prediction.py b/tests/test_sklearn_explain_prediction.py index 277fee46..957ad5b0 100644 --- a/tests/test_sklearn_explain_prediction.py +++ b/tests/test_sklearn_explain_prediction.py @@ -1,9 +1,6 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import from functools import partial from pprint import pprint import re -from typing import List import pytest import numpy as np @@ 
-275,12 +272,11 @@ def assert_predicted_class_used(clf, X): return assert_class_used(clf, X, y_pred) -def assert_class_used(clf, X, y, **explain_kwargs): - # type: (...) -> List[Explanation] +def assert_class_used(clf, X, y, **explain_kwargs) -> list[Explanation]: """ Check that classes y are used for explanations of X predictions """ explanations = [] for x, pred_target in zip(X, y): - res = explain_prediction(clf, x, **explain_kwargs) # type: Explanation + res: Explanation = explain_prediction(clf, x, **explain_kwargs) explanations.append(res) assert len(res.targets) == 1 if res.targets[0].score != 0: @@ -339,11 +335,11 @@ def _assert_feature_filter_works(get_res, x): @pytest.mark.parametrize(['clf'], [ [LogisticRegression(random_state=42)], - [LogisticRegression(random_state=42, multi_class='multinomial', solver='lbfgs')], + [LogisticRegression(random_state=42, solver='lbfgs')], [LogisticRegression(random_state=42, fit_intercept=False)], [LogisticRegressionCV(random_state=42)], [SGDClassifier(**SGD_KWARGS)], - [SGDClassifier(loss='log', **SGD_KWARGS)], + [SGDClassifier(loss='log_loss', **SGD_KWARGS)], [PassiveAggressiveClassifier(random_state=42)], [Perceptron(random_state=42)], [RidgeClassifier(random_state=42)], diff --git a/tests/test_sklearn_explain_weights.py b/tests/test_sklearn_explain_weights.py index 7f5469c2..407eb636 100644 --- a/tests/test_sklearn_explain_weights.py +++ b/tests/test_sklearn_explain_weights.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import from functools import partial import re @@ -107,26 +105,19 @@ def get_result(): assert 'space' in pos pos, neg = _top('talk.religion.misc') - assert 'jesus' in pos or 'christians' in pos + assert 'jesus' in pos or 'christians' in pos or 'bible' in pos assert res == get_result() def assert_explained_weights_linear_classifier( - newsgroups_train, clf, add_bias=False, explain_weights=explain_weights, + newsgroups_train, clf, explain_weights=explain_weights, binary=False): docs, y, target_names = newsgroups_train vec = TfidfVectorizer() X = vec.fit_transform(docs) - if add_bias: - X = sp.hstack([X, np.ones((X.shape[0], 1))]) - feature_names = vec.get_feature_names() + ['BIAS'] - else: - feature_names = None - clf.fit(X, y) check_newsgroups_explanation_linear(clf, vec, target_names, - feature_names=feature_names, explain_weights=explain_weights, binary=binary, top=(20, 20)) @@ -158,13 +149,13 @@ def assert_explained_weights_linear_regressor(boston_train, reg, has_bias=True): @pytest.mark.parametrize(['clf'], [ [LogisticRegression(random_state=42)], - [LogisticRegression(random_state=42, multi_class='multinomial', solver='lbfgs')], + [LogisticRegression(random_state=42, solver='lbfgs')], [LogisticRegression(random_state=42, fit_intercept=False)], [LogisticRegressionCV(random_state=42)], [RidgeClassifier(random_state=42)], [RidgeClassifierCV()], [SGDClassifier(**SGD_KWARGS)], - [SGDClassifier(loss='log', **SGD_KWARGS)], + [SGDClassifier(loss='log_loss', **SGD_KWARGS)], [PassiveAggressiveClassifier(random_state=42)], [Perceptron(random_state=42)], [LinearSVC(random_state=42)], @@ -281,7 +272,7 @@ def test_explain_linear_hashed_pos_neg(newsgroups_train, pass_feature_weights): if pass_feature_weights: res = explain_weights( clf, top=(10, 10), target_names=target_names, - feature_names=ivec.get_feature_names(always_signed=False), + feature_names=ivec.get_feature_names_out(always_signed=False), coef_scale=ivec.column_signs_) else: res = explain_weights( @@ -481,14 +472,14 @@ def test_unsupported(): 
[ElasticNetCV(random_state=42)], [HuberRegressor()], [Lars()], - [LarsCV(max_n_alphas=10)], + [LarsCV(max_n_alphas=100)], [Lasso(random_state=42)], [LassoCV(random_state=42)], [LassoLars(alpha=0.01)], [LassoLarsCV(max_n_alphas=10)], [LassoLarsIC()], [OrthogonalMatchingPursuit(n_nonzero_coefs=10)], - [OrthogonalMatchingPursuitCV()], + [OrthogonalMatchingPursuitCV(max_iter=10)], [PassiveAggressiveRegressor(C=0.1, random_state=42)], [Ridge(random_state=42)], [RidgeCV()], diff --git a/tests/test_sklearn_permutation_importance.py b/tests/test_sklearn_permutation_importance.py index 9848f4a7..45b606f6 100644 --- a/tests/test_sklearn_permutation_importance.py +++ b/tests/test_sklearn_permutation_importance.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import pytest import numpy as np from sklearn.base import is_classifier, is_regressor diff --git a/tests/test_sklearn_transform.py b/tests/test_sklearn_transform.py index 1d25466c..d1b2cc8c 100644 --- a/tests/test_sklearn_transform.py +++ b/tests/test_sklearn_transform.py @@ -6,9 +6,9 @@ from sklearn.feature_selection import ( SelectPercentile, SelectKBest, - SelectFpr, # TODO: add tests and document - SelectFdr, # TODO: add tests and document - SelectFwe, # TODO: add tests and document + # SelectFpr, # TODO: add tests and document + # SelectFdr, # TODO: add tests and document + # SelectFwe, # TODO: add tests and document GenericUnivariateSelect, VarianceThreshold, RFE, @@ -16,17 +16,6 @@ SelectFromModel, ) from sklearn.linear_model import LogisticRegression -_additional_test_cases = [] -try: - from sklearn.linear_model import ( # type: ignore - RandomizedLogisticRegression, - RandomizedLasso, # TODO: add tests and document - ) - _additional_test_cases.append( - (RandomizedLogisticRegression(random_state=42), - ['', '', ''])) -except ImportError: # Removed in scikit-learn 0.21 - pass from sklearn.preprocessing import ( MinMaxScaler, StandardScaler, @@ -46,7 +35,7 @@ def fit(self, X, y=None): def transform(self, X): return X[:, :3] - def get_feature_names(self): + def get_feature_names_out(self): return ['f1', 'f2', 'f3'] @@ -95,7 +84,7 @@ def selection_score_func(X, y): ['', '']), (RFECV(LogisticRegression(solver='liblinear', random_state=42, multi_class='ovr'), cv=3), ['', '', '', '']), -] + _additional_test_cases) +]) def test_transform_feature_names_iris(transformer, expected, iris_train): X, y, _, _ = iris_train transformer.fit(X, y) diff --git a/tests/test_sklearn_unhashing.py b/tests/test_sklearn_unhashing.py index 51c3f174..83de1c3e 100644 --- a/tests/test_sklearn_unhashing.py +++ b/tests/test_sklearn_unhashing.py @@ -6,7 +6,6 @@ from sklearn.feature_extraction.text import HashingVectorizer from eli5.sklearn.unhashing import InvertableHashingVectorizer -from eli5.sklearn.utils import sklearn_version @pytest.mark.parametrize( ['always_signed', 'binary', 'alternate_sign'], [ @@ -22,12 +21,8 @@ def test_invertable_hashing_vectorizer(always_signed, binary, alternate_sign): n_features = 8 n_words = 4 * n_features - kwargs = dict(n_features=n_features, binary=binary) - if sklearn_version() < '0.19': - kwargs['non_negative'] = not alternate_sign - else: - kwargs['alternate_sign'] = alternate_sign - vec = HashingVectorizer(**kwargs) + vec = HashingVectorizer( + n_features=n_features, binary=binary, alternate_sign=alternate_sign) words = ['word_{}'.format(i) for i in range(n_words)] corpus = [w for i, word in enumerate(words, 1) for w in repeat(word, i)] split = len(corpus) // 2 @@ -49,7 +44,7 @@ def test_invertable_hashing_vectorizer(always_signed, 
binary, alternate_sign): def check_feature_names(vec, ivec, always_signed, corpus, alternate_sign): - feature_names = ivec.get_feature_names(always_signed=always_signed) + feature_names = ivec.get_feature_names_out(always_signed=always_signed) seen_words = set() counts = Counter(corpus) for idx, collisions in enumerate(feature_names): diff --git a/tests/test_sklearn_utils.py b/tests/test_sklearn_utils.py index b9a34904..96f45189 100644 --- a/tests/test_sklearn_utils.py +++ b/tests/test_sklearn_utils.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - import numpy as np import pytest from sklearn.datasets import make_classification, make_regression diff --git a/tests/test_sklearn_vectorizers.py b/tests/test_sklearn_vectorizers.py index 0575c8b1..47e8bd23 100644 --- a/tests/test_sklearn_vectorizers.py +++ b/tests/test_sklearn_vectorizers.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import from pprint import pprint import attr @@ -79,7 +77,7 @@ def test_explain_hashing_vectorizer(newsgroups_train_binary): assert res_vectorized == _without_weighted_spans(res) assert res == get_res( - feature_names=ivec.get_feature_names(always_signed=False)) + feature_names=ivec.get_feature_names_out(always_signed=False)) def _without_weighted_spans(res): @@ -105,7 +103,7 @@ def test_explain_linear_dense(): [test_day_vec] = vec.transform(test_day) res2 = explain_prediction( clf, test_day_vec, target_names=target_names, - vectorized=True, feature_names=vec.get_feature_names()) + vectorized=True, feature_names=vec.get_feature_names_out()) assert res1 == res2 diff --git a/tests/test_xgboost.py b/tests/test_xgboost.py index 477ad147..7207fc26 100644 --- a/tests/test_xgboost.py +++ b/tests/test_xgboost.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import - import pytest import numpy as np import scipy.sparse as sp @@ -45,7 +42,7 @@ def test_explain_booster(newsgroups_train): vec = CountVectorizer() X = vec.fit_transform(docs) booster = xgboost.train( - params={'objective': 'multi:softprob', 'silent': True, 'max_depth': 3, + params={'objective': 'multi:softprob', 'max_depth': 3, 'num_class': len(target_names)}, dtrain=xgboost.DMatrix(X, label=y, missing=np.nan), num_boost_round=10) @@ -75,7 +72,7 @@ def test_explain_xgboost_regressor(boston_train): def test_explain_xgboost_booster(boston_train): xs, ys, feature_names = boston_train booster = xgboost.train( - params={'objective': 'reg:linear', 'silent': True}, + params={'objective': 'reg:squarederror'}, dtrain=xgboost.DMatrix(xs, label=ys), ) res = explain_weights(booster) @@ -97,9 +94,7 @@ def test_explain_prediction_clf_binary( explain_kwargs = {} if use_booster: clf = xgboost.train( - params={'objective': 'binary:logistic', - 'silent': True, - 'max_depth': 2}, + params={'objective': 'binary:logistic', 'max_depth': 2}, dtrain=xgboost.DMatrix(xs, label=ys, missing=missing), num_boost_round=100, ) @@ -159,7 +154,6 @@ def test_explain_prediction_clf_multitarget( clf = xgboost.train( params={'objective': 'multi:softprob', 'num_class': len(target_names), - 'silent': True, 'max_depth': 2}, dtrain=xgboost.DMatrix(xs, label=ys, missing=np.nan), num_boost_round=100, @@ -185,12 +179,14 @@ def test_explain_prediction_clf_multitarget( t.proba for t in res.targets)[-2:] -def test_explain_prediction_clf_xor(): - true_xs = [[np.random.randint(2), np.random.randint(2)] for _ in range(100)] - xs = np.array([[np.random.normal(x, 0.2), np.random.normal(y, 0.2)] 
+@pytest.mark.parametrize('seed', [1, 2, 3]) +def test_explain_prediction_clf_xor(seed): + rng = np.random.RandomState(seed) + true_xs = [[rng.randint(2), rng.randint(2)] for _ in range(100)] + xs = np.array([[rng.normal(x, 0.2), rng.normal(y, 0.2)] for x, y in true_xs]) ys = np.array([x == y for x, y in true_xs]) - clf = XGBClassifier(n_estimators=100, max_depth=2) + clf = XGBClassifier(n_estimators=100, max_depth=2, tree_method='exact') clf.fit(xs, ys) res = explain_prediction(clf, np.array([1, 1])) format_as_all(res, clf) @@ -248,7 +244,7 @@ def test_explain_prediction_reg(boston_train): def test_explain_prediction_reg_booster(boston_train): X, y, feature_names = boston_train booster = xgboost.train( - params={'objective': 'reg:linear', 'silent': True, 'max_depth': 2}, + params={'objective': 'reg:squarederror', 'max_depth': 2}, dtrain=xgboost.DMatrix(X, label=y), ) assert_trained_linear_regression_explained( diff --git a/tests/utils.py b/tests/utils.py index 5eb6c3a9..8eb31e5a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function import os import inspect import json @@ -13,12 +11,9 @@ from eli5.formatters import format_as_text, format_as_html, format_as_dict from eli5.formatters.html import html_escape from eli5.formatters.text import format_signed -from eli5.sklearn.utils import sklearn_version -SGD_KWARGS = {'random_state': 42} -if sklearn_version() >= '0.19': - SGD_KWARGS['tol'] = 1e-3 +SGD_KWARGS = {'random_state': 42, 'tol': 1e-3} def rnd_len_arrays(dtype, min_len=0, max_len=3, elements=None): @@ -90,8 +85,7 @@ def get_names_coefs(feature_weights): for fw in feature_weights] -def check_targets_scores(explanation, atol=1e-8): - # type: (Explanation, float) -> None +def check_targets_scores(explanation: Explanation, atol: float = 1e-8) -> None: """ Check that feature weights sum to target score or proba, if both proba and score are present they match, and that there are no "remaining" features. diff --git a/tox.ini b/tox.ini index a5e3960d..9ddc5479 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,7 @@ [tox] ; if adding or removing an environment, please also update .github/workflows/python-package.yml -envlist = docs,mypy,py36,py36-nodeps,py36-extra,py37,py38,py38-nodeps,py39,py39-nodeps +envlist = docs,mypy,py39,py310,py310-nodeps,py310-extra,py311,py312,py313 [base] deps= @@ -28,90 +28,93 @@ deps= {[base]deps} ipython pandas - + sklearn-crfsuite commands= - ; to install lightning numpy must be installed first - pip install joblib "sklearn-contrib-lightning >= 0.4" pip install -e . 
- bash _ci/runtests_default.sh {posargs: eli5 tests} - ; TODO once sklearn-crfsuite is compatible, use - ; bash _ci/runtests_default_with_crfsuite.sh {posargs: eli5 tests} - - -[testenv:py36-extra] -basepython=python3.6 + py.test --doctest-modules \ + --ignore eli5/xgboost.py \ + --ignore eli5/lightgbm.py \ + --ignore eli5/catboost.py \ + --ignore eli5/keras \ + --ignore eli5/lightning.py \ + --ignore eli5/formatters/image.py \ + --ignore tests/utils_image.py \ + --cov=eli5 --cov-report=html --cov-report=term {posargs: eli5 tests} + + +[testenv:py310-extra] +basepython=python3.10 deps= {[testenv]deps} - xgboost + xgboost < 2.0.0 lightgbm != 2.0.5, != 2.0.6 catboost - tensorflow - keras + # tensorflow + # keras matplotlib Pillow - commands= + ; to install lightning numpy and Cython must be installed first + pip install Cython 'setuptools < 60.0' + pip install joblib "sklearn-contrib-lightning >= 0.4" --no-binary sklearn-contrib-lightning pip install -e . ; run tests for extra dependencies - bash _ci/runtests_extra.sh {posargs: eli5 tests} - -[testenv:py27-extra] -basepython=python2.7 -deps= - {[testenv]deps} - xgboost - lightgbm < 3.2.0 - catboost - tensorflow - keras - matplotlib - Pillow -commands={[testenv:py36-extra]commands} - -[testenv:py36-nodeps] + py.test --doctest-modules \ + --ignore tests/test_lime.py \ + --ignore tests/test_formatters.py \ + --ignore tests/test_samplers.py \ + --ignore tests/test_sklearn_explain_prediction.py \ + --ignore tests/test_sklearn_explain_weights.py \ + --ignore tests/test_sklearn_vectorizers.py \ + --ignore tests/test_utils.py \ + --ignore eli5/lightning.py \ + --ignore eli5/keras \ + --cov=eli5 --cov-report=html --cov-report=term {posargs: eli5 tests} + + +[testenv:py310-nodeps] deps= {[base]deps} - commands= -; without lightning as it is optional pip install -e . - bash _ci/runtests_nodeps.sh {posargs: eli5 tests} - -[testenv:py38-nodeps] -basepython=python3.8 -deps={[base]deps} -commands={[testenv:py36-nodeps]commands} - - -[testenv:py39-nodeps] -basepython=python3.9 -deps={[base]deps} -commands={[testenv:py36-nodeps]commands} + py.test --doctest-modules \ + --ignore eli5/lightning.py \ + --ignore eli5/sklearn_crfsuite \ + --ignore eli5/ipython.py \ + --ignore eli5/xgboost.py \ + --ignore eli5/lightgbm.py \ + --ignore eli5/catboost.py \ + --ignore eli5/keras \ + --ignore eli5/formatters/as_dataframe.py \ + --ignore eli5/formatters/image.py \ + --ignore tests/utils_image.py \ + --cov=eli5 --cov-report=html --cov-report=term {posargs: eli5 tests} [testenv:mypy] -basepython=python3.6 +basepython=python3.10 deps= {[testenv]deps} - mypy == 0.750 + mypy == 1.15.0 + types-tabulate lxml commands= mypy --html-report ./mypy-cov --check-untyped-defs --ignore-missing-imports eli5 [testenv:docs] +basepython=python3.10 deps= - mock==1.0.1 - pillow==8.3.1 - alabaster>=0.7,<0.8,!=0.7.5 - commonmark==0.8.1 - recommonmark==0.5.0 - sphinx<2 - sphinx-rtd-theme<0.5 - readthedocs-sphinx-ext<2.2 - docutils < 0.17.0 + mock==5.2.0 + pillow==11.1.0 + commonmark==0.9.1 + recommonmark==0.7.1 + sphinx==7.1.2 + sphinx-rtd-theme==1.3.0rc1 + readthedocs-sphinx-ext==2.2.5 -rdocs/requirements.txt changedir=docs/source commands= pip install -e ../.. - sphinx-build -W -b html . {envtmpdir}/html + ; TODO re-enable -W + sphinx-build -b html . {envtmpdir}/html
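Below is a minimal usage sketch of the get_feature_names_out() accessor that this patch switches to throughout eli5. It is illustrative only and not part of the patch: the toy corpus and the n_features value are made-up assumptions, and it presumes eli5 with these changes plus a recent scikit-learn are installed.

from sklearn.feature_extraction.text import HashingVectorizer

from eli5.sklearn.unhashing import InvertableHashingVectorizer

# hypothetical example corpus, not taken from the eli5 test suite
docs = ["good movie", "bad movie", "good acting", "boring plot"]
vec = HashingVectorizer(n_features=16, alternate_sign=True)

# Wrap the stateless HashingVectorizer; fitting learns which terms
# hash into which columns so feature names can be reconstructed.
ivec = InvertableHashingVectorizer(vec)
ivec.fit(docs)

# Renamed accessor: get_feature_names_out() replaces get_feature_names();
# with always_signed=False, coefficient signs are handled via column_signs_.
names = ivec.get_feature_names_out(always_signed=False)
print(names.n_features, ivec.column_signs_)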