Trusted-AI · hoffmansc · Nov 21, 2023 · Nov 21, 2023
diff --git a/aif360/metrics/mdss_classification_metric.py b/aif360/metrics/mdss_classification_metric.py
@@ -7,7 +7,6 @@
 from aif360.detectors.mdss.MDSS import MDSS
 
 import pandas as pd
-from sklearn.utils.deprecation import deprecated
 
 
 class MDSSClassificationMetric(ClassificationMetric):
@@ -116,52 +115,3 @@ def score_groups(self, privileged=True, penalty=1e-17):
         return scanner.score_current_subset(
             coordinates, expected, outcomes, dict(subset), penalty
         )
-
-    @deprecated('Change to new interface - aif360.detectors.mdss_detector.bias_scan by version 0.5.0.')
-    def bias_scan(self, privileged=True, num_iters=10, penalty=1e-17):
-        """
-        scan to find the highest scoring subset of records
-
-        :param privileged: flag for group to scan for - privileged group (True) or unprivileged group (False).
-        This abstract the need to explicitly specify the direction of bias to scan for which depends on what the favourable label is.
-        :param num_iters: number of iterations (random restarts)
-        :param penalty: penalty term. Should be positive. The penalty term as with any regularization parameter may need to be
-        tuned for ones use case. The higher the penalty, the less complex (number of features and feature values) the highest scoring
-        subset that gets returned is.
-
-        :returns: the highest scoring subset and the score
-        """
-
-        coordinates = pd.DataFrame(
-            self.classified_dataset.features,
-            columns=self.classified_dataset.feature_names,
-        )
-
-        expected = pd.Series(self.classified_dataset.scores.flatten())
-        outcomes = pd.Series(self.dataset.labels.flatten() == self.dataset.favorable_label, dtype=int)
-
-        # In MDSS, we look for subset whose observations systematically deviates from expectations.
-        # Positive direction means observations are systematically higher than expectations
-        # (or expectations are systematically lower than observations) while
-        # Negative direction means observatons are systematically lower than expectations
-        # (or expectations are systematically higher than observations)
-
-        # For a privileged group, we are looking for a subset whose expectations
-        # (where expectations is obtained from a model) is systematically higher than the observations.
-        # This means we scan in the negative direction.
-
-        # For an uprivileged group, we are looking for a subset whose expectations
-        # (where expectations is obtained from a model) is systematically lower the observations.
-        # This means we scan in the position direction.
-
-        self.kwargs['direction'] = "negative" if privileged else "positive"
-
-        if self.scoring == "Bernoulli":
-            scoring_function = Bernoulli(**self.kwargs)
-        elif self.scoring == "BerkJones":
-            scoring_function = BerkJones(**self.kwargs)
-        else:
-            scoring_function = self.scoring(**self.kwargs)
-
-        scanner = MDSS(scoring_function)
-        return scanner.scan(coordinates, expected, outcomes, penalty, num_iters)
diff --git a/aif360/sklearn/detectors/detectors.py b/aif360/sklearn/detectors/detectors.py
@@ -1,6 +1,6 @@
 from typing import Union
 
-from aif360.detectors import bias_scan
+from aif360.detectors import bias_scan as _bias_scan
 from aif360.detectors.mdss.ScoringFunctions import ScoringFunction
 
 import pandas as pd
@@ -50,7 +50,7 @@ def bias_scan(
      Returns:
         tuple: The highest scoring subset and the score or dict of the highest scoring subset and the score for each category in nominal mode
     """
-    return bias_scan(
+    return _bias_scan(
         data=X,
         observations=y_true,
         expectations=y_pred,
@@ -60,5 +60,5 @@ def bias_scan(
         num_iters=num_iters,
         penalty=penalty,
         mode=mode,
-        kwargs=kwargs
+        **kwargs
     )
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
@@ -1,5 +1,6 @@
 from itertools import permutations
 from typing import Union
+import warnings
 
 import numpy as np
 import pandas as pd
@@ -9,7 +10,6 @@
 from sklearn.metrics._classification import _prf_divide, _check_zero_division
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_X_y
-from sklearn.utils.deprecation import deprecated
 
 from aif360.metrics import ot_metric
 from aif360.sklearn.utils import check_inputs, check_groups
@@ -31,7 +31,7 @@
     'equal_opportunity_difference', 'average_odds_difference', 'average_predictive_value_difference',
     'average_odds_error', 'class_imbalance', 'kl_divergence',
     'conditional_demographic_disparity', 'smoothed_edf',
-    'df_bias_amplification', 'mdss_bias_scan', 'mdss_bias_score',
+    'df_bias_amplification', 'mdss_bias_score',
     # individual fairness
     'generalized_entropy_index', 'generalized_entropy_error',
     'between_group_generalized_entropy_error', 'theil_index',
@@ -946,7 +946,7 @@ def df_bias_amplification(y_true, y_pred, *, prot_attr=None, pos_label=1,
     return eps_pred - eps_true
 
 def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,
-                    scoring='Bernoulli', privileged=True, penalty=1e-17,
+                    scoring='Bernoulli', overpredicted=True, penalty=1e-17,
                     **kwargs):
     """Compute the bias score for a prespecified group of records using a
     given scoring function.
@@ -966,10 +966,14 @@ def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,
         scoring (str or class): One of 'Bernoulli' or 'BerkJones' or
             subclass of
             :class:`aif360.metrics.mdss.ScoringFunctions.ScoringFunction`.
-        privileged (bool): Flag for which direction to scan: privileged
-            (``True``) implies negative (observed worse than predicted outcomes)
-            while unprivileged (``False``) implies positive (observed better
-            than predicted outcomes).
+        overpredicted (bool): Flag for which direction to scan: ``True`` means
+            we scan for a group whose expectations/predictions are
+            systematically higher than the observed outcomes (equivalently,
+            whose observed outcomes are systematically lower than expected).
+            ``False`` means we scan for a group whose expectations/predictions
+            are systematically lower than the observed outcomes (observed
+            outcomes are systematically higher than expected).
+        privileged (bool): Deprecated; use ``overpredicted`` instead.
         penalty (scalar): Penalty coefficient. Should be positive. The higher
             the penalty, the less complex (number of features and feature
             values) the highest scoring subset that gets returned is.
@@ -991,7 +995,12 @@ def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,
     expected = pd.Series(probas_pred).reset_index(drop=True)
     outcomes = pd.Series(y_true == pos_label, dtype=int).reset_index(drop=True)
 
-    direction = 'negative' if privileged else 'positive'
+    # TODO: DEPRECATED. Remove in next version.
+    if 'privileged' in kwargs:
+        warnings.warn("privileged is deprecated. Use overpredicted instead.",
+                      category=FutureWarning)
+        overpredicted = kwargs['privileged']
+    direction = 'negative' if overpredicted else 'positive'
     kwargs['direction'] = direction
 
     if scoring == 'Bernoulli':
@@ -1004,72 +1013,6 @@ def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,
 
     return scanner.score_current_subset(X, expected, outcomes, subset or {}, penalty)
 
-@deprecated('Change to new interface - aif360.sklearn.detectors.mdss_detector.bias_scan by version 0.5.0.')
-def mdss_bias_scan(y_true, probas_pred, X=None, *, pos_label=1,
-                   scoring='Bernoulli', privileged=True, n_iter=10,
-                   penalty=1e-17, **kwargs):
-    """Scan to find the highest scoring subset of records.
-
-    Bias scan is a technique to identify bias in predictive models using subset
-    scanning [#zhang16]_.
-
-    Args:
-        y_true (array-like): Ground truth (correct) target values.
-        probas_pred (array-like): Probability estimates of the positive class.
-        X (dataframe, optional): The dataset (containing the features) that was
-            used to predict `probas_pred`. If not specified, the subset is
-            returned as indices.
-        pos_label (scalar): Label of the positive class.
-        scoring (str or class): One of 'Bernoulli' or 'BerkJones' or
-            subclass of
-            :class:`aif360.metrics.mdss.ScoringFunctions.ScoringFunction`.
-        privileged (bool): Flag for which direction to scan: privileged
-            (``True``) implies negative (observed worse than predicted outcomes)
-            while unprivileged (``False``) implies positive (observed better
-            than predicted outcomes).
-        n_iter (scalar): Number of iterations (random restarts).
-        penalty (scalar): Penalty coefficient. Should be positive. The higher
-            the penalty, the less complex (number of features and feature
-            values) the highest scoring subset that gets returned is.
-        **kwargs: Additional kwargs to be passed to `scoring` (not including
-            `direction`).
-
-    Returns:
-        tuple:
-            Highest scoring subset and its bias score
-
-            * **subset** (dict) -- Mapping of features to values defining the
-              highest scoring subset.
-            * **score** (float) -- Bias score for that group.
-
-    See also:
-        :func:`mdss_bias_score`
-
-    References:
-        .. [#zhang16] `Zhang, Z. and Neill, D. B., "Identifying significant
-           predictive bias in classifiers," arXiv preprint, 2016.
-           <https://arxiv.org/abs/1611.08292>`_
-    """
-    if X is None:
-        X = pd.DataFrame({'index': range(len(y_true))})
-    else:
-        X = X.reset_index(drop=True)  # match all indices
-
-    expected = pd.Series(probas_pred).reset_index(drop=True)
-    outcomes = pd.Series(y_true == pos_label, dtype=int).reset_index(drop=True)
-
-    direction = 'negative' if privileged else 'positive'
-    kwargs['direction'] = direction
-    if scoring == 'Bernoulli':
-        scoring_function = Bernoulli(**kwargs)
-    elif scoring == 'BerkJones':
-        scoring_function = BerkJones(**kwargs)
-    else:
-        scoring_function = scoring(**kwargs)
-    scanner = MDSS(scoring_function)
-
-    return scanner.scan(X, expected, outcomes, penalty, n_iter)
-
 
 # ========================== INDIVIDUAL FAIRNESS ===============================
 def generalized_entropy_index(b, alpha=2):

diff --git a/examples/demo_mdss_classifier_metric.ipynb b/examples/demo_mdss_classifier_metric.ipynb
@@ -44,9 +44,8 @@
     "import numpy as np\n",
     "import pandas as pd\n",
     "\n",
-    "from aif360.metrics import BinaryLabelDatasetMetric \n",
-    "from aif360.metrics.mdss_classification_metric import MDSSClassificationMetric\n",
-    "from aif360.detectors.mdss.ScoringFunctions.Bernoulli import Bernoulli\n",
+    "from aif360.metrics import BinaryLabelDatasetMetric, MDSSClassificationMetric\n",
+    "from aif360.detectors import bias_scan\n",
     "\n",
     "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas"
    ]
@@ -88,9 +87,9 @@
    "source": [
     "dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n",
     "\n",
-    "age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45', \n",
+    "age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45',\n",
     "                                     'age_cat=Greater than 45']].values, axis=1).reshape(-1, 1)\n",
-    "priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3', \n",
+    "priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3',\n",
     "                                          'priors_count=More than 3']].values, axis=1).reshape(-1, 1)\n",
     "c_charge_degree = np.argmax(dataset_orig_df[['c_charge_degree=M', 'c_charge_degree=F']].values, axis=1).reshape(-1, 1)\n",
     "\n",
@@ -249,12 +248,12 @@
     }
    ],
    "source": [
-    "metric_train = BinaryLabelDatasetMetric(dataset_orig_train, \n",
+    "metric_train = BinaryLabelDatasetMetric(dataset_orig_train,\n",
     "                             unprivileged_groups=male_group,\n",
     "                             privileged_groups=female_group)\n",
     "\n",
     "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_train.mean_difference())\n",
-    "metric_test = BinaryLabelDatasetMetric(dataset_orig_test, \n",
+    "metric_test = BinaryLabelDatasetMetric(dataset_orig_test,\n",
     "                             unprivileged_groups=male_group,\n",
     "                             privileged_groups=female_group)\n",
     "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_test.mean_difference())\n"
@@ -935,19 +934,14 @@
    "cell_type": "code",
    "execution_count": 22,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Function bias_scan is deprecated; Change to new interface - aif360.detectors.mdss_detector.bias_scan by version 0.5.0.\n",
-      "Function bias_scan is deprecated; Change to new interface - aif360.detectors.mdss_detector.bias_scan by version 0.5.0.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "privileged_subset = mdss_classified.bias_scan(penalty=0.5, privileged=True)\n",
-    "unprivileged_subset = mdss_classified.bias_scan(penalty=0.5, privileged=False)"
+    "privileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n",
+    "                              favorable_value=dataset_orig_test.favorable_label,\n",
+    "                              penalty=0.5, overpredicted=True)\n",
+    "unprivileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n",
+    "                                favorable_value=dataset_orig_test.favorable_label,\n",
+    "                                penalty=0.5, overpredicted=False)"
    ]
   },
   {
@@ -1024,7 +1018,7 @@
     "detected_privileged_groups = []\n",
     "for vals in subset_values:\n",
     "    detected_privileged_groups.append((dict(zip(privileged_subset[0].keys(), vals))))\n",
-    "    \n",
+    "\n",
     "a = list(unprivileged_subset[0].values())\n",
     "subset_values = list(itertools.product(*a))\n",
     "\n",
@@ -1047,11 +1041,11 @@
     }
    ],
    "source": [
-    "metric_bias_test = BinaryLabelDatasetMetric(dataset_bias_test, \n",
+    "metric_bias_test = BinaryLabelDatasetMetric(dataset_bias_test,\n",
     "                                             unprivileged_groups=detected_unprivileged_groups,\n",
     "                                             privileged_groups=detected_privileged_groups)\n",
     "\n",
-    "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" \n",
+    "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\"\n",
     "      % metric_bias_test.mean_difference())"
    ]
   },