More sklearn-compatible metrics (Trusted-AI#290)
* add differential fairness metrics

* Clean up bias scan

* add aws metrics
hoffmansc committed Sep 3, 2022
1 parent 805f44f commit 4c43483
Showing 14 changed files with 1,001 additions and 658 deletions.
18 changes: 9 additions & 9 deletions aif360/metrics/mdss_classification_metric.py
@@ -69,7 +69,7 @@ def score_groups(self, privileged=True, penalty=1e-17):
(observed better than predicted outcomes).
Returns:
float: Bias score for the given group.
The higher the score, the stronger the evidence for bias.
"""
groups = self.privileged_groups if privileged else self.unprivileged_groups
@@ -89,16 +89,16 @@ def score_groups(self, privileged=True, penalty=1e-17):
outcomes = pd.Series(self.dataset.labels.flatten() == self.dataset.favorable_label, dtype=int)

# In MDSS, we look for a subset whose observations systematically deviate from expectations.
# Positive direction means observations are systematically higher than expectations
# (or expectations are systematically lower than observations) while
# Negative direction means observations are systematically lower than expectations
# (or expectations are systematically higher than observations)

# For a privileged group, we are looking for a subset whose expectations
# (where expectations are obtained from a model) are systematically higher than the observations.
# This means we scan in the negative direction.

# For an unprivileged group, we are looking for a subset whose expectations
# (where expectations are obtained from a model) are systematically lower than the observations.
# This means we scan in the positive direction.

@@ -141,16 +141,16 @@ def bias_scan(self, privileged=True, num_iters=10, penalty=1e-17):
outcomes = pd.Series(self.dataset.labels.flatten() == self.dataset.favorable_label, dtype=int)

# In MDSS, we look for a subset whose observations systematically deviate from expectations.
# Positive direction means observations are systematically higher than expectations
# (or expectations are systematically lower than observations) while
# Negative direction means observations are systematically lower than expectations
# (or expectations are systematically higher than observations)

# For a privileged group, we are looking for a subset whose expectations
# (where expectations are obtained from a model) are systematically higher than the observations.
# This means we scan in the negative direction.

# For an unprivileged group, we are looking for a subset whose expectations
# (where expectations are obtained from a model) are systematically lower than the observations.
# This means we scan in the positive direction.

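The scan-direction logic described in the comments above is exercised through MDSSClassificationMetric. A minimal usage sketch with made-up toy data; the dataset construction and constructor defaults below are assumptions for illustration, not part of this diff:

    import numpy as np
    import pandas as pd
    from aif360.datasets import BinaryLabelDataset
    from aif360.metrics import MDSSClassificationMetric

    # Hypothetical toy data: 'sex' is the protected attribute, 'label' the observed outcome.
    df = pd.DataFrame({'sex':   [1, 1, 1, 0, 0, 0],
                       'age':   [0, 1, 1, 0, 1, 0],
                       'label': [1, 1, 0, 1, 0, 0]})
    dataset = BinaryLabelDataset(df=df, label_names=['label'],
                                 protected_attribute_names=['sex'])

    # Copy holding the classifier's output: scores (probabilities) and predicted labels.
    classified = dataset.copy(deepcopy=True)
    classified.scores = np.array([[0.9], [0.8], [0.7], [0.4], [0.3], [0.2]])
    classified.labels = (classified.scores > 0.5).astype(float)

    metric = MDSSClassificationMetric(dataset, classified,
                                      unprivileged_groups=[{'sex': 0}],
                                      privileged_groups=[{'sex': 1}])

    # Privileged group -> scan in the negative direction (expectations above observations).
    # The higher the returned score, the stronger the evidence for bias.
    print(metric.score_groups(privileged=True, penalty=1e-17))
    # Unprivileged group -> scan in the positive direction.
    print(metric.score_groups(privileged=False, penalty=1e-17))
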
9 changes: 8 additions & 1 deletion aif360/sklearn/detectors/__init__.py
@@ -1 +1,8 @@
"""
Methods for detecting subsets for which a model or dataset is biased.
"""
from aif360.sklearn.detectors.detectors import bias_scan

__all__ = [
'bias_scan',
]
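
With the added docstring and __all__, bias_scan becomes the subpackage's single public entry point. A quick check, assuming an install that includes this commit:

    from aif360.sklearn import detectors

    print(detectors.__all__)      # ['bias_scan']
    help(detectors.bias_scan)     # prints the docstring shown in the next file
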
48 changes: 24 additions & 24 deletions aif360/sklearn/detectors/detectors.py
@@ -1,7 +1,7 @@
from typing import Union

from aif360.detectors import bias_scan
-from aif360.detectors.mdss import ScoringFunction
+from aif360.detectors.mdss.ScoringFunctions import ScoringFunction

import pandas as pd

@@ -18,37 +18,37 @@ def bias_scan(
mode: str = "binary",
**kwargs,
):
"""
scan to find the highest scoring subset of records (see demo_mdss_detector.ipynb for example usage)
"""Scan to find the highest scoring subset of records.
:param X (dataframe): the dataset (containing the features) the model was trained on
:param y_true (series): ground truth (correct) target values
:param y_pred (series, dataframe, optional): pandas series estimated targets
as returned by a model for binary, continuous and ordinal modes.
If mode is nominal, this is a dataframe with columns containing expectations/predictions for each nominal class.
If None, model is assumed to be a dumb model that predicts the mean of the targets
or 1/(num of categories) for nominal mode.
:param pos_label (str, float, optional): Should be high or low or float if the mode in [binary, ordinal, or continuous].
Args:
X (pandas.DataFrame): the dataset (containing the features) the model was trained on
y_true (pandas.Series): ground truth (correct) target values
y_pred (pandas.Series, pandas.DataFrame, optional): pandas series estimated targets
as returned by a model for binary, continuous and ordinal modes.
If mode is nominal, this is a dataframe with columns containing expectations/predictions for each nominal class.
If None, model is assumed to be a dumb model that predicts the mean of the targets or 1/(num of categories) for nominal mode.
pos_label (str, float, optional): Should be high or low or float if the mode in [binary, ordinal, or continuous].
If float, value has to be minimum or maximum in the y_true column. Defaults to high if None for these modes.
Support for float left in to keep the intuition clear in binary classification tasks.
If mode is nominal, favorable values should be one of the unique categories in the y_true column.
Defaults to a one-vs-all scan if None for nominal mode.
-:param overpredicted (bool, optional): flag for group to scan for.
-    True means we scan for a group whose expectations/predictions are systematically higher than observed.
-    In other words, True means we scan for a group whose observed outcomes are systematically lower than the expectations.
-    False means we scan for a group whose expectations/predictions are systematically lower than observed.
-    In other words, False means we scan for a group whose observed outcomes are systematically higher than the expectations.
-:param scoring (str or class): One of 'Bernoulli', 'Gaussian', 'Poisson', or 'BerkJones' or subclass of
-    :class:`aif360.metrics.mdss.ScoringFunctions.ScoringFunction`.
-:param num_iters (int, optional): number of iterations (random restarts). Should be positive.
-:param penalty (float, optional): penalty term. Should be positive. The penalty term as with any regularization parameter may need to be
-    tuned for one's use case. The higher the penalty, the less complex (number of features and feature values) the
-    highest scoring subset that gets returned is.
-:param mode: one of ['binary', 'continuous', 'nominal', 'ordinal']. Defaults to binary.
+    overpredicted (bool, optional): flag for group to scan for.
+        True means we scan for a group whose expectations/predictions are systematically higher than observed.
+        In other words, True means we scan for a group whose observed outcomes are systematically lower than the expectations.
+        False means we scan for a group whose expectations/predictions are systematically lower than observed.
+        In other words, False means we scan for a group whose observed outcomes are systematically higher than the expectations.
+    scoring (str or class): One of 'Bernoulli', 'Gaussian', 'Poisson', or 'BerkJones' or subclass of
+        :class:`aif360.detectors.mdss.ScoringFunctions.ScoringFunction`.
+    num_iters (int, optional): number of iterations (random restarts). Should be positive.
+    penalty (float, optional): penalty term. Should be positive. The penalty term as with any regularization parameter may need to be
+        tuned for one's use case. The higher the penalty, the less complex (number of features and feature values) the
+        highest scoring subset that gets returned is.
+    mode (str): one of ['binary', 'continuous', 'nominal', 'ordinal']. Defaults to binary.
In nominal mode, up to 10 categories are supported by default.
To increase this, pass in keyword argument max_nominal = integer value.
-:returns: the highest scoring subset and the score or dict of the highest scoring subset and the score for each category in nominal mode
+Returns:
+    tuple: The highest scoring subset and the score or dict of the highest scoring subset and the score for each category in nominal mode
"""
return bias_scan(
data=X,
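A short end-to-end sketch of the scikit-learn-style bias_scan documented above; the toy data, the classifier, and the keyword values shown are illustrative assumptions, not part of this commit:

    import pandas as pd
    from sklearn.linear_model import LogisticRegression
    from aif360.sklearn.detectors import bias_scan

    # Hypothetical toy data: two discrete features and a binary target.
    X = pd.DataFrame({'age_group': [0, 0, 1, 1, 0, 1, 1, 0],
                      'gender':    [0, 1, 0, 1, 1, 0, 1, 0]})
    y_true = pd.Series([1, 0, 1, 1, 0, 1, 1, 0])

    # Probabilities of the favorable outcome from some previously trained model.
    clf = LogisticRegression().fit(X, y_true)
    y_pred = pd.Series(clf.predict_proba(X)[:, 1], index=X.index)

    # Scan for the subgroup whose predictions are most systematically
    # higher than its observed outcomes (overpredicted=True).
    subset, score = bias_scan(X=X, y_true=y_true, y_pred=y_pred,
                              overpredicted=True, scoring='Bernoulli',
                              num_iters=10, penalty=1e-17, mode='binary')
    print(subset, score)
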
