Enhancement/plant repr (#248)

* add a nice __repr__ for PlantData supported by tabulate
* update repr formatting
* add base metadata __repr__ schema
* add nice reprs for all metadata
* update changelog
* update examplesout for changes to notebooks
* add separate markdown display for notebook compatibility
* update spacing in plant data and add docstrings
* fix missing list initialization
* move ipython to core requirements for markdown output
* add ipython to requirements
* add the __repr__ demonstration to the intro notebook
* fix spacing issues for docs markdown display
* update examples in the documentation source
* update markdown piece
NREL · Sep 2, 2023 · daf42ac · daf42ac
1 parent 7493b96
commit daf42ac
Show file tree

Hide file tree

Showing 6 changed files with 971 additions and 109 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -28,6 +28,12 @@ All notable changes to this project will be documented in this file. If you make
 - `utils.filters.bin_filter` was converted from a for loop to a vectorized method
 - `utils.filters.bin_filter` and `utils.timeseries.percent_nan` were converted to be nearly pure NumPy methods operating on NumPy arrays for significant speedups of the TIE analysis method.
 - `analysis.TurbineLongTermGrossEnergy.filter_turbine_data` was cleaned up for a minor gain in efficiency and readability.
+- Better `__repr__` methods for `PlantData` and `PlantMetaData`.
+  - Printing a `PlantData` object now provides a high-level statistical summary of each of the
+    datasets in `PlantData`, alongside other key variables.
+  - Printing a `PlantMetaData` object now shows the default or provided column mapping with the
+    associated expected dtypes and units, alongside other key variables.
+
 
 ## 3.0rc2
 - Everything from release candidate 1

diff --git a/examples/00_intro_to_plant_data.ipynb b/examples/00_intro_to_plant_data.ipynb
diff --git a/openoa/plant.py b/openoa/plant.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import sys
 import itertools
 from typing import Callable, Optional, Sequence
 from pathlib import Path
@@ -10,6 +11,8 @@
 import pandas as pd
 from attrs import field, define
 from pyproj import Transformer
+from tabulate import tabulate
+from IPython.display import Markdown, display
 from shapely.geometry import Point
 
 import openoa.utils.timeseries as ts
@@ -389,7 +392,10 @@ class PlantData:
     """
 
     metadata: PlantMetaData = field(
-        default={}, converter=PlantMetaData.load, on_setattr=[attrs.converters, attrs.validators]
+        default={},
+        converter=PlantMetaData.load,
+        on_setattr=[attrs.converters, attrs.validators],
+        repr=False,
     )
     analysis_type: list[str] | None = field(
         default=None,
@@ -525,6 +531,110 @@ def reanalysis_validator(
             self._errors["missing"].update(self._validate_column_names(category=name))
             self._errors["dtype"].update(self._validate_dtypes(category=name))
 
+    def __generate_text_repr(self):
+        """Assemble the plain-text summary of the core internal data.
+
+        Walks the attrs fields that have ``repr`` enabled, in declaration order, and emits:
+        the ``analysis_type`` value; a ``describe()`` grid for each of the scada, meter,
+        tower, status, and curtail frames; one ``describe()`` grid per reanalysis product;
+        and the asset table without its ``geometry`` column. Fields with any other name are
+        ignored. A frame that is ``None``, or a reanalysis dictionary containing the
+        ``"product"`` key, is reported as ``no data``.
+
+        Returns:
+            str: The newline-joined summary.
+        """
+        described = ("scada", "meter", "tower", "status", "curtail")
+        sectioned = (*described, "reanalysis", "asset")
+        grid = {"floatfmt": ",.3f", "tablefmt": "grid"}
+
+        lines = []
+        for attribute in self.__attrs_attrs__:
+            if not attribute.repr:
+                continue
+
+            name = attribute.name
+            value = getattr(self, name)
+            if name == "analysis_type":
+                lines.append(f"{name}: {value}")
+                continue
+            if name not in sectioned:
+                continue
+
+            # Every remaining section opens with its name underlined by dashes.
+            lines.extend([f"\n{name}", "-" * len(name)])
+
+            if name == "reanalysis":
+                if "product" in value:
+                    lines.append("no data")
+                    continue
+                for product, df in value.items():
+                    lines.append(f"\n{product}")
+                    summary = df.describe().T
+                    lines.append(tabulate(summary, headers=summary.columns, **grid))
+                continue
+
+            if value is None:
+                lines.append("no data")
+                continue
+
+            # The asset table is printed as-is (minus geometry); the rest are summarized.
+            table = value.drop(columns=["geometry"]) if name == "asset" else value.describe().T
+            lines.append(tabulate(table, headers=table.columns, **grid))
+        return "\n".join(lines)
+
+    def __generate_markdown_repr(self):
+        """Assemble the markdown-friendly summary of the core internal data.
+
+        Emits, in order: the ``analysis_type`` entries as a bullet list, the asset table
+        without its ``geometry`` column, a ``describe()`` table for each of the scada,
+        meter, tower, status, and curtail frames, and one ``describe()`` table per
+        reanalysis product. A frame that is ``None`` is reported as ``no data``.
+
+        Returns:
+            str: The newline-joined markdown text.
+        """
+        new_line = "\n"
+
+        # ``analysis_type`` defaults to ``None`` on the field definition, so guard the
+        # iteration; an empty bullet list is rendered in that case.
+        lines = [
+            "PlantData",
+            new_line,
+            "**analysis_type**",
+            *[f"- {el}" for el in (self.analysis_type or [])],
+            new_line,
+        ]
+
+        asset = self.asset
+        data = "no data" if asset is None else asset.drop(columns=["geometry"]).to_markdown()
+        lines.extend(["**asset**", new_line, data, new_line])
+
+        for name in ("scada", "meter", "tower", "status", "curtail"):
+            df = getattr(self, name)
+            data = "no data" if df is None else df.describe().T.to_markdown()
+            lines.extend([f"**{name}**", new_line, data, new_line])
+
+        lines.extend(["**reanalysis**", new_line])
+
+        # Mirror the text summary: a "product" key marks the no-data case, so the entries
+        # must not be summarized (they were previously still passed to ``describe()``).
+        if "product" in self.reanalysis:
+            lines.append("no data")
+        else:
+            for name, df in self.reanalysis.items():
+                data = df.describe().T.to_markdown()
+                lines.extend([f"**{name}**", new_line, data, new_line])
+
+        return new_line.join(lines)
+
+    def __str__(self):
+        """Return the plain-text summary produced by ``__generate_text_repr``."""
+        return self.__generate_text_repr()
+
+    def markdown(self):
+        """Render the markdown-formatted version of the ``__str__`` summary.
+
+        The markdown text is wrapped in an IPython ``Markdown`` object and passed to
+        ``display``; the output is a side effect and nothing is returned.
+        """
+        display(Markdown(self.__generate_markdown_repr()))
+
+    def __repr__(self):
+        """Return the plain-text summary of the plant data.
+
+        ``__repr__`` has to return a string and should not produce output of its own.
+        The previous non-terminal branch called ``display`` as a side effect and returned
+        the ``repr`` of its result, i.e. the literal string ``"None"``. Rich front ends
+        now obtain the markdown form through ``_repr_markdown_`` instead.
+
+        Returns:
+            str: The same text summary that ``__str__`` returns.
+        """
+        return self.__generate_text_repr()
+
+    def _repr_markdown_(self):
+        """Return the markdown summary for IPython/Jupyter rich display.
+
+        IPython looks for this hook when it displays an object, so notebooks keep
+        rendering the markdown tables without ``__repr__`` having to call ``display``.
+
+        Returns:
+            str: The markdown text built by ``__generate_markdown_repr``.
+        """
+        return self.__generate_markdown_repr()
+
     def _set_index_columns(self) -> None:
         """Sets the index value for each of the `PlantData` objects that are not `None`."""
         if self.scada is not None:

diff --git a/openoa/schema/metadata.py b/openoa/schema/metadata.py
@@ -3,12 +3,15 @@
 import json
 import itertools
 from copy import deepcopy
+from typing import Any
 from pathlib import Path
 
 import yaml
 import attrs
 import numpy as np
+import pandas as pd
 from attrs import field, define
+from tabulate import tabulate
 
 
 # *************************************************************************
@@ -192,6 +195,60 @@ def from_dict(cls, data: dict):
         return cls(**kwargs)  # type: ignore
 
 
+def _make_single_repr(name: str, meta_class) -> str:
+    """Build the text summary for a single metadata object.
+
+    The summary ends with a grid table, indexed by the keys of ``meta_class.col_map``,
+    ``meta_class.dtypes``, and ``meta_class.units``, that lists the column name, expected
+    type, and expected units.
+
+    Args:
+        name (str): Class name used for the banner. ``"ReanalysisMetaData"`` suppresses the
+            banner and ``"AssetMetaData"`` suppresses the frequency section.
+        meta_class: Metadata object providing ``col_map``, ``dtypes``, ``units`` and, unless
+            ``name`` is ``"AssetMetaData"``, ``frequency``.
+
+    Returns:
+        str: The newline-joined summary.
+    """
+    column_names = pd.DataFrame.from_dict(
+        meta_class.col_map, orient="index", columns=["Column Name"]
+    )
+    # Strip the ``<class '...'>`` wrapper so only the bare type name is displayed.
+    type_names = {
+        key: str(dtype).replace("<class '", "").replace("'>", "")
+        for key, dtype in meta_class.dtypes.items()
+    }
+    expected_types = pd.DataFrame.from_dict(type_names, orient="index", columns=["Expected Type"])
+    expected_units = pd.DataFrame.from_dict(
+        meta_class.units, orient="index", columns=["Expected Units"]
+    )
+    summary = pd.concat([column_names, expected_types, expected_units], axis=1)
+
+    lines = []
+    if name != "ReanalysisMetaData":
+        rule = "-" * len(name)
+        lines.extend([rule, name, rule + "\n"])
+
+    if name != "AssetMetaData":
+        lines.extend(["frequency\n--------", meta_class.frequency])
+
+    lines.append("\nMetadata Summary\n----------------")
+    lines.append(tabulate(summary, headers=summary.columns, tablefmt="grid"))
+    return "\n".join(lines)
+
+
+def _make_combined_repr(cls: PlantMetaData) -> str:
+    """Join the summaries of every metadata section into a single string.
+
+    The scada, meter, tower, status, curtail, and asset attributes are formatted in that
+    order, followed by a ``ReanalysisMetaData`` banner and one ``<name>:`` entry per item
+    of ``cls.reanalysis``.
+
+    Args:
+        cls (PlantMetaData): Object providing the section attributes and a ``reanalysis``
+            dictionary.
+
+    Returns:
+        str: The sections separated by blank lines.
+    """
+    title = "ReanalysisMetaData"
+    rule = "-" * len(title)
+    reanalysis_lines = [rule, title, rule + "\n"]
+    for product, meta in cls.reanalysis.items():
+        reanalysis_lines.extend([f"\n{product}:\n", f"{meta}"])
+
+    sections = (
+        cls.scada,
+        cls.meter,
+        cls.tower,
+        cls.status,
+        cls.curtail,
+        cls.asset,
+        "\n".join(reanalysis_lines),
+    )
+    combined = "\n\n".join(f"{section}" for section in sections)
+    # Replace each triple newline with a double one to tighten the spacing.
+    return combined.replace("\n\n\n", "\n\n")
+
+
 # ***************************************
 # Define the meta data validation classes
 # ***************************************
@@ -299,6 +356,9 @@ def __attrs_post_init__(self) -> None:
         )
         self.col_map_reversed = {v: k for k, v in self.col_map.items()}
 
+    def __repr__(self):
+        """Return the ``_make_single_repr`` summary labelled ``SCADAMetaData``."""
+        return _make_single_repr("SCADAMetaData", self)
+
 
 @define(auto_attribs=True)
 class MeterMetaData(FromDictMixin):  # noqa: F821
@@ -353,6 +413,9 @@ def __attrs_post_init__(self) -> None:
             MMTR_SupWh=self.MMTR_SupWh,
         )
 
+    def __repr__(self):
+        """Return the ``_make_single_repr`` summary labelled ``MeterMetaData``."""
+        return _make_single_repr("MeterMetaData", self)
+
 
 @define(auto_attribs=True)
 class TowerMetaData(FromDictMixin):  # noqa: F821
@@ -406,6 +469,9 @@ def __attrs_post_init__(self) -> None:
             asset_id=self.asset_id,
         )
 
+    def __repr__(self):
+        """Return the ``_make_single_repr`` summary labelled ``TowerMetaData``."""
+        return _make_single_repr("TowerMetaData", self)
+
 
 @define(auto_attribs=True)
 class StatusMetaData(FromDictMixin):  # noqa: F821
@@ -477,6 +543,9 @@ def __attrs_post_init__(self) -> None:
             status_text=self.status_text,
         )
 
+    def __repr__(self):
+        """Return the ``_make_single_repr`` summary labelled ``StatusMetaData``."""
+        return _make_single_repr("StatusMetaData", self)
+
 
 @define(auto_attribs=True)
 class CurtailMetaData(FromDictMixin):  # noqa: F821
@@ -536,6 +605,9 @@ def __attrs_post_init__(self) -> None:
             IAVL_DnWh=self.IAVL_DnWh,
         )
 
+    def __repr__(self):
+        """Return the ``_make_single_repr`` summary labelled ``CurtailMetaData``."""
+        return _make_single_repr("CurtailMetaData", self)
+
 
 @define(auto_attribs=True)
 class AssetMetaData(FromDictMixin):  # noqa: F821
@@ -613,6 +685,9 @@ def __attrs_post_init__(self) -> None:
             type=self.type,
         )
 
+    def __repr__(self):
+        """Return the ``_make_single_repr`` summary labelled ``AssetMetaData``.
+
+        ``_make_single_repr`` omits the frequency section for this label.
+        """
+        return _make_single_repr("AssetMetaData", self)
+
 
 def convert_reanalysis(value: dict[str, dict]):
     return {k: ReanalysisMetaData.from_dict(v) for k, v in value.items()}
@@ -701,6 +776,9 @@ def __attrs_post_init__(self) -> None:
             WMETR_EnvPres=self.WMETR_EnvPres,
         )
 
+    def __repr__(self):
+        """Return the ``_make_single_repr`` summary labelled ``ReanalysisMetaData``.
+
+        ``_make_single_repr`` omits the name banner for this label.
+        """
+        return _make_single_repr("ReanalysisMetaData", self)
+
 
 @define(auto_attribs=True)
 class PlantMetaData(FromDictMixin):  # noqa: F821
@@ -897,3 +975,6 @@ def frequency_requirements(self, analysis_types: list[str | None]) -> dict[str,
                 else:
                     frequency[name] = reqs.intersection(req)
         return frequency
+
+    def __repr__(self):
+        """Return the combined summary of all sections built by ``_make_combined_repr``."""
+        return _make_combined_repr(self)
diff --git a/setup.py b/setup.py
@@ -10,7 +10,6 @@
 
 # Core dependencies
 REQUIRED = [
-    "statsmodels",
     "scikit-learn>=1.0",
     "requests>=2.21.0",
     "eia-python>=1.22",
@@ -26,9 +25,11 @@
     "bokeh>=2.4",
     "attrs>=22",
     "pytz",
-    "pyyaml",
     "h5pyd",
+    "pyyaml",
     "pyspark",
+    "tabulate",
+    "statsmodels",
     "jupyterlab",
     "xarray",
     "dask",