Skip to content

Commit

Permalink
Enhancement/plant repr (#248)
Browse files Browse the repository at this point in the history
* add a nice __repr__ for PlantData supported by tabulate

* update repr formatting

* add base metadata __repr__ schema

* add nice reprs for all metadata

* update changelog

* update examplesout for changes to notebooks

* add separate markdown display for notebook compatibility

* update spacing in plant data and add docstrings

* fix missing list initialization

* move ipython to core requirements for markdown output

* add ipython to requirements

* add the __repr__ demonstration to the intro notebook

* fix spacing issues for docs markdown display

* update examples in the documentation source

* update markdown piece
  • Loading branch information
RHammond2 authored Sep 2, 2023
1 parent 7493b96 commit daf42ac
Show file tree
Hide file tree
Showing 6 changed files with 971 additions and 109 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ All notable changes to this project will be documented in this file. If you make
- `utils.filters.bin_filter` was converted from a for loop to a vectorized method
- `utils.filters.bin_filter` and `utils.timeseries.percent_nan` were converted to be nearly pure NumPy methods operating on NumPy arrays for significant speedups of the TIE analysis method.
- `analysis.TurbineLongTermGrossEnergy.filter_turbine_data` was cleaned up for a minor gain in efficiency and readability.
- Better `__repr__` methods for `PlantData` and `PlantMetaData`.
- Printing a `PlantData` object now provides a high-level statistical summary of each of the
datasets in `PlantData`, alongside other key variables.
- Printing a `PlantMetaData` object now shows the default or provided column mapping with the
associated expected dtypes and units, alongside other key variables.


## 3.0rc2
- Everything from release candidate 1
Expand Down
438 changes: 385 additions & 53 deletions examples/00_intro_to_plant_data.ipynb

Large diffs are not rendered by default.

112 changes: 111 additions & 1 deletion openoa/plant.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
import itertools
from typing import Callable, Optional, Sequence
from pathlib import Path
Expand All @@ -10,6 +11,8 @@
import pandas as pd
from attrs import field, define
from pyproj import Transformer
from tabulate import tabulate
from IPython.display import Markdown, display
from shapely.geometry import Point

import openoa.utils.timeseries as ts
Expand Down Expand Up @@ -389,7 +392,10 @@ class PlantData:
"""

metadata: PlantMetaData = field(
default={}, converter=PlantMetaData.load, on_setattr=[attrs.converters, attrs.validators]
default={},
converter=PlantMetaData.load,
on_setattr=[attrs.converters, attrs.validators],
repr=False,
)
analysis_type: list[str] | None = field(
default=None,
Expand Down Expand Up @@ -525,6 +531,110 @@ def reanalysis_validator(
self._errors["missing"].update(self._validate_column_names(category=name))
self._errors["dtype"].update(self._validate_dtypes(category=name))

def __generate_text_repr(self):
    """Build the plain-text summary of the attributes flagged for display.

    Walks ``self.__attrs_attrs__`` and skips every attribute whose ``repr`` flag is falsy.
    ``analysis_type`` is written as a single ``name: value`` line. The time series tables
    (``scada``, ``meter``, ``tower``, ``status``, ``curtail``) and each ``reanalysis``
    product are summarized through ``DataFrame.describe().T``, and ``asset`` is printed
    in full without its ``geometry`` column. Missing data is reported as ``"no data"``.

    Returns:
        str: The newline-joined summary, with tables rendered by ``tabulate`` in the
            ``grid`` format.
    """
    described_tables = ("scada", "meter", "tower", "status", "curtail")
    titled_sections = (*described_tables, "reanalysis", "asset")

    def as_grid(frame):
        # Every table shares the same number format and grid layout.
        return tabulate(frame, headers=frame.columns, floatfmt=",.3f", tablefmt="grid")

    lines = []
    for attribute in self.__attrs_attrs__:
        if not attribute.repr:
            continue

        name = attribute.name
        value = getattr(self, name)

        if name == "analysis_type":
            lines.append(f"{name}: {value}")
            continue
        if name not in titled_sections:
            continue

        # Section title, underlined to the title's width.
        lines.extend([f"\n{name}", "-" * len(name)])

        if name == "reanalysis":
            # A "product" key is treated as the marker for no reanalysis data.
            if "product" in value:
                lines.append("no data")
            else:
                for product, df in value.items():
                    lines.extend([f"\n{product}", as_grid(df.describe().T)])
        elif value is None:
            lines.append("no data")
        elif name == "asset":
            lines.append(as_grid(value.drop(columns=["geometry"])))
        else:
            lines.append(as_grid(value.describe().T))

    return "\n".join(lines)

def __generate_markdown_repr(self):
    """Build a Markdown-friendly summary of the core internal data.

    The output lists ``analysis_type`` as bullet points, then one bolded section per
    dataset: ``asset`` (full table without the ``geometry`` column), the described
    ``scada``, ``meter``, ``tower``, ``status``, and ``curtail`` tables, and finally one
    described table per ``reanalysis`` product. Any dataset that is ``None`` is reported
    as ``"no data"``.

    Returns:
        str: The Markdown text, with tables produced by ``DataFrame.to_markdown``.
    """
    new_line = "\n"

    def describe(df):
        # Statistical summary with one row per column, or the missing-data marker.
        return "no data" if df is None else df.describe().T.to_markdown()

    # NOTE(review): iterating ``analysis_type`` assumes it is never ``None`` by the time
    # this runs -- confirm against the attribute's converter.
    lines = [
        "PlantData",
        new_line,
        "**analysis_type**",
        *[f"- {el}" for el in self.analysis_type],
        new_line,
    ]

    asset = "no data" if self.asset is None else self.asset.drop(columns=["geometry"]).to_markdown()
    lines.extend(["**asset**", new_line, asset, new_line])

    for name in ("scada", "meter", "tower", "status", "curtail"):
        lines.extend([f"**{name}**", new_line, describe(getattr(self, name)), new_line])

    lines.extend(["**reanalysis**", new_line])
    if "product" in self.reanalysis:
        # A "product" key is the marker for no reanalysis data, matching the text summary;
        # the placeholder entry must not be described.
        lines.append("no data")
    else:
        for name, df in self.reanalysis.items():
            lines.extend([f"**{name}**", new_line, df.describe().T.to_markdown(), new_line])

    return new_line.join(lines)

def __str__(self):
    """Return the plain-text summary of the object."""
    text_summary = self.__generate_text_repr()
    return text_summary

def markdown(self):
    """Display the Markdown-formatted version of the ``__str__`` summary."""
    rendered = Markdown(self.__generate_markdown_repr())
    display(rendered)

def __repr__(self):
    """Return a context-aware summary of the object.

    When ``sys.stderr`` is attached to a terminal, the plain-text summary is returned.
    Otherwise the Markdown summary is displayed as a side effect and the ``repr`` of
    whatever ``display`` returns is handed back.
    """
    if sys.stderr.isatty():
        return self.__generate_text_repr()

    # NOTE(review): ``display`` presumably returns ``None`` here, which would make the
    # returned string "None" -- confirm this is the intended notebook behavior.
    return repr(display(Markdown(self.__generate_markdown_repr())))

def _set_index_columns(self) -> None:
"""Sets the index value for each of the `PlantData` objects that are not `None`."""
if self.scada is not None:
Expand Down
81 changes: 81 additions & 0 deletions openoa/schema/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
import json
import itertools
from copy import deepcopy
from typing import Any
from pathlib import Path

import yaml
import attrs
import numpy as np
import pandas as pd
from attrs import field, define
from tabulate import tabulate


# *************************************************************************
Expand Down Expand Up @@ -192,6 +195,60 @@ def from_dict(cls, data: dict):
return cls(**kwargs) # type: ignore


def _make_single_repr(name: str, meta_class) -> str:
    """Build the plain-text summary for a single metadata object.

    Args:
        name (str): Class name used as the banner title. ``"ReanalysisMetaData"`` omits
            the banner, and ``"AssetMetaData"`` omits the frequency section.
        meta_class: Metadata object providing the ``col_map``, ``dtypes``, and ``units``
            mappings, plus ``frequency`` whenever ``name`` is not ``"AssetMetaData"``.

    Returns:
        str: The optional banner and frequency sections followed by a grid table of the
            column name, expected type, and expected units for each metadata key.
    """
    # Reduce "<class 'float'>" style representations to the bare type name.
    dtype_names = {
        key: str(dtype).replace("<class '", "").replace("'>", "")
        for key, dtype in meta_class.dtypes.items()
    }
    summary = pd.concat(
        [
            pd.DataFrame.from_dict(meta_class.col_map, orient="index", columns=["Column Name"]),
            pd.DataFrame.from_dict(dtype_names, orient="index", columns=["Expected Type"]),
            pd.DataFrame.from_dict(meta_class.units, orient="index", columns=["Expected Units"]),
        ],
        axis=1,
    )

    lines = []
    if name != "ReanalysisMetaData":
        rule = "-" * len(name)
        lines.extend([rule, name, f"{rule}\n"])

    if name != "AssetMetaData":
        # Size the underline from the label so the two cannot drift apart, and stringify
        # the frequency so a non-string value cannot break the final join.
        label = "frequency"
        lines.append(f"{label}\n{'-' * len(label)}")
        lines.append(str(meta_class.frequency))

    lines.append("\nMetadata Summary\n----------------")
    lines.append(tabulate(summary, headers=summary.columns, tablefmt="grid"))
    return "\n".join(lines)


def _make_combined_repr(cls: PlantMetaData) -> str:
    """Build the combined plain-text summary for all of a plant's metadata.

    Args:
        cls (PlantMetaData): Object providing the ``scada``, ``meter``, ``tower``,
            ``status``, ``curtail``, and ``asset`` metadata and a ``reanalysis`` mapping
            of product name to metadata.

    Returns:
        str: The string form of each metadata object followed by a ``ReanalysisMetaData``
            banner and one entry per reanalysis product, separated by blank lines with
            triple newlines collapsed to double newlines.
    """
    banner = "ReanalysisMetaData"
    rule = "-" * len(banner)

    reanalysis_parts = [rule, banner, rule + "\n"]
    for product, meta in cls.reanalysis.items():
        reanalysis_parts.extend([f"\n{product}:\n", f"{meta}"])

    sections = [
        f"{section}"
        for section in (cls.scada, cls.meter, cls.tower, cls.status, cls.curtail, cls.asset)
    ]
    sections.append("\n".join(reanalysis_parts))

    combined = "\n\n".join(sections)
    return combined.replace("\n\n\n", "\n\n")


# ***************************************
# Define the meta data validation classes
# ***************************************
Expand Down Expand Up @@ -299,6 +356,9 @@ def __attrs_post_init__(self) -> None:
)
self.col_map_reversed = {v: k for k, v in self.col_map.items()}

def __repr__(self):
    """Return the text summary of this metadata under the ``SCADAMetaData`` banner."""
    return _make_single_repr(name="SCADAMetaData", meta_class=self)


@define(auto_attribs=True)
class MeterMetaData(FromDictMixin): # noqa: F821
Expand Down Expand Up @@ -353,6 +413,9 @@ def __attrs_post_init__(self) -> None:
MMTR_SupWh=self.MMTR_SupWh,
)

def __repr__(self):
    """Return the text summary of this metadata under the ``MeterMetaData`` banner."""
    return _make_single_repr(name="MeterMetaData", meta_class=self)


@define(auto_attribs=True)
class TowerMetaData(FromDictMixin): # noqa: F821
Expand Down Expand Up @@ -406,6 +469,9 @@ def __attrs_post_init__(self) -> None:
asset_id=self.asset_id,
)

def __repr__(self):
    """Return the text summary of this metadata under the ``TowerMetaData`` banner."""
    return _make_single_repr(name="TowerMetaData", meta_class=self)


@define(auto_attribs=True)
class StatusMetaData(FromDictMixin): # noqa: F821
Expand Down Expand Up @@ -477,6 +543,9 @@ def __attrs_post_init__(self) -> None:
status_text=self.status_text,
)

def __repr__(self):
    """Return the text summary of this metadata under the ``StatusMetaData`` banner."""
    return _make_single_repr(name="StatusMetaData", meta_class=self)


@define(auto_attribs=True)
class CurtailMetaData(FromDictMixin): # noqa: F821
Expand Down Expand Up @@ -536,6 +605,9 @@ def __attrs_post_init__(self) -> None:
IAVL_DnWh=self.IAVL_DnWh,
)

def __repr__(self):
    """Return the text summary of this metadata under the ``CurtailMetaData`` banner."""
    return _make_single_repr(name="CurtailMetaData", meta_class=self)


@define(auto_attribs=True)
class AssetMetaData(FromDictMixin): # noqa: F821
Expand Down Expand Up @@ -613,6 +685,9 @@ def __attrs_post_init__(self) -> None:
type=self.type,
)

def __repr__(self):
    """Return the text summary of this metadata under the ``AssetMetaData`` banner."""
    return _make_single_repr(name="AssetMetaData", meta_class=self)


def convert_reanalysis(value: dict[str, dict]):
    """Convert each entry of ``value`` into a ``ReanalysisMetaData`` object.

    Args:
        value (dict[str, dict]): Mapping whose values are passed to
            ``ReanalysisMetaData.from_dict``.

    Returns:
        dict: The same keys mapped to the resulting ``ReanalysisMetaData`` objects.
    """
    converted = {}
    for product, settings in value.items():
        converted[product] = ReanalysisMetaData.from_dict(settings)
    return converted
Expand Down Expand Up @@ -701,6 +776,9 @@ def __attrs_post_init__(self) -> None:
WMETR_EnvPres=self.WMETR_EnvPres,
)

def __repr__(self):
    """Return the text summary of this metadata, identified as ``ReanalysisMetaData``."""
    return _make_single_repr(name="ReanalysisMetaData", meta_class=self)


@define(auto_attribs=True)
class PlantMetaData(FromDictMixin): # noqa: F821
Expand Down Expand Up @@ -897,3 +975,6 @@ def frequency_requirements(self, analysis_types: list[str | None]) -> dict[str,
else:
frequency[name] = reqs.intersection(req)
return frequency

def __repr__(self):
    """Return the combined text summary of all the contained metadata objects."""
    return _make_combined_repr(cls=self)
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

# Core dependencies
REQUIRED = [
"statsmodels",
"scikit-learn>=1.0",
"requests>=2.21.0",
"eia-python>=1.22",
Expand All @@ -26,9 +25,11 @@
"bokeh>=2.4",
"attrs>=22",
"pytz",
"pyyaml",
"h5pyd",
"pyyaml",
"pyspark",
"tabulate",
"statsmodels",
"jupyterlab",
"xarray",
"dask",
Expand Down
Loading

0 comments on commit daf42ac

Please sign in to comment.