From 13d781904b7e3f8f333b77b6eb1d33228ff6660f Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Mon, 7 Jul 2025 17:57:51 +0200 Subject: [PATCH 01/15] (containers) minor fix docstrings --- src/plaid/containers/dataset.py | 8 ++++---- src/plaid/containers/sample.py | 2 +- tests/problem_definition/problem_infos.yaml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index 8641f69..2b79eb6 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -139,7 +139,7 @@ def get_samples( return {id: self._samples[id] for id in ids} def add_sample(self, sample: Sample, id: int = None) -> int: - """Add a new :class:`Sample ` to the :class:`Dataset .`. + """Add a new :class:`Sample ` to the :class:`Dataset `. Args: sample (Sample): The sample to add. @@ -605,7 +605,7 @@ def get_infos(self) -> dict[str, dict[str, str]]: return self._infos def print_infos(self) -> None: - """Prints information in a readable format (pretty print).""" + """Print information in a readable format (pretty print).""" infos_cats = list(self._infos.keys()) tf = "*********************** \x1b[34;1mdataset infos\x1b[0m **********************\n" for cat in infos_cats: @@ -624,7 +624,7 @@ def print_infos(self) -> None: # -------------------------------------------------------------------------# def merge_dataset(self, dataset: Self) -> list[int]: - """Merges another Dataset into this one. + """Merge another Dataset into this one. Args: dataset (Dataset): The data set to be merged into this one (self). @@ -643,7 +643,7 @@ def merge_dataset(self, dataset: Self) -> list[int]: # -------------------------------------------------------------------------# def save(self, fname: Union[str, Path]) -> None: - """Saves the data set to a TAR (Tape Archive) file. + """Save the data set to a TAR (Tape Archive) file. It creates a temporary intermediate directory to store temporary files during the loading process. diff --git a/src/plaid/containers/sample.py b/src/plaid/containers/sample.py index 838c413..8654762 100644 --- a/src/plaid/containers/sample.py +++ b/src/plaid/containers/sample.py @@ -131,7 +131,7 @@ def read_index_range(pyTree: list, dim: list[int]): class Sample(BaseModel): - """Represents a single sample. It contains data and information related to a single observation or measurement within a dataset.""" + """Represent a single sample. 
It contains data and information related to a single observation or measurement within a dataset.""" def __init__( self, diff --git a/tests/problem_definition/problem_infos.yaml b/tests/problem_definition/problem_infos.yaml index bb9be46..fc1845c 100644 --- a/tests/problem_definition/problem_infos.yaml +++ b/tests/problem_definition/problem_infos.yaml @@ -13,8 +13,8 @@ input_fields: - test_field output_fields: - field -- test_field - predict_field +- test_field input_timeseries: - predict_timeseries - test_timeseries From e00877c0b62c20b207d1a3ecd70cf41b761bc21c Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Mon, 7 Jul 2025 18:14:28 +0200 Subject: [PATCH 02/15] feat(dataset.py) get_scalars_to_tabular create array with dtype depending on data --- src/plaid/containers/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index 2b79eb6..b8ee503 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -449,7 +449,9 @@ def get_scalars_to_tabular( named_tabular = {} for s_name in scalar_names: - res = np.empty(nb_samples) + first_scalar = self[sample_ids[0]].get_scalar(s_name) + s_dtype = first_scalar.dtype if first_scalar is not None else None + res = np.empty(nb_samples, dtype=s_dtype) res.fill(None) for i_, id in enumerate(sample_ids): val = self[id].get_scalar(s_name) From bd5600fb195224f8e00c1f4552c576246d58b8ab Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Mon, 7 Jul 2025 18:17:52 +0200 Subject: [PATCH 03/15] feat(dataset.py) improve __getitem__ to work with slices --- src/plaid/containers/dataset.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index b8ee503..1b39fc7 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -1004,11 +1004,11 @@ def __len__(self) -> int: """ return len(self._samples) - def __getitem__(self, id: int) -> Sample: + def __getitem__(self, id: Union[int, slice]) -> Union[Sample, Self]: """Retrieve a specific sample by its ID int this dataset. Args: - id (int): The ID of the sample to retrieve. + id (Union[int,slice]): The ID of the sample to retrieve. Raises: IndexError: If the provided ID is out of bounds or does not exist in the dataset. @@ -1026,12 +1026,24 @@ def __getitem__(self, id: int) -> Sample: Seealso: This function can also be called using `__call__()`. """ - if id in self._samples: - return self._samples[id] + if isinstance(id, int): + if id in self._samples: + return self._samples[id] + else: + raise IndexError( + f"sample with {id=} not set -> use 'Dataset.add_sample' or 'Dataset.add_samples'" + ) else: - raise IndexError( - f"sample with {id=} not set -> use 'Dataset.add_sample' or 'Dataset.add_samples'" - ) + if isinstance(id, slice): + # TODO: check slice.stop is positive, if negative use len(dataset)+slice.stop + ids = np.arange(slice.start, slice.stop, slice.step) + samples = [] + for id in ids: + if id in self._samples: + samples.append(self._samples[id]) + dset = Dataset() + dset.add_samples(samples) + return dset __call__ = __getitem__ From 776b4e8a15e14f7d93efd74a4178e50c64b76e4e Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Mon, 7 Jul 2025 18:23:40 +0200 Subject: [PATCH 04/15] feat(dataset.py) add method extract_dataset to extract a dataset with samples containing only requiered scalars/fields/timeseries(/nodes ?) 
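
A usage sketch for the new method (illustrative names; assumes the target
samples actually carry the listed quantities):

    sub = dataset.extract_dataset(scalars=["mach"], fields=["pressure"])
    # 'sub' is a new Dataset whose Samples keep the same ids but hold only
    # the requested scalars/fields/time series.

Field extraction is known to be incomplete for now (no Base/Zone handling),
see the TODOs in the diff below.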
--- src/plaid/containers/dataset.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index 1b39fc7..bcce1ce 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -463,6 +463,43 @@ def get_scalars_to_tabular( named_tabular = np.array(list(named_tabular.values())).T return named_tabular + # -------------------------------------------------------------------------# + def extract_dataset( + self, + scalars: list[str] = [], + fields: list[str] = [], + time_series: list[str] = [], + ) -> Self: + """Extract a subset of the dataset containing only the specified scalars, fields, and time series. + + Args: + scalars (list[str], optional): List of scalar names to include. Defaults to []. + fields (list[str], optional): List of field names to include. Defaults to []. + time_series (list[str], optional): List of time series names to include. Defaults to []. + + Returns: + Self: A new dataset containing only the specified scalars, fields, and time series. + """ + dataset = Dataset() + + for id, sample in self.get_samples().items(): + new_sample = Sample() + + for scalar_name in scalars: + new_sample.add_scalar(scalar_name, sample.get_scalar(scalar_name)) + for time_series_name in time_series: + new_sample.add_time_series( + time_series_name, sample.get_time_series(time_series_name) + ) + # TODO: extract only specified fields --> WON’T WORK: there is no Base/Zone specified + # TODO: use field names of type '//' with optional zone/base names + for field_name in fields: + new_sample.add_field(field_name, sample.get_field(field_name)) + + dataset.add_sample(new_sample, id) + + return dataset + # -------------------------------------------------------------------------# def add_info(self, cat_key: str, info_key: str, info: str) -> None: """Add information to the :class:`Dataset `, overwriting existing information if there's a conflict. From 52ff99147789d17e17c0a9811f126741c2a8132e Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Mon, 7 Jul 2025 18:28:19 +0200 Subject: [PATCH 05/15] feat(dataset.py) add method merge_samples to merge scalars/fields/trees/time_series of of samples if they have same ids --- src/plaid/containers/dataset.py | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index bcce1ce..23c3c30 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -17,6 +17,7 @@ Self = TypeVar("Self") +import copy import logging import os import shutil @@ -680,6 +681,39 @@ def merge_dataset(self, dataset: Self) -> list[int]: raise ValueError("dataset must be an instance of Dataset") return self.add_samples(dataset.get_samples(as_list=True)) + def merge_samples(self, dataset: Self) -> list[int]: + """Merge Samples of another dataset into samples of this one. + + Args: + dataset (Self): The data set whom samples will be merged into those of this one (self). + + Returns: + list[int]: ids of added :class:`Samples ` from input :class:`Dataset ` that were not already present in this dataset (self). 
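+
+        Example (sketch, assuming ``ds_a`` and ``ds_b`` are Datasets sharing
+        some sample ids)::
+
+            new_ids = ds_a.merge_samples(ds_b)
+            # ids present in both: ds_a's sample gains ds_b's scalars,
+            # time series and tree
+            # ids only in ds_b: deep-copied into ds_a, returned in new_ids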
+ + Raises: + ValueError: If the provided dataset value is not an instance of Dataset + """ + if not isinstance(dataset, Dataset): + raise ValueError("dataset must be an instance of Dataset") + trg_samples = self.get_samples() + new_ids = [] + for samp_id, samp in dataset.get_samples().items(): + if samp_id in trg_samples: + for scalar_name in samp.get_scalar_names(): + trg_samples[samp_id].add_scalar( + scalar_name, samp.get_scalar(scalar_name) + ) + for time_series_name in samp.get_time_series_names(): + trg_samples[samp_id].add_time_series( + time_series_name, samp.get_time_series(time_series_name) + ) + trg_samples[samp_id].add_tree(samp.get_tree()) + else: + # TODO: should we copy the sample before adding it ? + self.add_sample(copy.deepcopy(samp), id=samp_id) + new_ids.append(samp_id) + return new_ids + # -------------------------------------------------------------------------# def save(self, fname: Union[str, Path]) -> None: """Save the data set to a TAR (Tape Archive) file. From 70962b2a0b6954585b453e838c9388538bd198dd Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Mon, 7 Jul 2025 18:30:50 +0200 Subject: [PATCH 06/15] feat(dataset.py) add methods to work with tabular fields the same way as for scalars --- src/plaid/containers/dataset.py | 113 ++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index 23c3c30..08e2c65 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -464,6 +464,119 @@ def get_scalars_to_tabular( named_tabular = np.array(list(named_tabular.values())).T return named_tabular + # -------------------------------------------------------------------------# + def add_tabular_fields(self, tabular: np.ndarray, names: list[str] = None) -> None: + """Add tabular field data to the summary. + + Args: + tabular (np.ndarray): A 2D NumPy array containing tabular field data. + names (list[str], optional): A list of column names for the tabular data. Defaults to None. + + Raises: + ShapeError: Raised if the input tabular array does not have the correct shape (2D). + ShapeError: Raised if the number of columns in the tabular data does not match the number of names provided. + + Note: + If no names are provided, it will automatically create names based on the pattern 'X{number}' + """ + nb_samples = len(tabular) + + if tabular.ndim != 2: + raise ShapeError(f"{tabular.ndim=}!=2, should be == 2") + if names is None: + names = [f"X{i}" for i in range(tabular.shape[1])] + if tabular.shape[1] != len(names): + raise ShapeError( + f"tabular should have as many columns as there are names, but {tabular.shape[1]=} and {len(names)=}" + ) + + # ---# For efficiency, first add values to storage + name_to_ids = {} + for col, name in zip(tabular.T, names): + name_to_ids[name] = col + + # ---# Then add data in sample + for i_samp in range(nb_samples): + sample = Sample() + for name in names: + sample.add_field(name, name_to_ids[name][i_samp]) + self.add_sample(sample) + + def get_fields_to_tabular( + self, + field_names: list[str] = None, + sample_ids: list[int] = None, + as_nparray=False, + ) -> Union[dict[str, np.ndarray], np.ndarray]: + """Return a dict containing field values as tabulars/arrays. + + Args: + field_names (str, optional): fields to work on. If None, all fields will be returned. Defaults to None. + sample_ids (list[int], optional): Filter by sample id. If None, take all samples. Defaults to None. 
+ as_nparray (bool, optional): If True, return the data as a single numpy ndarray. If False, return a dictionary mapping field names to their respective tabular values. Defaults to False. + + Returns: + np.ndarray: if as_nparray is True. + dict[str,np.ndarray]: if as_nparray is False, field name -> tabular values. + + Note: + This method won’t work if the fields does not have the same sizes in all samples specified by `sample_ids`. + """ + if field_names is None: + field_names = self.get_field_names(sample_ids) + elif len(set(field_names)) != len(field_names): + logger.warning("Provided field names are not unique") + + if sample_ids is None: + sample_ids = self.get_sample_ids() + elif len(set(sample_ids)) != len(sample_ids): + logger.warning("Provided sample ids are not unique") + nb_samples = len(sample_ids) + + named_tabular = {} + for f_name in field_names: + first_field = self[sample_ids[0]].get_field(f_name) + if first_field is not None: + f_dtype = first_field.dtype + nb_points = first_field.shape[0] + if len(first_field.shape) == 1: + field_size = 1 + elif len(first_field.shape) == 2: + field_size = first_field.shape[1] + else: + raise ShapeError( + f"Expects field as a 2-dim array, but field {f_name} from sample {sample_ids[0]} has shape: {first_field.shape}" + ) + else: + print("---") + print(f"Field {f_name} of sample {sample_ids[0]} is None") + print("---") + res = np.empty((nb_samples, nb_points, field_size), dtype=f_dtype) + # print(f"{nb_points=}") + # print(f"{field_size=}") + # print(f"{res.shape=}") + res.fill(None) + for i_, id in enumerate(sample_ids): + val = self[id].get_field(f_name) + # print(f"{val.shape=}") + if val is not None: + if not (val.shape[0] == nb_points): + # TODO: explain error + raise ShapeError("") + if len(val.shape) == 2 and not (val.shape[1] == field_size): + # TODO: explain error + raise ShapeError("") + res[i_] = val.reshape((nb_points, field_size)) + named_tabular[f_name] = res + + if as_nparray: + all_tabs = list(named_tabular.values()) + if all([t.shape[1] == all_tabs[0].shape[2] for t in all_tabs]): + named_tabular = np.stack(all_tabs, axis=2) + else: + named_tabular = np.concatenate(all_tabs, axis=2) + return named_tabular + # -------------------------------------------------------------------------# def extract_dataset( self, From d4b848ebc8d89aefe4db6350efee6b2c57e55693 Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Mon, 7 Jul 2025 19:41:20 +0200 Subject: [PATCH 07/15] feat(dataset.py) add some tests for new fonctiannalities -> to DEBUG --- tests/containers/test_dataset.py | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/containers/test_dataset.py b/tests/containers/test_dataset.py index d13c4fb..f0637f8 100644 --- a/tests/containers/test_dataset.py +++ b/tests/containers/test_dataset.py @@ -361,6 +361,47 @@ def test_get_scalars_to_tabular_same_scalars_name( dataset.get_scalars_to_tabular(sample_ids=[0, 0]) dataset.get_scalars_to_tabular(scalar_names=["test", "test"]) + # -------------------------------------------------------------------------# + def test_add_tabular_fields(self, dataset, tabular, field_names, nb_samples): + dataset.add_tabular_fields(tabular, field_names) + assert len(dataset) == nb_samples + + def test_add_tabular_fields_no_names(self, dataset, tabular, nb_samples): + dataset.add_tabular_fields(tabular) + assert len(dataset) == nb_samples + + def test_add_tabular_fields_bad_ndim(self, dataset, tabular, field_names): + with pytest.raises(ShapeError): + 
dataset.add_tabular_fields(tabular.reshape((-1)), field_names) + + def test_add_tabular_fields_bad_shape(self, dataset, tabular, field_names): + tabular = np.concatenate((tabular, np.zeros((len(tabular), 1))), axis=1) + with pytest.raises(ShapeError): + dataset.add_tabular_fields(tabular, field_names) + + def test_get_fields_to_tabular(self, dataset, tabular, field_names): + assert len(dataset.get_fields_to_tabular()) == 0 + assert dataset.get_fields_to_tabular() == {} + dataset.add_tabular_fields(tabular, field_names) + assert dataset.get_fields_to_tabular(as_nparray=True).shape == ( + len(tabular), + len(field_names), + ) + dict_tabular = dataset.get_fields_to_tabular() + for i_s, sname in enumerate(field_names): + assert np.all(dict_tabular[sname] == tabular[:, i_s]) + + def test_get_fields_to_tabular_same_fields_name( + self, dataset, tabular, field_names + ): + dataset.add_tabular_fields(tabular, field_names) + assert dataset.get_fields_to_tabular(as_nparray=True).shape == ( + len(tabular), + len(field_names), + ) + dataset.get_fields_to_tabular(sample_ids=[0, 0]) + dataset.get_fields_to_tabular(field_names=["test", "test"]) + # -------------------------------------------------------------------------# def test_add_info(self, dataset): dataset.add_info("legal", "owner", "PLAID") @@ -415,6 +456,9 @@ def test_merge_dataset_with_bad_type(self, dataset_with_samples): with pytest.raises(ValueError): dataset_with_samples.merge_dataset(3) + def test_merge_samples(self, dataset_with_samples, other_dataset_with_samples): + dataset_with_samples.merge_samples(other_dataset_with_samples) + # -------------------------------------------------------------------------# def test_save(self, dataset_with_samples, tmp_path): From 2dc9f54d2fb93c1adf533b2302569d32560f3320 Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Sat, 12 Jul 2025 23:45:07 +0200 Subject: [PATCH 08/15] (sklearn wrapper) add classes to wrap any sklearn block to use and return a PLAID dataset --- src/plaid/wrappers/__init__.py | 7 + src/plaid/wrappers/sklearn.py | 385 +++++++++++++++++++++++++++++++++ 2 files changed, 392 insertions(+) create mode 100644 src/plaid/wrappers/__init__.py create mode 100644 src/plaid/wrappers/sklearn.py diff --git a/src/plaid/wrappers/__init__.py b/src/plaid/wrappers/__init__.py new file mode 100644 index 0000000..dbcab68 --- /dev/null +++ b/src/plaid/wrappers/__init__.py @@ -0,0 +1,7 @@ +"""Wrapper functions for the PLAID library.""" + +# -*- coding: utf-8 -*- +# +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# diff --git a/src/plaid/wrappers/sklearn.py b/src/plaid/wrappers/sklearn.py new file mode 100644 index 0000000..90f85fa --- /dev/null +++ b/src/plaid/wrappers/sklearn.py @@ -0,0 +1,385 @@ +# -*- coding: utf-8 -*- +# +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# +# +"""This module provides wrappers for scikit-learn estimators and transformers so they can be used seamlessly in scikit-learn Pipelines +with PLAID objects. The wrapped blocks (e.g. PCA, GaussianProcessRegressor, StandardScaler, etc.) take a `plaid.containers.Dataset` as input, +and return a `plaid.containers.Dataset` as output. This allows you to build +scikit-learn Pipelines where all blocks operate on PLAID objects, enabling end-to-end workflows with domain-specific data structures. 
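+Inputs and outputs are selected by name through ``in_keys``/``out_keys``, using the
+'scalar::<name>' / 'field::<name>' convention documented in ``PlaidWrapper.__init__``.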
+ +Example usage: + + from sklearn.pipeline import Pipeline + from plaid.wrappers.sklearn import WrappedSklearnTransform, WrappedSklearnRegressor + from sklearn.decomposition import PCA + from sklearn.gaussian_process import GaussianProcessRegressor + from plaid.containers.dataset import Dataset + + # Define your PLAID dataset + dataset = Dataset(...) + + # Build a pipeline with wrapped sklearn blocks + pipe = Pipeline([ + ("pca", WrappedSklearnTransform(PCA(n_components=2))), + ("reg", WrappedSklearnRegressor(GaussianProcessRegressor())) + ]) + + # Fit the pipeline (all steps receive and return Dataset objects) + pipe.fit(dataset) + # Predict + y_pred = pipe.predict(dataset) + +All wrapped blocks must accept and return PLAID Dataset objects. + +Some inspiration come from [TensorDict](https://pytorch.org/tensordict/stable/reference/generated/tensordict.nn.TensorDictModule.html#tensordict.nn.TensorDictModule). + +This module defines the following classes: +`PlaidWrapper`: Base class for scikit-learn estimators and transformers to operate on PLAID objects. +├── `WrappedSklearnTransform`: Wrapper for scikit-learn Transformer blocks. +└── `WrappedSklearnPredictor`: Wrapper for scikit-learn Predictor blocks. + ├── `WrappedSklearnClassifier`: Wrapper for scikit-learn Classifier blocks. + └── `WrappedSklearnRegressor`: Wrapper for scikit-learn Regressor blocks. +""" + +# %% Imports +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: # pragma: no cover + from typing import TypeVar + + Self = TypeVar("Self") + +import logging +from typing import Union + +import numpy as np +from sklearn.base import ( + BaseEstimator, + BiclusterMixin, + ClassifierMixin, + ClusterMixin, + DensityMixin, + MetaEstimatorMixin, + MultiOutputMixin, + OutlierMixin, + RegressorMixin, + TransformerMixin, +) + +from plaid.containers.dataset import Dataset + +logger = logging.getLogger(__name__) +logging.basicConfig( + format="[%(asctime)s:%(levelname)s:%(filename)s:%(funcName)s(%(lineno)d)]:%(message)s", + level=logging.INFO, +) + +# %% Classes + +SklearnBlock = Union[ + BaseEstimator, + TransformerMixin, + RegressorMixin, + ClassifierMixin, + ClusterMixin, + BiclusterMixin, + DensityMixin, + OutlierMixin, + MultiOutputMixin, +] +"""Union type for all scikit-learn blocks that can be used in a Pipeline.""" + + +class PlaidWrapper(BaseEstimator, MetaEstimatorMixin): + """Base wrapper for scikit-learn estimators and transformers to operate on PLAID objects. + + This class is not intended to be used directly, but as a base for wrappers that allow scikit-learn blocks + (such as PCA, StandardScaler, GaussianProcessRegressor, etc.) to be used in sklearn Pipelines with PLAID objects. + All methods accept and return `plaid.containers.Dataset` objects. + """ + + def __init__( + self, + sklearn_block: SklearnBlock, + fit_only_ones: bool = True, + in_keys: Union[list[str], str] = [], + out_keys: Union[list[str], str] = [], + ): + """Wrap a scikit-learn estimator or transformer. + + Args: + sklearn_block (SklearnBlock): Any scikit-learn transform or predictor (e.g. PCA, StandardScaler, GPRegressor). + fit_only_ones (bool, optional): If True, the model will only be fitted once. Defaults to True. + in_keys (Union[list[str],str], optional): + Names of scalars and/or fields to take as input. + Scalar (resp. field) names should be given as 'scalar::' (resp. 'field::'). + Use 'all' to use all available scalars and fields, or 'scalar::all'/'field::all' for all scalars/fields. + Defaults to []. 
+ out_keys (Union[list[str],str], optional): Names of scalars and/or fields to take as output, using the same convention as for `in_keys`. Defaults to []. + Additionally, if 'same', 'scalar::same' or 'field::same' is given, it will use as output the same names as for input. + """ + self.sklearn_block = sklearn_block + self.fit_only_ones = fit_only_ones + self.in_keys = in_keys + self.out_keys = out_keys + + # ---# Scalars + if in_keys == "all" or "scalar::all" in in_keys: + self.input_scalars = "all" + else: + self.input_scalars = [s[8:] for s in in_keys if s[:8] == "scalar::"] + # + if out_keys == "same" or "scalar::same" in out_keys: + self.output_scalars = self.input_scalars + else: + self.output_scalars = [s[8:] for s in out_keys if s[:8] == "scalar::"] + + # ---# Fields + if in_keys == "all" or "field::all" in in_keys: + self.input_fields = "all" + else: + self.input_fields = [s[7:] for s in in_keys if s[:7] == "field::"] + # + if out_keys == "same" or "field::same" in out_keys: + self.output_fields = self.input_fields + else: + self.output_fields = [s[7:] for s in out_keys if s[:7] == "field::"] + + print(f"{self.input_scalars=}") + print(f"{self.input_fields=}") + print(f"{self.output_scalars=}") + print(f"{self.output_fields=}") + + def fit(self, dataset: Dataset, *args, **kwargs): + """Fit the wrapped scikit-learn model on a PLAID dataset. + + Args: + dataset (Dataset): The dataset to fit the model on. + + Returns: + self: Returns self for chaining. + """ + if self.fit_only_ones and self.__sklearn_is_fitted__(): + return self + + X, y = self._extract_X_y_from_plaid(dataset) + self.sklearn_block.fit(X, y) + + self._is_fitted = True + return self + + def _extract_X_y_from_plaid( + self, dataset: Dataset + ) -> tuple[np.ndarray, np.ndarray]: + """Extract features (X) and labels (y) from a PLAID dataset according to the input/output keys. + + Args: + dataset (Dataset): The dataset to extract data from. + + Returns: + tuple[np.ndarray, np.ndarray]: The extracted features and labels as numpy arrays. 
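+
+        Example (illustrative, for a wrapper built with
+        ``in_keys=["scalar::p", "field::u"]`` and empty ``out_keys``)::
+
+            X, y = self._extract_X_y_from_plaid(dataset)
+            # X has shape (n_samples, 1 + flattened size of field 'u');
+            # y is None because no output keys were requested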
+ """ + X = [ + dataset.get_scalars_to_tabular([input_scalar_name], as_nparray=True) + for input_scalar_name in ( + dataset.get_scalar_names() + if self.input_scalars == "all" + else self.input_scalars + ) + ] + X.extend( + [ + dataset.get_fields_to_tabular([input_field_name], as_nparray=True) + for input_field_name in ( + dataset.get_field_names() + if self.input_fields == "all" + else self.input_fields + ) + ] + ) + # Reshape any 3D arrays to 2D, contracting the last two dimensions + for i_v, v in enumerate(X): + if len(v.shape) >= 3: + X[i_v] = v.reshape((len(v), -1)) + # Reshape any 1D arrays to 2D, appending a singleton dimension + if len(v.shape) == 1: + X[i_v] = v.reshape((-1, 1)) + print(f"=== In <_extract_X_y_from_plaid> of {self.sklearn_block=}") + print(f"{self.input_scalars=}") + print(f"{self.input_fields=}") + print(f"{self.output_scalars=}") + print(f"{self.output_fields=}") + print(f"{type(X)=}") + print(f"{len(X)=}") + # Concatenate the input arrays into a 2D numpy array + X = np.concatenate(X, axis=-1) + + y = [ + dataset.get_scalars_to_tabular([output_scalar_name], as_nparray=True) + for output_scalar_name in ( + dataset.get_scalar_names() + if self.output_scalars == "all" + else self.output_scalars + ) + ] + y.extend( + [ + dataset.get_fields_to_tabular([output_field_name], as_nparray=True) + for output_field_name in ( + dataset.get_field_names() + if self.output_fields == "all" + else self.output_fields + ) + ] + ) + for i_v, v in enumerate(y): + # Reshape any 3D arrays to 2D, contracting the last two dimensions + if len(v.shape) >= 3: + y[i_v] = v.reshape((len(v), -1)) + # Reshape any 1D arrays to 2D, appending a singleton dimension + if len(v.shape) == 1: + y[i_v] = v.reshape((-1, 1)) + # Concatenate the output arrays into a 2D numpy array + print(f"{self.input_scalars=}") + print(f"{self.input_fields=}") + print(f"{self.output_scalars=}") + print(f"{self.output_fields=}") + print(f"{type(y)=}") + print(f"{len(y)=}") + if len(y) > 0: + y = np.concatenate(y, axis=-1) + else: + y = None + + return X, y + + def _convert_y_to_plaid(self, y: np.ndarray, dataset: Dataset) -> Dataset: + """Convert the model's output (numpy array) to a PLAID Dataset, updating the original dataset. + + Args: + y (np.ndarray): The model's output. + dataset (Dataset): The original dataset. + + Returns: + Dataset: The updated PLAID dataset with new scalars/fields. + """ + new_dset = Dataset() + if len(self.output_scalars) > 0: + new_dset.add_tabular_scalars( + y[:, : len(self.output_scalars)], self.output_scalars + ) + if len(self.output_fields) > 0: + new_dset.add_tabular_fields( + y[:, len(self.output_scalars) :], self.output_fields + ) + dataset.merge_samples(new_dset) + return dataset + + def __sklearn_is_fitted__(self): + """Check if the wrapped scikit-learn model is fitted. + + Returns: + bool: True if the model is fitted, False otherwise. + """ + return hasattr(self, "_is_fitted") and self._is_fitted + + def __repr__(self): + """String representation of the wrapper, showing the underlying sklearn block.""" + return f"{self.__class__.__name__}({self.sklearn_block.__repr__()})" + + def __str__(self): + """String representation of the wrapper, showing the underlying sklearn block.""" + return f"{self.__class__.__name__}({self.sklearn_block.__str__()})" + + +class WrappedSklearnTransform(PlaidWrapper, TransformerMixin): + """Wrapper for scikit-learn Transformer blocks to operate on PLAID objects in a Pipeline. + + This class allows you to use any sklearn Transformer (e.g. 
PCA, StandardScaler) in a Pipeline where all steps + accept and return PLAID Dataset objects. The transform and inverse_transform methods take a Dataset and return a new Dataset. + """ + + def transform(self, dataset: Dataset): + """Transform the dataset using the wrapped sklearn transformer. + + Args: + dataset (Dataset): The dataset to transform. + + Returns: + Dataset: The transformed PLAID dataset. + """ + X, _ = self._extract_X_y_from_plaid(dataset) + X_transformed = self.sklearn_block.transform(X) + return self._convert_y_to_plaid(X_transformed, dataset) + + def inverse_transform(self, dataset: Dataset): + """Inverse transform the dataset using the wrapped sklearn transformer. + + Args: + dataset (Dataset): The dataset to inverse transform. + + Returns: + Dataset: The inverse transformed PLAID dataset. + """ + # TODO: debug + X, _ = self._extract_X_y_from_plaid(dataset) + X_transformed = self.sklearn_block.inverse_transform(X) + return self._convert_y_to_plaid(X_transformed, dataset) + + ## Already defined by TransformerMixin + # def fit_transform(self, dataset:Dataset):... + + +class WrappedSklearnPredictor(PlaidWrapper, MetaEstimatorMixin): + """Wrapper for scikit-learn Predictor blocks to operate on PLAID objects in a Pipeline. + + This class allows you to use any sklearn predictor (e.g. GaussianProcessRegressor, RandomForestRegressor, etc.) in a Pipeline + where all steps accept and return PLAID Dataset objects. The predict and fit_predict methods take a Dataset and return a new Dataset. + """ + + def predict(self, dataset: Dataset): + """Predict the output for the given dataset using the wrapped sklearn predictor. + + Args: + dataset (Dataset): The dataset to predict. + + Returns: + Dataset: The predicted PLAID dataset. + """ + X, _ = self._extract_X_y_from_plaid(dataset) + y_pred = self.sklearn_block.predict(X) + return self._convert_y_to_plaid(y_pred, dataset) + + def fit_predict(self, dataset: Dataset): + """Fit the model to the dataset and predict the output using the wrapped sklearn predictor. + + Args: + dataset (Dataset): The dataset to fit the model on. + + Returns: + Dataset: The predicted PLAID dataset. + """ + self.fit(dataset) + return self.predict(dataset) + + +class WrappedSklearnClassifier(WrappedSklearnPredictor, ClassifierMixin): + """Wrapper for scikit-learn Classifier blocks to operate on PLAID objects in a Pipeline. + + Inherits from WrappedSklearnPredictor and ClassifierMixin. + """ + + pass + + +class WrappedSklearnRegressor(WrappedSklearnPredictor, RegressorMixin): + """Wrapper for scikit-learn Regressor blocks to operate on PLAID objects in a Pipeline. + + Inherits from WrappedSklearnPredictor and RegressorMixin. + """ + + pass From a660b512c6b2ac618e0337cd887c89d929e57fe8 Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Sat, 12 Jul 2025 23:46:32 +0200 Subject: [PATCH 09/15] (dataset) fix __getitem__ with slices --- src/plaid/containers/dataset.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index 08e2c65..8cfdb26 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -466,7 +466,7 @@ def get_scalars_to_tabular( # -------------------------------------------------------------------------# def add_tabular_fields(self, tabular: np.ndarray, names: list[str] = None) -> None: - """Add tabular field data to the summary. + """Add tabular field data to the dataset. 
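+
+        Each row of ``tabular`` becomes one new Sample that stores, for every
+        name in ``names``, a single field value taken from the matching column.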
Args: tabular (np.ndarray): A 2D NumPy array containing tabular field data. @@ -1210,7 +1210,7 @@ def __getitem__(self, id: Union[int, slice]) -> Union[Sample, Self]: Seealso: This function can also be called using `__call__()`. """ - if isinstance(id, int): + if isinstance(id, (int, np.integer)): if id in self._samples: return self._samples[id] else: @@ -1221,6 +1221,10 @@ def __getitem__(self, id: Union[int, slice]) -> Union[Sample, Self]: if isinstance(id, slice): # TODO: check slice.stop is positive, if negative use len(dataset)+slice.stop ids = np.arange(slice.start, slice.stop, slice.step) + else: + raise TypeError( + f"Unsupported index type: {type(id)}, should be int or slice" + ) samples = [] for id in ids: if id in self._samples: From 0ccac03e31a861c42a86d200162b65bb78a86fd6 Mon Sep 17 00:00:00 2001 From: Xavier Roynard Date: Tue, 15 Jul 2025 16:27:27 +0200 Subject: [PATCH 10/15] (notebooks) add to Pipelines examples with or without PLAID wrapping --- .../notebooks/pca_gp_plaid_pipeline.ipynb | 8100 +++++++++++++++++ .../notebooks/pca_gp_sklearn_pipeline.ipynb | 7899 ++++++++++++++++ 2 files changed, 15999 insertions(+) create mode 100644 docs/source/notebooks/pca_gp_plaid_pipeline.ipynb create mode 100644 docs/source/notebooks/pca_gp_sklearn_pipeline.ipynb diff --git a/docs/source/notebooks/pca_gp_plaid_pipeline.ipynb b/docs/source/notebooks/pca_gp_plaid_pipeline.ipynb new file mode 100644 index 0000000..2575ed5 --- /dev/null +++ b/docs/source/notebooks/pca_gp_plaid_pipeline.ipynb @@ -0,0 +1,8100 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exemple of pipeline PCA-GP-PCA type" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import make_regression\n", + "\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", + "from sklearn.gaussian_process import GaussianProcessRegressor\n", + "\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer, TransformedTargetRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "from plaid.containers.dataset import Dataset\n", + "from plaid.containers.sample import Sample\n", + "from plaid.problem_definition import ProblemDefinition\n", + "from plaid.wrappers.sklearn import WrappedSklearnTransform, WrappedSklearnRegressor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate some synthetic regression data" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "NB_SAMPLES = 103\n", + "NB_INPUT_SCALARS = 3\n", + "NB_OUTPUT_SCALARS = 5\n", + "FIELD_SIZE = 17\n", + "\n", + "X, y = make_regression(n_samples=NB_SAMPLES, n_features=NB_INPUT_SCALARS, n_targets=NB_OUTPUT_SCALARS + FIELD_SIZE, noise=0.1)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,\n", + " 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,\n", + " 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,\n", + " 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,\n", + " 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64,\n", + " 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,\n", + " 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,\n", + " 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102])" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dset = Dataset()\n", + "samples = []\n", + "for sample_id in range(NB_SAMPLES):\n", + " sample = Sample()\n", + " for scalar_id in range(NB_INPUT_SCALARS):\n", + " sample.add_scalar(f\"input_scalar_{scalar_id}\", X[sample_id, scalar_id])\n", + " for scalar_id in range(NB_OUTPUT_SCALARS):\n", + " sample.add_scalar(f\"output_scalar_{scalar_id}\", y[sample_id, scalar_id])\n", + " sample.init_base(topological_dim=3, physical_dim=3)\n", + " sample.init_zone(np.array([0,0,0]))\n", + " sample.add_field(f\"output_field\", y[sample_id, NB_OUTPUT_SCALARS:])\n", + " samples.append(sample)\n", + "dset.add_samples(samples)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# pr_def = ProblemDefinition()\n", + "# pr_def.set_split({\n", + "# 'train': np.arange(NB_SAMPLES * 0.8),\n", + "# 'test': np.arange(NB_SAMPLES * 0.8,NB_SAMPLES),\n", + "# })\n", + "# pr_def.add_input_scalars_names([f\"input_scalar_{scalar_id}\" for scalar_id in range(NB_OUTPUT_SCALARS)])\n", + "# pr_def.add_output_scalars_names([f\"output_scalar_{scalar_id}\" for scalar_id in range(NB_OUTPUT_SCALARS)])\n", + "# pr_def.add_output_fields_names(['output_field'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PCA-GP-PCA as pipeline with PLAID inputs/outputs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define the PCA for the shape embedding\n", + "\n", + "In this example we only apply PCA to the first 8 columns\n", + "\n", + "The last two columns are unchanged" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "WrappedSklearnTransform(PCA(n_components=2))" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### old version\n", + "# feats_to_reduce = list(range(NB_INPUT_SCALARS))\n", + "# preprocessor = ColumnTransformer(\n", + "# transformers=[\n", + "# (\n", + "# \"pca\",\n", + "# WrappedSklearnTransform(PCA(n_components=2), in_keys=['scalar::all']),\n", + "# feats_to_reduce,\n", + "# ),\n", + "# ],\n", + "# remainder=\"passthrough\",\n", + "# )\n", + "\n", + "### new version\n", + "preprocessor = WrappedSklearnTransform(PCA(n_components=2), in_keys=[f\"scalar::input_scalar_{scalar_id}\" for scalar_id in range(NB_INPUT_SCALARS)])\n", + "preprocessor" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=PCA(n_components=2)\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=3\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "WrappedSklearnTransform(PCA(n_components=2))" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preprocessor.fit(dataset=dset)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preprocessor._is_fitted" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=PCA(n_components=2)\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=3\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n" + ] + }, + { + "data": { + "text/plain": [ + "(True,\n", + " ['input_scalar_0',\n", + " 'input_scalar_1',\n", + " 'input_scalar_2',\n", + " 'output_scalar_0',\n", + " 'output_scalar_1',\n", + " 'output_scalar_2',\n", + " 'output_scalar_3',\n", + " 'output_scalar_4'],\n", + " ['output_field'])" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out_dset = preprocessor.transform(dset)\n", + "id(dset) == id(out_dset), dset.get_scalar_names(), dset.get_field_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiwAAAGdCAYAAAAxCSikAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAQnRJREFUeJzt3XdYU3ffBvD7JIEEEFBBEQVRXOBEQZmpnbbirLWu1tHWKiqCtX1afbTV2mGnVVTc2mpddS867LAGcCK4cIuCCiIoewSS8/7Rt7wvdZQgcDLuz3XlD09OTu/zq21uzvckCKIoiiAiIiIyYjKpAxARERH9GxYWIiIiMnosLERERGT0WFiIiIjI6LGwEBERkdFjYSEiIiKjx8JCRERERo+FhYiIiIyeQuoANUWv1+PWrVuwt7eHIAhSxyEiIqIqEEUR+fn5aNq0KWSyh19HMZvCcuvWLbi7u0sdg4iIiKohLS0Nbm5uD33ebAqLvb09gL9O2MHBQeI0REREVBV5eXlwd3eveB9/GLMpLH+PgRwcHFhYiIiITMy/3c7Bm26JiIjI6LGwEBERkdFjYSEiIiKjx8JCRERERo+FhYiIiIweCwsREREZPRYWIiIiMnosLERERGT0WFiIiIjI6LGwEBERkdFjYSEiIiKjx8JCRERERo+F5V/8fDYDERsTkV9SJnUUIiIii2U2v625NhRrdfjv9tPILtTi1I0cLBrRDR2bOUodi4iIyOLwCssj2FjLsXyUL5o6qnAtuwiDouOx7tA1iKIodTQiIiKLwsLyL3w9GmJfhBrPejeGVqfH+7vOYtKGE8jjiIiIiKjOsLBUQQM7a6wY5YeZfbyhkAmIOZ2BPlEanEzLkToaERGRRWBhqSJBEDBW7YmtE4Lg1sAGaXeLMXhpPFbHpnBEREREVMtYWAzk414f+yLUeL6DC8p0IubsTca4dQnIKdJKHY2IiMhssbBUg6ONFZa+6osP+3eAtVyG/cm30ScqFidS70kdjYiIyCyxsFSTIAgYHdQC2yYEwcPJFjdzijFk6SEsP3gFej1HRERERDWJheUxdXJzxJ7JIejT2RXlehGfxpzH2LXHcbeQIyIiIqKawsJSAxxUVlg0vCs+ebEjrBUy/H4+E32iNDh27a7U0YiIiMwCC0sNEQQBr/h7YOfEYHg62yE9twTDlh/G4j8uc0RERET0mFhYalj7pg7YPTkEA32aQqcX8eXPFzB6zVFkFZRKHY2IiMhksbDUgnpKBb4Z6oMvXuoMlZUMmktZCF2gwaEr2VJHIyIiMkksLLVEEAQM6e6OXZNC0LpxPWTml+KVlYex4NdL0HFEREREZBAWllrWrok9docHY7CvG/Qi8M2vFzFy1RFk5pdIHY2IiMhksLDUAVtrBb56uQu+frkLbKzkiL+SjdAFGsReypI6GhERkUlgYalDL/m6Yc/kELRzsUdWgRYjVx/B179cQLlOL3U0IiIio8bCUsdaN66HXeHBGN7DHaIILPz9MkasPIKMXI6IiIiIHoaFRQIqKznmDuqMBcN8YGctx9GUuwiN0uDAhUypoxERERklFhYJDfBphj2TQ9De1QF3C7UYs+YYPvvxPMo4IiIiIqqEhUVino3qYfvEIIwM8AAALP3zCoYtP4xbOcUSJyMiIjIeLCxGQGUlx0cDOyL6lW6wVyqQcP0eQqM0+DX5ttTRiIiIjAILixEJ7eSKfRFqdHZzRE5RGcauPY6P9yZDW84RERERWTYWFiPT3MkWW8IC8VpwCwDAytgUvLzsENLuFkkbjIiISEIsLEZIqZBjVr8OWDbSFw4qBU6m5aBPlAY/ncmQOhoREZEkWFiM2PMdmiAmUg0f9/rIKylH2PcJmL37LErLdVJHIyIiqlMsLEbOrcFfI6JxT3gCAL6Nv4bBSw7henahxMmIiIjqDguLCbCSy/DfUG+sHuOH+rZWOH0zF32iYrH31C2poxEREdUJFhYT8rSXC2Ii1PDzaICC0nKEb0jEjB2nUVLGEREREZk3FhYT07S+DTaNC8DEJ1sBANYfScWL0fG4eqdA4mRERES1h4XFBCnkMrz7ghe+e70HnOyscS49D30XxmJn4k2poxEREdUKFhYT1rNtI8REqhHg2RBFWh2mbE7Ce1tPoVjLEREREZkXFhYT5+KgwvqxAYh4pg0EAdh8PA0DF8fhcma+1NGIiIhqDAuLGZDLBEx9ri2+f8MfzvWUuHA7H/0WxmFrwg2poxEREdUIFhYzEtzaGTGRIQhu7YTiMh3e2XISU39IQpG2XOpoREREj4WFxcw0tldh7ev+ePu5tpAJwPYTN9FvYSzOZ+RJHY2IiKjaWFjMkFwmYPIzbbDhzQC4OChx5U4hBiyKw6ajqRBFUep4REREBmNhMWMBnk6IiVCjZ9tGKC3XY9r204jclISCUo6IiIjItLCwmDmnekqsGdMd773gBblMwO6Tt9BvYSzO3sqVOhoREVGVsbBYAJlMwIQnW2HzuAC4OqqQklWIF6Pjse7wdY6IiIjIJLCwWBC/Fg0RE6HGM16NoS3X4/2dZxC+IRF5JWVSRyMiInokFhYL08DOGitH+2FmH28oZAL2nU5H36hYnLqRI3U0IiKih2JhsUCCIGCs2hNbwgLRrL4NUu8W4aUl8VgTl8IRERERGaVqFZbo6Gi0bNkSKpUKvr6+0Gg0VXpdXFwcFAoFfHx87ntu27ZtaN++PZRKJdq3b48dO3ZUJxoZoGvzBoiJUKNXexeU6UR8uCcZ49clILeIIyIiIjIuBheWzZs3Y8qUKZgxYwYSExOhVqvRu3dvpKamPvJ1ubm5GDVqFJ555pn7njt06BCGDh2KkSNH4uTJkxg5ciSGDBmCI0eOGBqPDORoa4VlI30xu197WMtl+CX5NkKjNEhMvSd1NCIiogqCaOAMwN/fH926dcOSJUsqtnl7e2PgwIGYO3fuQ183bNgwtGnTBnK5HDt37kRSUlLFc0OHDkVeXh5+/PHHim0vvPACGjRogI0bN1YpV15eHhwdHZGbmwsHBwdDTon+1+kbuZi04QRS7xZBIRPw3gteeCOkJWQyQepoRERkpqr6/m3QFRatVouEhAT06tWr0vZevXohPj7+oa9bs2YNrly5glmzZj3w+UOHDt13zOeff/6RxywtLUVeXl6lBz2eTm6O2BsRgj6dXFGuF/FJzDmMXXsc9wq1UkcjIiILZ1BhycrKgk6ng4uLS6XtLi4uyMjIeOBrLl26hGnTpmH9+vVQKBQP3CcjI8OgYwLA3Llz4ejoWPFwd3c35FToIRxUVlg0ois+HtgR1goZfj+fidAoDY5fuyt1NCIismDVuulWECqPCERRvG8bAOh0OowYMQIffvgh2rZtWyPH/Nv06dORm5tb8UhLSzPgDOhRBEHAqwEe2DExCC2d7ZCeW4Khyw8j+sBl6PX8FBEREdU9gwqLs7Mz5HL5fVc+MjMz77tCAgD5+fk4fvw4wsPDoVAooFAoMGfOHJw8eRIKhQK///47AKBJkyZVPubflEolHBwcKj2oZnVo6og9k0MwwKcpdH
oRX/x0AWO+PYasglKpoxERkYUxqLBYW1vD19cX+/fvr7R9//79CAoKum9/BwcHnD59GklJSRWPsLAwtGvXDklJSfD39wcABAYG3nfMX3755YHHpLpVT6nA/KE++PylTlAqZDh48Q5CF2hw+Gq21NGIiMiCPPimkkeYOnUqRo4cCT8/PwQGBmL58uVITU1FWFgYgL9GNTdv3sTatWshk8nQsWPHSq9v3LgxVCpVpe2RkZF44okn8Pnnn2PAgAHYtWsXfv31V8TGxj7m6VFNEAQBQ7s3h497A0zacAKXMwswYsVhTHm2LSY91RpyfoqIiIhqmcH3sAwdOhTz58/HnDlz4OPjg4MHDyImJgYeHh4AgPT09H/9TpZ/CgoKwqZNm7BmzRp07twZ3377LTZv3lxxBYaMQ7sm9tgdHoyXurlBLwLz9l/EqNVHkJlfInU0IiIycwZ/D4ux4vew1K2tCTfw/s4zKC7TwbmeEguG+SC4tbPUsYiIyMTUyvewEP1tsK8b9kwORjsXe2QVlOLVVUcw75cL0PFTREREVAtYWKjaWje2x85JwRjW3R2iCET9fhkjVhzG7TyOiIiIqGaxsNBjsbGW47OXOmPBMB/YWctxJOUuei/Q4MCFTKmjERGRGWFhoRoxwKcZ9kwOgberA+4WajFmzTF8/tN5lOv0UkcjIiIzwMJCNcazUT3smBiEkQF/fWJsyYErGLb8MG7lFEucjIiITB0LC9UolZUcHw3siMUjusFeqcDx6/cQGqXBb+duSx2NiIhMGAsL1Yo+nV2xNyIEnZo5IqeoDG98dxyf7EuGtpwjIiIiMhwLC9UaDyc7bJ0QiDFBLQAAKzQpGLLsENLuFkkbjIiITA4LC9UqpUKO2f07YNlIXzioFEhKy0GfKA1+Ppvx7y8mIiL6XywsVCee79AE+yLU8HGvj7yScoxfl4DZu8+itFwndTQiIjIBLCxUZ9wb2uKH8YF4U90SAPBt/DUMXnII17MLJU5GRETGjoWF6pS1QoYZfdpj1Wg/1Le1wumbuegbFYt9p9KljkZEREaMhYUk8Yy3C2Ii1PDzaID80nJM2nACM3eeRkkZR0RERHQ/FhaSTNP6Ntg4LgATn2wFAPj+cCpejI7H1TsFEicjIiJjw8JCkrKSy/DuC1747vUeaGhnjXPpeei3MBa7km5KHY2IiIwICwsZhZ5tG+HHSDX8WzZEoVaHyE1JmLbtFIq1HBERERELCxkRFwcV1o/1R8TTrSEIwKZjaRi4OA6XM/OljkZERBJjYSGjopDLMLVXO6x73R/O9ZS4cDsf/RbGYWvCDamjERGRhFhYyCiFtHFGTGQIgls7obhMh3e2nMTbP5xEkbZc6mhERCQBFhYyWo3tVVj7uj+mPtcWMgHYduIG+i+Kw4UMjoiIiCwNCwsZNblMQMQzbbDhzQC4OChxObMA/RfFYvOxVIiiKHU8IiKqIywsZBICPJ0QE6HGE20bobRcj/e2ncZbm5NQUMoRERGRJWBhIZPhVE+Jb8d0x7svtINcJmBn0i30XxiL5Ft5UkcjIqJaxsJCJkUmEzDxydbYPC4Aro4qXM0qxMDoOHx/+DpHREREZoyFhUySX4uGiIlQ4xmvxtCW6zFz5xmEb0xEXkmZ1NGIiKgWsLCQyWpgZ42Vo/0wI9QbCpmAfafS0TcqFqdv5EodjYiIahgLC5k0QRDw5hOe+CEsEM3q2yD1bhFeWhKPb+NSOCIiIjIjLCxkFro1b4CYCDV6tXeBVqfH7D3JCPs+AblFHBEREZkDFhYyG462Vlg20hez+rWHlVzAz2dvo89CDRJT70kdjYiIHhMLC5kVQRDwWnBLbJsQhOYNbXHjXjFeXnoIKzVXOSIiIjJhLCxkljq71cfeiBD06eSKcr2Ij/edw9jvjuNeoVbqaEREVA0sLGS2HFRWWDSiKz4a2BHWChl+O5+JPlEaJFy/K3U0IiIyEAsLmTVBEDAywAM7JgahpbMdbuWWYMiyw1hy4Ar0eo6IiIhMBQsLWYQOTR2xZ3IIBvg0hU4v4vOfzuO1b48hu6BU6mhERFQFLCxkMeopFZg/1AefDeoEpUKGPy/eQWiUBkeuZksdjYiI/gULC1kUQRAwrEdz7AoPRqtGdridV4rhKw5j4W+XoOOIiIjIaLGwkEXyauKAPZND8FI3N+hF4Ov9FzFq9RHcyeeIiIjIGLGwkMWytVbg6yFd8NXLXWBjJUfc5Wz0XqBB3OUsqaMREdE/sLCQxRvs64bd4cFo52KPrIJSvLrqCObtv8gRERGREWFhIQLQxsUeOycFY1h3d4giEPXbJbyy8jBu55VIHY2IiMDCQlTBxlqOz17qjAXDfGBnLcfhq3cRukCDPy/ekToaEZHFY2Eh+ocBPs2wZ3IIvF0dkF2oxejVR/HFT+dRrtNLHY2IyGKxsBA9gGejetgxMQivBjQHAEQfuILhKw4jPbdY4mRERJaJhYXoIVRWcnw8sBMWjeiKekoFjl27h9AFGvx+/rbU0YiILA4LC9G/6Nu5KfZFhKBTM0fcKyrD698ex6cx51DGERERUZ1hYSGqAg8nO2ydEIgxQS0AAMsPXsXLSw/hxr0iaYMREVkIFhaiKlIq5JjdvwOWvuoLB5UCSWk5CF2gwc9nM6SORkRk9lhYiAz0Qscm2BehRhf3+sgrKcf4dQn4cM9ZaMs5IiIiqi0sLETV4N7QFlvGB+JNdUsAwJq4axi8NB6p2RwRERHVBhYWomqyVsgwo097rBzlh/q2Vjh1Ixd9ojSIOZ0udTQiIrPDwkL0mJ5t74KYCDV8PRogv7QcE9efwPs7z6CkTCd1NCIis8HCQlQDmta3waZxAZjwZCsAwLrD1zEoOh4pWYUSJyMiMg8sLEQ1xEouw3sveOHb17qjoZ01ktPz0DdKg11JN6WORkRk8lhYiGrYk+0aIyZCjR4tG6JQq0PkpiRM336KIyIiosfAwkJUC5o4qrBhrD8mP90aggBsPJqGgYvjcDmzQOpoREQmiYWFqJYo5DK83asd1r3uD+d6SpzPyEe/hbHYlnBD6mhERCaHhYWoloW0cUZMZAiCWjmhuEyHt7ecxDtbTqJIWy51NCIik8HCQlQHGtursO4Nf7z1bFvIBGBrwg0MWBSHi7fzpY5GRGQSWFiI6ohcJiDy2TZYPzYAje2VuJRZgP6LYrH5WCpEUZQ6HhGRUWNhIapjga2cEBOphrqNM0rK9Hhv22m8tTkJBaUcERERPQwLC5EEnOsp8d1rPfDuC+0glwnYmXQL/RfGIvlWntTRiIiMEgsLkURkMgETn2yNTeMC4OqowtWsQgyMjsP6I9c5IiIi+gcWFiKJdW/REPsi1HjaqzG05XrM2HEG4RsTkV9SJnU0IiKjwcJCZAQa2llj5Sg//DfUCwqZgH2n0tF3YSzO3MyVOhoRkVFgYSEyEjKZgHFPtMIPYYFoVt8G17OLMCg6Ht/FX+OIiIgsHgsLkZHp1rwBYiLUeK69C7Q6PWbtPosJ359AbjFHRERkuVhYiIyQo60Vlo/0xQd928NKLuCnsxnoE6VBU
lqO1NGIiCRRrcISHR2Nli1bQqVSwdfXFxqN5qH7xsbGIjg4GE5OTrCxsYGXlxe++eab+/abP38+2rVrBxsbG7i7u+Ott95CSUlJdeIRmQVBEPB6SEtsmxCE5g1tceNeMQYvicdKzVWOiIjI4hhcWDZv3owpU6ZgxowZSExMhFqtRu/evZGamvrA/e3s7BAeHo6DBw/i3LlzmDlzJmbOnInly5dX7LN+/XpMmzYNs2bNwrlz57Bq1Sps3rwZ06dPr/6ZEZmJzm71sTciBKGdmqBcL+Ljfefw5trjyCnSSh2NiKjOCKKBP6r5+/ujW7duWLJkScU2b29vDBw4EHPnzq3SMQYNGgQ7OzusW7cOABAeHo5z587ht99+q9jn7bffxtGjRx959eb/y8vLg6OjI3Jzc+Hg4GDAGRGZBlEU8f2RVHy0Nxnacj2aOqqwcERX+Ho0lDoaEVG1VfX926ArLFqtFgkJCejVq1el7b169UJ8fHyVjpGYmIj4+Hj07NmzYltISAgSEhJw9OhRAMDVq1cRExODPn36PPQ4paWlyMvLq/QgMmeCIGBkgAd2TAxCS2c73MotwZBlh7H0zyvQ6zkiIiLzZlBhycrKgk6ng4uLS6XtLi4uyMjIeORr3dzcoFQq4efnh0mTJmHs2LEVzw0bNgwfffQRQkJCYGVlhVatWuGpp57CtGnTHnq8uXPnwtHRseLh7u5uyKkQmawOTR2xZ3II+ndpCp1exGc/nsfr3x1DdkGp1NGIiGpNtW66FQSh0p9FUbxv2z9pNBocP34cS5cuxfz587Fx48aK5w4cOIBPPvkE0dHROHHiBLZv3469e/fio48+eujxpk+fjtzc3IpHWlpadU6FyCTVUyqwYJgP5g7qBKVChgMX7iA0SoMjV7OljkZEVCsUhuzs7OwMuVx+39WUzMzM+666/FPLli0BAJ06dcLt27cxe/ZsDB8+HADw/vvvY+TIkRVXXTp16oTCwkKMGzcOM2bMgEx2f69SKpVQKpWGxCcyK4IgYHiP5ujavD4mrT+BK3cKMXzFYUx9ri0mPtkaMtmjf4ggIjIlBl1hsba2hq+vL/bv319p+/79+xEUFFTl44iiiNLS/7t8XVRUdF8pkcvlEEWRH98k+hdeTRywOzwEg7o1g14EvvrlIkavOYo7+RwREZH5MOgKCwBMnToVI0eOhJ+fHwIDA7F8+XKkpqYiLCwMwF+jmps3b2Lt2rUAgMWLF6N58+bw8vIC8Nf3snz11VeYPHlyxTH79euHefPmoWvXrvD398fly5fx/vvvo3///pDL5TVxnkRmzU6pwLwhPgj0dMIHu85CcykLoVEaLBjqg6DWzlLHIyJ6bAYXlqFDhyI7Oxtz5sxBeno6OnbsiJiYGHh4eAAA0tPTK30ni16vx/Tp05GSkgKFQoFWrVrhs88+w/jx4yv2mTlzJgRBwMyZM3Hz5k00atQI/fr1wyeffFIDp0hkOV72c4ePe31M2nACF28X4JVVRxDxdBtEPNMGco6IiMiEGfw9LMaK38NC9H+KtTrM3n0Wm4//dTN6gGdDLBjWFS4OKomTERFVVivfw0JEpsHGWo7PB3fG/KE+sLWW4/DVuwhdoMHBi3ekjkZEVC0sLERmbGDXZtg7OQTerg7ILtRi9Jqj+PLn8yjX6aWORkRkEBYWIjPn2agedkwMwiv+zSGKwOI/rmD4isNIzy2WOhoRUZWxsBBZAJWVHJ+82AmLRnRFPaUCx67dQ+gCDf44nyl1NCKiKmFhIbIgfTs3xd7JIejYzAH3isrw2rfHMDfmHMo4IiIiI8fCQmRhWjjbYduEIIwJagEAWHbwKoYsO4Qb94qkDUZE9AgsLEQWSKmQY3b/Dlj6ajfYqxRITM1Bn6hY/HL20b/ElIhIKiwsRBbshY6uiIlQo4t7feQWl2HcugTM2ZMMbTlHRERkXFhYiCyce0NbbBkfiLEhf/2C0tVxKRi8NB6p2RwREZHxYGEhIlgrZJjZtz1WjvKDo40VTt3IRZ8oDX48nS51NCIiACwsRPT/PNveBTGRavh6NEB+aTkmrD+BD3adQUmZTupoRGThWFiIqJJm9W2waVwAwnq2AgCsPXQdLy2JR0pWocTJiMiSsbAQ0X2s5DJM6+2FNa91R0M7a5y9lYd+C2Ox++QtqaMRkYViYSGih3qqXWPERKjRo0VDFJSWI2JjIqZvP80RERHVORYWInqkJo4qbHjTH5Ofbg1BADYeTcXAxXG4nFkgdTQisiAsLET0rxRyGd7u1Q5rX+8B53rWOJ+Rj/6LYrH9xA2poxGRhWBhIaIqU7dphJgINYJaOaFIq8PUH07iP1tOokhbLnU0IjJzLCxEZJDGDiqse8Mfbz3bFjIB2JJwAwMWxeHi7XypoxGRGWNhISKDyWUCIp9tg/VjA9DYXolLmQXovygWPxxPgyiKUscjIjPEwkJE1RbYygkxkWqo2zijpEyPd7eewtQfTqKwlCMiIqpZLCxE9Fic6ynx3Ws98J/n20EuE7Aj8Sb6LYzFufQ8qaMRkRlhYSGixyaTCZj0VGtsGheAJg4qXM0qxIDFcVh/5DpHRERUI1hYiKjGdG/REDGRajzVrhG05XrM2HEGkzcmIr+kTOpoRGTiWFiIqEY1tLPGqtHd8d9QLyhkAvaeSke/hbE4czNX6mhEZMJYWIioxslkAsY90QqbxweiWX0bXMsuwqDoeHwXf40jIiKqFhYWIqo1vh4NsC8iBM+1d4FWp8es3Wcxcf0J5BZzREREhmFhIaJaVd/WGstH+uKDvu1hJRfw45kM9F2owcm0HKmjEZEJYWEholonCAJeD2mJrWFBcG9og7S7xRi8NB6rYlM4IiKiKmFhIaI608W9PvZOVqN3xyYo04n4aG8y3lybgJwirdTRiMjIsbAQUZ1ytLFC9Cvd8NGADrCWy/DrudvoExWLhOv3pI5GREaMhYWI6pwgCBgZ2ALbJwahhZMtbuYUY8iyQ1j65xXo9RwREdH9WFiISDIdmzliz+QQ9OvSFDq9iM9+PI/XvzuGu4UcERFRZSwsRCQpe5UVoob5YO6gTlAqZDhw4Q5CF2hwNOWu1NGIyIiwsBCR5ARBwPAezbFzUjA8G9khI68Ew5YfwqLfL3FEREQAWFiIyIh4uzpgT3gIBnVtBr0IfPXLRYxecxR38kuljkZEEmNhISKjYqdUYN5QH3w5uDNUVjJoLmUhNEqD+CtZUkcjIgmxsBCRUXrZzx17wkPQ1qUe7uSX4tWVRzD/14vQcUREZJFYWIjIaLVxsceuSSEY4ucGvQjM//USXl15BJl5JVJHI6I6xsJCREbNxlqOLwZ3wTdDu8DWWo5DV7MRGqWB5tIdqaMRUR1iYSEik/BiVzfsmRwCryb2yCrQYtTqo/jq5wso1+mljkZEdYCFhYhMRqtG9bBzUjBG+DeHKAKL/riMESuOID23WOpoRFTLWFiIyKSorOT49MVOWDi8K+opFTh67S5CF2jwx/lMqaMRUS1iYSEik9SvS1PsnRyCjs0ccK+oDK99ewxzY86hjCMiIrPEwkJE
JquFsx22TQjCmKAWAIBlB69i6LJDuJnDERGRuWFhISKTplTIMbt/Byx9tRvsVQqcSM1B6AIN9iffljoaEdUgFhYiMgsvdHRFTIQaXdwckVtchjfXHsdHe5OhLeeIiMgcsLAQkdlwb2iLLWFBeCOkJQBgVWwKXl4aj7S7RRInI6LHxcJCRGbFWiHD+33bY8UoPzjaWOHkjVyERmnw05l0qaMR0WNgYSEis/RcexfERKrRrXl95JeUI+z7E5i16wxKy3VSRyOiamBhISKz1ay+DTaPD8T4np4AgO8OXcdLS+JxLatQ4mREZCgWFiIya1ZyGab39saaMd3RwNYKZ27moe/CWOw5eUvqaERkABYWIrIIT3k1RkykGj1aNERBaTkmb0zEf3ecRkkZR0REpoCFhYgshqujDTa86Y/JT7eGIAAbjqRi4OI4XLlTIHU0IvoXLCxEZFEUchne7tUOa1/vAed61jifkY9+C2OxI/GG1NGI6BFYWIjIIqnbNEJMhBqBnk4o0urw1uaTeHfrSRRrOSIiMkYsLERksRo7qPD9WH9MebYNBAH44fgN9F8Ui0u386WORkT/wMJCRBZNLhMw5dm2WD/WH43slbiUWYB+i2Lxw/E0iKIodTwi+l8sLEREAIJaOePHSDXUbZxRUqbHu1tP4e0fTqKwtFzqaEQEFhYiogrO9ZT47rUe+M/z7SATgO2JN9F/USzOpedJHY3I4rGwEBH9PzKZgElPtcamcYFo4qDClTuFGLg4DhuOpHJERCQhFhYiogfo0bIhYiLVeLJdI5SW6/HfHacRsSkJ+SVlUkcjskgsLERED9HQzhqrR3fH9N5ekMsE7Dl5C/0WxuLMzVypoxFZHBYWIqJHkMkEjO/ZCj+MD0Sz+ja4ll2EQdHxWHvoGkdERHWIhYWIqAp8PRpgX0QInvV2gVanxwe7zmLShhPILeaIiKgusLAQEVVRfVtrrBjli/f7toeVXEDM6Qz0XajBybQcqaMRmT0WFiIiAwiCgDdCWmJrWBDcG9og7W4xBi+Nx6rYFI6IiGoRCwsRUTV0ca+PvZPV6N2xCcp0Ij7am4xx6xKQU6SVOhqRWWJhISKqJkcbK0S/0g1zBnSAtVyG/cm30ScqFidS70kdjcjsVKuwREdHo2XLllCpVPD19YVGo3novrGxsQgODoaTkxNsbGzg5eWFb7755r79cnJyMGnSJLi6ukKlUsHb2xsxMTHViUdEVGcEQcCowBbYPjEIHk62uJlTjCFLD2HZn1eg13NERFRTFIa+YPPmzZgyZQqio6MRHByMZcuWoXfv3khOTkbz5s3v29/Ozg7h4eHo3Lkz7OzsEBsbi/Hjx8POzg7jxo0DAGi1Wjz33HNo3Lgxtm7dCjc3N6SlpcHe3v7xz5CIqA50bOaIvZNDMH37aew9lY65P57HkZS7+OrlLmhoZy11PCKTJ4gG3iXm7++Pbt26YcmSJRXbvL29MXDgQMydO7dKxxg0aBDs7Oywbt06AMDSpUvx5Zdf4vz587CysjIkToW8vDw4OjoiNzcXDg4O1ToGEdHjEkURG4+mYfaes9CW69HEQYWFI7qie4uGUkcjMkpVff82aCSk1WqRkJCAXr16Vdreq1cvxMfHV+kYiYmJiI+PR8+ePSu27d69G4GBgZg0aRJcXFzQsWNHfPrpp9DpdA89TmlpKfLy8io9iIikJggCRvg3x65JwfBsZIeMvBIMW34Yi/+4zBER0WMwqLBkZWVBp9PBxcWl0nYXFxdkZGQ88rVubm5QKpXw8/PDpEmTMHbs2Irnrl69iq1bt0Kn0yEmJgYzZ87E119/jU8++eShx5s7dy4cHR0rHu7u7oacChFRrfJ2dcCe8BAM6toMOr2IL3++gNFrjiKroFTqaEQmqVo33QqCUOnPoijet+2fNBoNjh8/jqVLl2L+/PnYuHFjxXN6vR6NGzfG8uXL4evri2HDhmHGjBmVxk7/NH36dOTm5lY80tLSqnMqRES1xk6pwNdDuuCLwZ2hspJBcykLoQs0OHQlW+poRCbHoJtunZ2dIZfL77uakpmZed9Vl39q2bIlAKBTp064ffs2Zs+ejeHDhwMAXF1dYWVlBblcXrG/t7c3MjIyoNVqYW19/w1rSqUSSqXSkPhERHVOEAQM8XOHj3t9TFp/ApcyC/DKysOIeKYNJj/dBnLZo3/YI6K/GHSFxdraGr6+vti/f3+l7fv370dQUFCVjyOKIkpL/++yaHBwMC5fvgy9Xl+x7eLFi3B1dX1gWSEiMjVtXeyxOzwEQ/zcoBeB+b9ewshVR5CZXyJ1NCKTYPBIaOrUqVi5ciVWr16Nc+fO4a233kJqairCwsIA/DWqGTVqVMX+ixcvxp49e3Dp0iVcunQJa9aswVdffYVXX321Yp8JEyYgOzsbkZGRuHjxIvbt24dPP/0UkyZNqoFTJCIyDjbWcnwxuAvmDekCW2s54q9kI3SBBrGXsqSORmT0DP4elqFDhyI7Oxtz5sxBeno6OnbsiJiYGHh4eAAA0tPTkZqaWrG/Xq/H9OnTkZKSAoVCgVatWuGzzz7D+PHjK/Zxd3fHL7/8grfeegudO3dGs2bNEBkZiffee68GTpGIyLgM6uaGzm71Eb7hBM5n5GPk6iOY9GRrTHm2DRRyfgE50YMY/D0sxorfw0JEpqakTIc5e5Ox4chfP+T1aNkQUcO6oomjSuJkRHWnVr6HhYiIao7KSo5PX+yEqOFdUU+pwNGUuwiN0uDAhUypoxEZHRYWIiKJ9e/SFHsmh6BDUwfcLdRizJpj+OzH8yjT6f/9xUQWgoWFiMgItHS2w7YJQRgd+Nf9gEv/vIJhyw/jZk6xxMmIjAMLCxGRkVBZyfHhgI5Y8ko32KsUSLh+D32iNPg1+bbU0Ygkx8JCRGRkendyxb7JanRxc0ROURnGrj2Oj/cmQ1vOERFZLhYWIiIj1NzJFlvCgvB68F/fEr4yNgUvLzuEtLtFEicjkgYLCxGRkbJWyPBBv/ZYMcoPjjZWOJmWg9AoDX46ky51NKI6x8JCRGTknmvvgn0RIejavD7yS8oR9v0JzNp1BqXlOqmjEdUZFhYiIhPg1sAWP4wPxPiengCA7w5dx0tL4nEtq1DiZER1g4WFiMhEWMllmN7bG2vGdEcDWyucuZmHvgtjsffULamjEdU6FhYiIhPzlFdjxESq0b1FAxSUliN8QyJm7DiNkjKOiMh8sbAQEZkgV0cbbHwzAOFPtYYgAOuPpGLg4jhcuVMgdTSiWsHCQkRkohRyGd55vh3Wvt4DTnbWOJ+Rj34LY7Ez8abU0YhqHAsLEZGJU7dphB8j1Qj0dEKRVocpm5Pw3tZTKNZyRETmg4WFiMgMNHZQ4fux/oh8pg0EAdh8PA0DFsfi0u18qaMR1QgWFiIiMyGXCXjrubZY/4Y/GtkrcfF2AfovisOW42lSRyN6bCwsRERmJqi1M2Ii1FC3cUZxmQ7/2XoKU39IQmFpudTRiKqNhYWIyAw1slfiu9d64J1ebSETgO0nbqL/olicz8iTOhpRtbCwEBGZKZlMQPj
[... base64-encoded PNG data for the explained-variance-ratio plot omitted ...]", +      "text/plain": [ +       "<Figure size 640x480 with 1 Axes>
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(preprocessor.sklearn_block.explained_variance_ratio_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Define the output scaler for the output fields (MinMaxScaler + PCA)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n" + ] + }, + { + "data": { + "text/html": [ + "
Pipeline(steps=[('scaler',\n",
+       "                 WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n",
+       "                ('pca',\n",
+       "                 WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))])
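The wrapped blocks above delegate to ordinary scikit-learn estimators, so on the raw field arrays this postprocessor amounts to a plain MinMaxScaler → PCA chain. A minimal plain-sklearn sketch, with synthetic arrays standing in for the PLAID field data (the shapes and variable names are invented for illustration):

```python
# Hypothetical plain-sklearn analogue of the wrapped postprocessor,
# acting on raw arrays instead of PLAID samples.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

rng = np.random.default_rng(0)
fields = rng.normal(size=(20, 128))  # 20 samples of a 128-point output field

plain_postprocessor = Pipeline([
    ("scaler", MinMaxScaler()),    # rescale each field component to [0, 1]
    ("pca", PCA(n_components=9)),  # compress each field to 9 modal coefficients
])
coeffs = plain_postprocessor.fit_transform(fields)
print(coeffs.shape)  # (20, 9)
```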
" + ], + "text/plain": [ + "Pipeline(steps=[('scaler',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n", + " ('pca',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))])" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "postprocessor = Pipeline(\n", + " [\n", + " (\"scaler\", WrappedSklearnTransform(MinMaxScaler(), in_keys=[\"field::output_field\"])),\n", + " (\"pca\", WrappedSklearnTransform(PCA(n_components=9), in_keys=[\"field::output_field\"])),\n", + " ]\n", + ")\n", + "postprocessor" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=MinMaxScaler()\n", + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=1\n", + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n", + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=MinMaxScaler()\n", + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=1\n", + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n", + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=PCA(n_components=9)\n", + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=1\n", + "self.input_scalars=[]\n", + "self.input_fields=['output_field']\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n" + ] + }, + { + "data": { + "text/html": [ + "
Pipeline(steps=[('scaler',\n",
+       "                 WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n",
+       "                ('pca',\n",
+       "                 WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))])
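Fitting runs each wrapped step's fit_transform in order, which is what the repeated debug output above traces. One property worth keeping in mind: a fitted MinMaxScaler + PCA chain is invertible, and that is what qualifies it as a target transformer further below. A self-contained plain-sklearn sketch on synthetic data:

```python
# Sketch of the inverse mapping: Pipeline.inverse_transform applies the
# steps in reverse order (PCA reconstruction, then MinMaxScaler inverse).
import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

rng = np.random.default_rng(1)
fields = rng.normal(size=(20, 128))  # synthetic stand-in for the output field

post = Pipeline([("scaler", MinMaxScaler()), ("pca", PCA(n_components=9))])
coeffs = post.fit_transform(fields)     # (20, 9) modal coefficients
recon = post.inverse_transform(coeffs)  # back to (20, 128)
print(np.abs(recon - fields).max())     # residual: energy in the dropped modes
```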
" + ], + "text/plain": [ + "Pipeline(steps=[('scaler',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n", + " ('pca',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))])" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "postprocessor.fit(dset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Define the regressor\n", + "\n", + "Y = GP(transformer(X)) where transformer(X) = postprocessor(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=['output_scalar_0', 'output_scalar_1', 'output_scalar_2', 'output_scalar_3', 'output_scalar_4']\n", + "self.output_fields=['output_field']\n" + ] + }, + { + "data": { + "text/html": [ + "
TransformedTargetRegressor(check_inverse=False,\n",
+       "                           regressor=WrappedSklearnRegressor(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], out_keys=['field::output_field', 'scalar::output_scalar_0', 'scalar::output_scalar_1', 'scalar::output_scalar_2', 'scalar::output_scalar_3', 'scalar::output_scalar_4'], sklearn_block=GaussianProcessRegressor(n_restarts_optimizer=3)),\n",
+       "                           transformer=Pipeline(steps=[('scaler',\n",
+       "                                                        WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n",
+       "                                                       ('pca',\n",
+       "                                                        WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))]))
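As a reminder of the scikit-learn semantics being relied on here: TransformedTargetRegressor applies its transformer to the target, not to the inputs, so the fitted relation is y ≈ transformer⁻¹(GP(x)), and check_inverse=False skips the fit-time round-trip consistency check. A minimal sketch on synthetic data:

```python
# TransformedTargetRegressor trains the inner regressor on
# transformer.fit_transform(y) and pipes predictions back through
# transformer.inverse_transform.
import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 3.0

ttr = TransformedTargetRegressor(
    regressor=LinearRegression(),
    transformer=StandardScaler(),  # applied to y, never to X
    check_inverse=False,           # skip the inverse-consistency check
)
ttr.fit(X, y)                      # note: the target must be passed explicitly
print(ttr.predict(X[:3]))          # predictions come back on the original y scale
```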
" + ], + "text/plain": [ + "TransformedTargetRegressor(check_inverse=False,\n", + " regressor=WrappedSklearnRegressor(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], out_keys=['field::output_field', 'scalar::output_scalar_0', 'scalar::output_scalar_1', 'scalar::output_scalar_2', 'scalar::output_scalar_3', 'scalar::output_scalar_4'], sklearn_block=GaussianProcessRegressor(n_restarts_optimizer=3)),\n", + " transformer=Pipeline(steps=[('scaler',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n", + " ('pca',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))]))" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regressor = TransformedTargetRegressor(\n", + " regressor=WrappedSklearnRegressor(\n", + " GaussianProcessRegressor(n_restarts_optimizer=3),\n", + " in_keys=[f\"scalar::input_scalar_{scalar_id}\" for scalar_id in range(NB_INPUT_SCALARS)],\n", + " out_keys=[\"field::output_field\", *[f\"scalar::output_scalar_{scalar_id}\" for scalar_id in range(NB_OUTPUT_SCALARS)]],\n", + " ),\n", + " check_inverse=False,\n", + " transformer=postprocessor,\n", + ")\n", + "regressor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Combine to make the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n" + ] + }, + { + "data": { + "text/html": [ + "
Pipeline(steps=[('preprocessor',\n",
+       "                 WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=PCA(n_components=2))),\n",
+       "                ('scaler',\n",
+       "                 WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=StandardScaler())),\n",
+       "                ('regressor',\n",
+       "                 Transfor...\n",
+       "                                            regressor=WrappedSklearnRegressor(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], out_keys=['field::output_field', 'scalar::output_scalar_0', 'scalar::output_scalar_1', 'scalar::output_scalar_2', 'scalar::output_scalar_3', 'scalar::output_scalar_4'], sklearn_block=GaussianProcessRegressor(n_restarts_optimizer=3)),\n",
+       "                                            transformer=Pipeline(steps=[('scaler',\n",
+       "                                                                         WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n",
+       "                                                                        ('pca',\n",
+       "                                                                         WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))])))])
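Stripped of the PLAID wrappers, the combined model reduces to the plain-sklearn skeleton below (a sketch on synthetic arrays; the dimensions are invented):

```python
# Plain-sklearn skeleton of the combined model: X flows through PCA and
# StandardScaler into the GP, while Y is compressed by the MinMaxScaler+PCA
# target transformer inside TransformedTargetRegressor.
import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.decomposition import PCA
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

rng = np.random.default_rng(0)
X = rng.normal(size=(30, 3))   # 3 input scalars per sample
Y = rng.normal(size=(30, 64))  # one 64-point output field per sample

plain_model = Pipeline(steps=[
    ("preprocessor", PCA(n_components=2)),
    ("scaler", StandardScaler()),
    ("regressor", TransformedTargetRegressor(
        regressor=GaussianProcessRegressor(n_restarts_optimizer=3),
        transformer=Pipeline([("scaler", MinMaxScaler()),
                              ("pca", PCA(n_components=9))]),
        check_inverse=False,
    )),
])
plain_model.fit(X, Y)                # X and Y passed separately, so y is not None
print(plain_model.predict(X).shape)  # (30, 64): fields rebuilt from 9 modes
```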
" + ], + "text/plain": [ + "Pipeline(steps=[('preprocessor',\n", + " WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=PCA(n_components=2))),\n", + " ('scaler',\n", + " WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=StandardScaler())),\n", + " ('regressor',\n", + " Transfor...\n", + " regressor=WrappedSklearnRegressor(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], out_keys=['field::output_field', 'scalar::output_scalar_0', 'scalar::output_scalar_1', 'scalar::output_scalar_2', 'scalar::output_scalar_3', 'scalar::output_scalar_4'], sklearn_block=GaussianProcessRegressor(n_restarts_optimizer=3)),\n", + " transformer=Pipeline(steps=[('scaler',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n", + " ('pca',\n", + " WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))])))])" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = Pipeline(\n", + " steps=[\n", + " (\"preprocessor\", preprocessor),\n", + " (\"scaler\", WrappedSklearnTransform(StandardScaler(), in_keys=[f\"scalar::input_scalar_{scalar_id}\" for scalar_id in range(NB_INPUT_SCALARS)])),\n", + " (\"regressor\", regressor),\n", + " ]\n", + ")\n", + "model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fit the model" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=PCA(n_components=2)\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=3\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n", + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=StandardScaler()\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=3\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n", + "=== In <_extract_X_y_from_plaid> of self.sklearn_block=StandardScaler()\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(X)=\n", + "len(X)=3\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "type(y)=\n", + "len(y)=0\n" + ] + }, + { + "ename": "ValueError", + "evalue": "This TransformedTargetRegressor estimator requires y to be passed, but the target y is None.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", + 
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[58]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdset\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2\u001b[39m model\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\d606912\\.conda\\envs\\plaid_dev\\Lib\\site-packages\\sklearn\\base.py:1363\u001b[39m, in \u001b[36m_fit_context..decorator..wrapper\u001b[39m\u001b[34m(estimator, *args, **kwargs)\u001b[39m\n\u001b[32m 1356\u001b[39m estimator._validate_params()\n\u001b[32m 1358\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[32m 1359\u001b[39m skip_parameter_validation=(\n\u001b[32m 1360\u001b[39m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[32m 1361\u001b[39m )\n\u001b[32m 1362\u001b[39m ):\n\u001b[32m-> \u001b[39m\u001b[32m1363\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\d606912\\.conda\\envs\\plaid_dev\\Lib\\site-packages\\sklearn\\pipeline.py:661\u001b[39m, in \u001b[36mPipeline.fit\u001b[39m\u001b[34m(self, X, y, **params)\u001b[39m\n\u001b[32m 655\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._final_estimator != \u001b[33m\"\u001b[39m\u001b[33mpassthrough\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 656\u001b[39m last_step_params = \u001b[38;5;28mself\u001b[39m._get_metadata_for_step(\n\u001b[32m 657\u001b[39m step_idx=\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m) - \u001b[32m1\u001b[39m,\n\u001b[32m 658\u001b[39m step_params=routed_params[\u001b[38;5;28mself\u001b[39m.steps[-\u001b[32m1\u001b[39m][\u001b[32m0\u001b[39m]],\n\u001b[32m 659\u001b[39m all_params=params,\n\u001b[32m 660\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m661\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_final_estimator\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mXt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mlast_step_params\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfit\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 663\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\d606912\\.conda\\envs\\plaid_dev\\Lib\\site-packages\\sklearn\\base.py:1363\u001b[39m, in \u001b[36m_fit_context..decorator..wrapper\u001b[39m\u001b[34m(estimator, *args, **kwargs)\u001b[39m\n\u001b[32m 1356\u001b[39m estimator._validate_params()\n\u001b[32m 1358\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[32m 1359\u001b[39m skip_parameter_validation=(\n\u001b[32m 1360\u001b[39m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[32m 1361\u001b[39m )\n\u001b[32m 1362\u001b[39m ):\n\u001b[32m-> \u001b[39m\u001b[32m1363\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\d606912\\.conda\\envs\\plaid_dev\\Lib\\site-packages\\sklearn\\compose\\_target.py:253\u001b[39m, in \u001b[36mTransformedTargetRegressor.fit\u001b[39m\u001b[34m(self, X, y, **fit_params)\u001b[39m\n\u001b[32m 225\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Fit the model according to the given training data.\u001b[39;00m\n\u001b[32m 226\u001b[39m \n\u001b[32m 227\u001b[39m \u001b[33;03mParameters\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 250\u001b[39m \u001b[33;03m Fitted estimator.\u001b[39;00m\n\u001b[32m 251\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 252\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m253\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 254\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mThis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.\u001b[34m__class__\u001b[39m.\u001b[34m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m estimator \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 255\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mrequires y to be passed, but the target y is None.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 256\u001b[39m )\n\u001b[32m 257\u001b[39m y = check_array(\n\u001b[32m 258\u001b[39m y,\n\u001b[32m 259\u001b[39m input_name=\u001b[33m\"\u001b[39m\u001b[33my\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 264\u001b[39m allow_nd=\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[32m 265\u001b[39m )\n\u001b[32m 267\u001b[39m \u001b[38;5;66;03m# store the number of dimension of the target to predict an array of\u001b[39;00m\n\u001b[32m 268\u001b[39m \u001b[38;5;66;03m# similar shape at predict\u001b[39;00m\n", + "\u001b[31mValueError\u001b[39m: This TransformedTargetRegressor estimator requires y to be passed, but the target y is None." + ] + } + ], + "source": [ + "model.fit(dset)\n", + "model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predict on the training data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other way to define the pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Define the regressor" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=[]\n", + "self.output_fields=[]\n", + "self.input_scalars=['input_scalar_0', 'input_scalar_1', 'input_scalar_2']\n", + "self.input_fields=[]\n", + "self.output_scalars=['output_scalar_0', 'output_scalar_1', 'output_scalar_2', 'output_scalar_3', 'output_scalar_4']\n", + "self.output_fields=['output_field']\n" + ] + }, + { + "data": { + "text/html": [ + "
Pipeline(steps=[('preprocessor',\n",
+       "                 WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=PCA(n_components=2))),\n",
+       "                ('scaler',\n",
+       "                 WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=StandardScaler())),\n",
+       "                ('regressor',\n",
+       "                 WrappedSklearnRegressor(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], out_keys=['field::output_field', 'scalar::output_scalar_0', 'scalar::output_scalar_1', 'scalar::output_scalar_2', 'scalar::output_scalar_3', 'scalar::output_scalar_4'], sklearn_block=GaussianProcessRegressor(n_restarts_optimizer=3)))])
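Because this pipeline ends in a predictor, scikit-learn treats the whole chain as a single regressor: fit transforms X and then trains the GP, and predict replays the same transforms. That is what lets it slot in as the regressor of the TransformedTargetRegressor in the next step. A sketch on synthetic data:

```python
# A Pipeline whose final step is a predictor exposes fit/predict as one
# estimator, so it can be nested inside TransformedTargetRegressor.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(2)
X = rng.normal(size=(30, 3))
y = rng.normal(size=30)

reg = Pipeline(steps=[
    ("preprocessor", PCA(n_components=2)),
    ("scaler", StandardScaler()),
    ("regressor", GaussianProcessRegressor(n_restarts_optimizer=3)),
])
reg.fit(X, y)              # PCA and scaling are fit on X, then the GP is trained
print(reg.predict(X[:3]))  # the same transforms are replayed before predicting
```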
" + ], + "text/plain": [ + "Pipeline(steps=[('preprocessor',\n", + " WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=PCA(n_components=2))),\n", + " ('scaler',\n", + " WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=StandardScaler())),\n", + " ('regressor',\n", + " WrappedSklearnRegressor(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], out_keys=['field::output_field', 'scalar::output_scalar_0', 'scalar::output_scalar_1', 'scalar::output_scalar_2', 'scalar::output_scalar_3', 'scalar::output_scalar_4'], sklearn_block=GaussianProcessRegressor(n_restarts_optimizer=3)))])" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regressor = Pipeline(\n", + " steps=[\n", + " (\"preprocessor\", preprocessor),\n", + " (\"scaler\", WrappedSklearnTransform(\n", + " StandardScaler(),\n", + " in_keys=[f\"scalar::input_scalar_{scalar_id}\" for scalar_id in range(NB_INPUT_SCALARS)],\n", + " )),\n", + " (\"regressor\", WrappedSklearnRegressor(\n", + " GaussianProcessRegressor(n_restarts_optimizer=3),\n", + " in_keys=[f\"scalar::input_scalar_{scalar_id}\" for scalar_id in range(NB_INPUT_SCALARS)],\n", + " out_keys=[\"field::output_field\", *[f\"scalar::output_scalar_{scalar_id}\" for scalar_id in range(NB_OUTPUT_SCALARS)]],\n", + " )),\n", + " ]\n", + ")\n", + "regressor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Combine to make the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
TransformedTargetRegressor(check_inverse=False,\n",
+       "                           regressor=Pipeline(steps=[('preprocessor',\n",
+       "                                                      WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], sklearn_block=PCA(n_components=2))),\n",
+       "                                                     ('scaler',\n",
+       "                                                      WrappedSklearnTransform(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2...\n",
+       "                                                      WrappedSklearnRegressor(in_keys=['scalar::input_scalar_0', 'scalar::input_scalar_1', 'scalar::input_scalar_2'], out_keys=['field::output_field', 'scalar::output_scalar_0', 'scalar::output_scalar_1', 'scalar::output_scalar_2', 'scalar::output_scalar_3', 'scalar::output_scalar_4'], sklearn_block=GaussianProcessRegressor(n_restarts_optimizer=3)))]),\n",
+       "                           transformer=Pipeline(steps=[('scaler',\n",
+       "                                                        WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=MinMaxScaler())),\n",
+       "                                                       ('pca',\n",
+       "                                                        WrappedSklearnTransform(in_keys=['field::output_field'], sklearn_block=PCA(n_components=9)))]))
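The ValueError raised by model.fit(dset) earlier comes straight from scikit-learn: Pipeline forwards y=None to its final step, and TransformedTargetRegressor refuses a missing target. The plain-sklearn reproduction below shows the failure and the explicit-target fix; how the PLAID wrappers should route targets out of the dataset is still open in this notebook, so the fix is only illustrative:

```python
# Reproducing the failure: TransformedTargetRegressor raises if the target
# is None, which is exactly what a Pipeline forwards when fit() gets no y.
import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 2))
y = X.sum(axis=1)

ttr = TransformedTargetRegressor(regressor=LinearRegression(),
                                 transformer=StandardScaler(),
                                 check_inverse=False)
try:
    ttr.fit(X, None)  # mimics a Pipeline that was fit without a target
except ValueError as err:
    print(err)        # "...requires y to be passed, but the target y is None."

ttr.fit(X, y)         # passing the target explicitly resolves the error
```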