diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index 7ddf1971a2..b756c0e26b 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -9,6 +9,7 @@
 between iterations while at the same time keeping the amount of redundant calls to a minimum
 """
 import logging
+from collections import namedtuple
 from itertools import zip_longest
 import numpy as np
 from scipy.interpolate import PchipInterpolator
@@ -32,6 +33,8 @@
 # Each how many epochs do we increase the integrability Lagrange Multiplier
 PUSH_INTEGRABILITY_EACH = 100
 
+# See ModelTrainer::_xgrid_generation for the definition of each field and how they are generated
+InputInfo = namedtuple("InputInfo", ["input", "split", "idx"])
 
 def _pdf_injection(pdf_layers, observables, masks):
     """
@@ -289,7 +292,73 @@ def _fill_the_dictionaries(self):
                 self.training["expdata"].append(integ_dict["expdata"])
                 self.training["integdatasets"].append(integ_dict["name"])
 
-    def _model_generation(self, pdf_models, partition, partition_idx):
+    def _xgrid_generation(self):
+        """
+        Generates the full x-grid pertaining to the complete set of observables to be fitted.
+
+        To first approximation, the full x-grid is a concatenation of all x-grids requested by
+        all fk-tables.
+
+        In the case of pineappl models all fktables ask for the same grid in x
+        and so the input can be simplified to be a single grid for all (or most) datasets.
+        However, this is not a _strict_ requirement for pineappl and was not a requirement before,
+        so the solution below must be kept general enough.
+
+        Detailed implementation of the union of xgrids:
+        let's assume an input [x1, x1, x1, x2, x2, x3]
+        where each xi is a different grid; this will be broken into two lists:
+        [x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset)
+        The pdf will then be evaluated on concatenate([x1,x2,x3]) and the result split back into (x1, x2, x3).
+        Each of the experiments, looking at the indices, will then receive one of the 3 PDFs.
+        Whether two grids (x1 and x1) are really the same is decided below.
+
+        The necessary information to redistribute the x-grid is held by an ``InputInfo`` tuple,
+        which is returned by this function.
+
+        Returns
+        -------
+        Instance of ``InputInfo`` containing the input information necessary for the PDF model:
+        - input:
+            backend input layer with an array attached which is a concatenation of the unique
+            inputs of the Model;
+            two inputs are the same if and only if they have the same shape, values and order
+        - split:
+            backend layer which splits the aforementioned concatenation back into the separate
+            unique inputs, to be applied after the PDF is called
+        - idx:
+            indices of the observables to which the split PDF must be distributed
+        """
+        log.info("Generating the input grid")
+
+        inputs_unique = []
+        inputs_idx = []
+        for igrid in self.input_list:
+            for idx, arr in enumerate(inputs_unique):
+                if igrid.size == arr.size and np.allclose(igrid, arr):
+                    inputs_idx.append(idx)
+                    break
+            else:
+                inputs_idx.append(len(inputs_unique))
+                inputs_unique.append(igrid)
+
+        # Concatenate the unique inputs
+        input_arr = np.concatenate(inputs_unique, axis=1).T
+        if self._scaler:
+            # Apply feature scaling if given
+            input_arr = self._scaler(input_arr)
+        input_layer = op.numpy_to_input(input_arr)
+
+        # The PDF model will be called with a concatenation of all inputs
+        # now the output needs to be split so that each experiment takes its corresponding input
+        sp_ar = [[i.shape[1] for i in inputs_unique]]
+        sp_kw = {"axis": 1}
+        sp_layer = op.as_layer(
+            op.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split"
+        )
+
+        return InputInfo(input_layer, sp_layer, inputs_idx)
+
+    def _model_generation(self, xinput, pdf_models, partition, partition_idx):
         """
         Fills the three dictionaries (``training``, ``validation``, ``experimental``)
         with the ``model`` entry
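
To make the dedup bookkeeping concrete, here is a minimal numpy-only sketch of the round trip performed by ``_xgrid_generation``; the toy grids are invented, ``np.split`` stands in for the backend split layer, and the feature scaling and final transpose are omitted:

import numpy as np

# Toy grids of shape (1, n_x), mimicking self.input_list
x1 = np.array([[0.1, 0.2, 0.3]])
x2 = np.array([[0.4, 0.5]])
x3 = np.array([[0.6]])
input_list = [x1, x1, x1, x2, x2, x3]

inputs_unique, inputs_idx = [], []
for igrid in input_list:
    for idx, arr in enumerate(inputs_unique):
        if igrid.size == arr.size and np.allclose(igrid, arr):
            inputs_idx.append(idx)
            break
    else:  # no break: first time this grid is seen
        inputs_idx.append(len(inputs_unique))
        inputs_unique.append(igrid)

assert inputs_idx == [0, 0, 0, 1, 1, 2]

# The PDF is evaluated once on the concatenation, then split back...
concatenated = np.concatenate(inputs_unique, axis=1)    # shape (1, 6)
sizes = [i.shape[1] for i in inputs_unique]             # [3, 2, 1]
split_back = np.split(concatenated, np.cumsum(sizes)[:-1], axis=1)
# ... and redistributed so every dataset recovers exactly the grid it asked for
per_dataset = [split_back[i] for i in inputs_idx]
assert all(np.allclose(a, b) for a, b in zip(per_dataset, input_list))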
@@ -298,7 +367,7 @@ def _model_generation(self, pdf_models, partition, partition_idx):
         as they are never trained, but this is needed by some backends
         in order to run evaluate on them.
 
-        Before entering this function the dictionaries contain a list of inputs
+        Before entering this function we have the input of the model
         and a list of outputs, but they are not connected.
         This function connects inputs with outputs by injecting the PDF.
         At this point we have a PDF model that takes an input (1, None, 1)
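
The injection mentioned in this docstring is done by the ``_pdf_injection`` helper, whose signature (but not body) appears in an earlier hunk. The sketch below only illustrates the pairing pattern that the signature and the module-level ``zip_longest`` import suggest; the plain callables and the argument order are stand-in assumptions, not the actual n3fit implementation:

import numpy as np
from itertools import zip_longest

def pdf_injection_sketch(pdf_layers, observables, masks):
    # masks may be shorter than the other lists (e.g. [None] outside k-folding);
    # zip_longest pads the missing entries with None so every observable is still called
    return [obs(pdf, mask=m) for pdf, obs, m in zip_longest(pdf_layers, observables, masks)]

# Toy observables: each one consumes the PDF evaluated on its own x-grid
observables = [lambda pdf, mask=None: pdf.sum(), lambda pdf, mask=None: pdf.mean()]
pdfs = [np.ones((1, 3)), np.ones((1, 2))]
print(pdf_injection_sketch(pdfs, observables, [None]))  # [3.0, 1.0]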
@@ -316,8 +385,16 @@ def _model_generation(self, pdf_models, partition, partition_idx):
 
         Parameters
         ----------
+        xinput: InputInfo
+            a tuple containing the input layer (with all values of x), and the information
+            (in the form of a splitting layer and a list of indices) to distribute
+            the results of the PDF (PDF(xgrid)) among the different observables
         pdf_models: list(n3fit.backend.MetaModel)
             a list of models that produce PDF values
+        partition: dict
+            Only active during k-folding, information about the partition to be fitted
+        partition_idx: int
+            Index of the partition
 
         Returns
         -------
@@ -326,36 +403,6 @@ def _model_generation(self, pdf_models, partition, partition_idx):
         """
         log.info("Generating the Model")
 
-        # In the case of pineappl models all fktables ask for the same grid in x
-        # and so the input can be simplified to be a single grid for all dataset
-        # instead of a concatenation that gets splitted afterwards
-        # However, this is not a _strict_ requirement for pineappl so the solution below
-        # aims to be completely general
-        # Detailed:
-        # let's assume an input [x1, x1, x1, x2, x2, x3]
-        # where each xi is a different grid, this will be broken into two lists:
-        # [x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset)
-        # The pdf will then be evaluated to concatenate([x1,x2,x3]) and then split (x1, x2, x3)
-        # Then each of the experiment, looking at the indexes, will receive one of the 3 PDFs
-        # The decision whether two grids (x1 and x1) are really the same is decided below
-        inputs_unique = []
-        inputs_idx = []
-        for igrid in self.input_list:
-            for idx, arr in enumerate(inputs_unique):
-                if igrid.size == arr.size and np.allclose(igrid, arr):
-                    inputs_idx.append(idx)
-                    break
-            else:
-                inputs_idx.append(len(inputs_unique))
-                inputs_unique.append(igrid)
-
-        # Concatenate the unique inputs
-        input_arr = np.concatenate(inputs_unique, axis=1).T
-        if self._scaler:
-            # Apply feature scaling if given
-            input_arr = self._scaler(input_arr)
-        input_layer = op.numpy_to_input(input_arr)
-
         # For multireplica fits:
         # The trainable part of the n3fit framework is a concatenation of all PDF models
         # each model, in the NNPDF language, corresponds to a different replica
@@ -363,24 +410,19 @@ def _model_generation(self, pdf_models, partition, partition_idx):
         for pdf_model in pdf_models:
             # The input to the full model also works as the input to the PDF model
             # We apply the Model as Layers and save for later the model (full_pdf)
-            full_model_input_dict, full_pdf = pdf_model.apply_as_layer({"pdf_input": input_layer})
+            full_model_input_dict, full_pdf = pdf_model.apply_as_layer(
+                {"pdf_input": xinput.input}
+            )
             all_replicas_pdf.append(full_pdf)
 
         # Note that all models share the same symbolic input so we take as input the last
         # full_model_input_dict in the loop
         full_pdf_per_replica = op.stack(all_replicas_pdf, axis=-1)
-
-        # The PDF model was called with a concatenation of all inputs
-        # now the output needs to be splitted so that each experiment takes its corresponding input
-        sp_ar = [[i.shape[1] for i in inputs_unique]]
-        sp_kw = {"axis": 1}
-        splitting_layer = op.as_layer(op.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split")
-        splitted_pdf_unique = splitting_layer(full_pdf_per_replica)
+        split_pdf_unique = xinput.split(full_pdf_per_replica)
 
         # Now reorganize the uniques PDF so that each experiment receives its corresponding PDF
-        splitted_pdf = [splitted_pdf_unique[i] for i in inputs_idx]
-
+        split_pdf = [split_pdf_unique[i] for i in xinput.idx]
 
         # If we are in a kfolding partition, select which datasets are out
         training_mask = validation_mask = experimental_mask = [None]
         if partition and partition["datasets"]:
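
The stack-then-split step in the hunk above is pure shape bookkeeping and can be checked in plain numpy; the PDF output shape (1, n_x, n_flavours), the flavour count and the toy sizes below are assumptions for illustration:

import numpy as np

n_x, n_flavours, n_replicas = 6, 14, 3
all_replicas_pdf = [np.random.rand(1, n_x, n_flavours) for _ in range(n_replicas)]

# op.stack(..., axis=-1): the replicas become the trailing dimension
full_pdf_per_replica = np.stack(all_replicas_pdf, axis=-1)
assert full_pdf_per_replica.shape == (1, n_x, n_flavours, n_replicas)

# xinput.split cuts along the x axis (axis=1), one block per unique grid
sizes = [3, 2, 1]                      # i.e. [i.shape[1] for i in inputs_unique]
split_pdf_unique = np.split(full_pdf_per_replica, np.cumsum(sizes)[:-1], axis=1)
# ... and xinput.idx maps the unique blocks back onto the observables
inputs_idx = [0, 0, 0, 1, 1, 2]
split_pdf = [split_pdf_unique[i] for i in inputs_idx]
assert split_pdf[0].shape == (1, 3, n_flavours, n_replicas)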
@@ -394,14 +436,14 @@ def _model_generation(self, pdf_models, partition, partition_idx):
 
             # Training and validation leave out the kofld dataset
             # experiment leaves out the negation
-        output_tr = _pdf_injection(splitted_pdf, self.training["output"], training_mask)
+        output_tr = _pdf_injection(split_pdf, self.training["output"], training_mask)
         training = MetaModel(full_model_input_dict, output_tr)
 
         # Validation skips integrability and the "true" chi2 skips also positivity,
         # so we must only use the corresponding subset of PDF functions
         val_pdfs = []
         exp_pdfs = []
-        for partial_pdf, obs in zip(splitted_pdf, self.training["output"]):
+        for partial_pdf, obs in zip(split_pdf, self.training["output"]):
             if not obs.positivity and not obs.integrability:
                 val_pdfs.append(partial_pdf)
                 exp_pdfs.append(partial_pdf)
@@ -823,6 +865,9 @@ def hyperparametrizable(self, params):
         n3pdfs = []
         exp_models = []
 
+        # Generate the grid in x, note this is the same for all partitions
+        xinput = self._xgrid_generation()
+
         ### Training loop
         for k, partition in enumerate(self.kpartitions):
             # Each partition of the kfolding needs to have its own separate model
@@ -845,7 +890,7 @@ def hyperparametrizable(self, params):
 
             # Model generation joins all the different observable layers
             # together with pdf model generated above
-            models = self._model_generation(pdf_models, partition, k)
+            models = self._model_generation(xinput, pdf_models, partition, k)
 
             # Only after model generation, apply possible weight file
             if self.model_file:
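
Finally, a runnable toy of the validation/experimental filtering shown in the first hunk above. The ``Obs`` record is a stand-in for the backend observable layers, and the ``elif`` branch (validation keeping positivity observables) is inferred from the comment in the hunk rather than shown in the diff itself:

from collections import namedtuple

Obs = namedtuple("Obs", ["name", "positivity", "integrability"])
outputs = [
    Obs("DIS", False, False),
    Obs("POS_F2", True, False),
    Obs("INTEG_V", False, True),
]
split_pdf = ["pdf_dis", "pdf_pos", "pdf_integ"]  # stand-ins for the split PDFs

val_pdfs, exp_pdfs = [], []
for partial_pdf, obs in zip(split_pdf, outputs):
    if not obs.positivity and not obs.integrability:
        val_pdfs.append(partial_pdf)
        exp_pdfs.append(partial_pdf)
    elif not obs.integrability:
        # validation skips only integrability, so positivity observables stay
        val_pdfs.append(partial_pdf)

print(val_pdfs)  # ['pdf_dis', 'pdf_pos']
print(exp_pdfs)  # ['pdf_dis']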