Commit
Merge pull request #1644 from NNPDF/separate_xgrid_from_model
Separate xgrid and model generation within model trainer
Zaharid committed Dec 7, 2022
2 parents e5e702d + e7050c4 commit 74f9413
Showing 1 changed file with 90 additions and 45 deletions.
135 changes: 90 additions & 45 deletions n3fit/src/n3fit/model_trainer.py
@@ -9,6 +9,7 @@
between iterations while at the same time keeping the number of redundant calls to a minimum
"""
import logging
from collections import namedtuple
from itertools import zip_longest
import numpy as np
from scipy.interpolate import PchipInterpolator
@@ -32,6 +33,8 @@
# Every how many epochs the integrability Lagrange Multiplier is increased
PUSH_INTEGRABILITY_EACH = 100

# See ModelTrainer::_xgrid_generation for the definition of each field and how they are generated
InputInfo = namedtuple("InputInfo", ["input", "split", "idx"])
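For orientation, a minimal, self-contained sketch of how the three fields of ``InputInfo`` are consumed downstream (plain Python stands in for the backend layers; the values are invented):

# Illustrative sketch only, not part of the commit
from collections import namedtuple

InputInfo = namedtuple("InputInfo", ["input", "split", "idx"])

# Pretend the concatenation holds two unique x-grids of two points each,
# shared among three observables; ``split`` undoes the concatenation
info = InputInfo(
    input=[0.1, 0.5, 0.2, 0.9],            # concatenation of the unique grids
    split=lambda pdf: [pdf[:2], pdf[2:]],  # cuts back into the unique pieces
    idx=[0, 0, 1],                         # observable -> unique-grid index
)
unique_pdfs = info.split(["f(0.1)", "f(0.5)", "f(0.2)", "f(0.9)"])
pdfs_per_observable = [unique_pdfs[i] for i in info.idx]  # one entry per observable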

def _pdf_injection(pdf_layers, observables, masks):
"""
@@ -289,7 +292,73 @@ def _fill_the_dictionaries(self):
self.training["expdata"].append(integ_dict["expdata"])
self.training["integdatasets"].append(integ_dict["name"])

def _model_generation(self, pdf_models, partition, partition_idx):
def _xgrid_generation(self):
"""
Generates the full x-grid pertaining to the complete set of observables to be fitted.
To first approximation, the full x-grid is a concatenation of all x-grid requested by
all fk-tables.
In the case of pineappl models all fktables ask for the same grid in x
and so the input can be simplified to be a single grid for all (or most) datasets.
However, this is not a _strict_ requirement for pineappl and was not a requirement before
so the solution below must be kept general enough.
Detailed implementation of the union of xgrids:
let's assume an input [x1, x1, x1, x2, x2, x3]
where each xi is a different grid, this will be broken into two lists:
[x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset)
The pdf will then be evaluated to concatenate([x1,x2,x3]) and then split (x1, x2, x3)
Then each of the experiment, looking at the indexes, will receive one of the 3 PDFs
The decision whether two grids (x1 and x1) are really the same is decided below
The necessary information to redistribute the x-grid is held by a ``InputInfo`` tuple
which is returned by this function.
Returns
------
Instance of ``InputInfo`` containing the input information necessary for the PDF model:
- input:
backend input layer with an array attached which is a concatenation of the unique
inputs of the Model
two inputs are the same if and only if they have the same shape, values and order
- split:
backend layer which splits the aforementioned concatenation back into the separate
unique inputs, to be applied after the PDF is called
- idx:
indices of the observables to which the split PDF must be distributed
"""
log.info("Generating the input grid")

inputs_unique = []
inputs_idx = []
for igrid in self.input_list:
for idx, arr in enumerate(inputs_unique):
if igrid.size == arr.size and np.allclose(igrid, arr):
inputs_idx.append(idx)
break
else:
inputs_idx.append(len(inputs_unique))
inputs_unique.append(igrid)

# Concatenate the unique inputs
input_arr = np.concatenate(inputs_unique, axis=1).T
if self._scaler:
# Apply feature scaling if given
input_arr = self._scaler(input_arr)
input_layer = op.numpy_to_input(input_arr)

# The PDF model will be called with a concatenation of all inputs
# and the output then needs to be split so that each experiment takes its corresponding piece
sp_ar = [[i.shape[1] for i in inputs_unique]]
sp_kw = {"axis": 1}
sp_layer = op.as_layer(
op.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split"
)

return InputInfo(input_layer, sp_layer, inputs_idx)
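For illustration, a runnable sketch of the deduplication loop above on a toy input; the (flavours, n_x) grid shapes are an assumption and plain numpy arrays stand in for the fktable grids:

# Illustrative sketch only, not part of the commit
import numpy as np

# Toy x-grids: the first two are numerically identical, the third differs
grids = [np.array([[0.1, 0.5]]), np.array([[0.1, 0.5]]), np.array([[0.2, 0.9]])]

inputs_unique, inputs_idx = [], []
for igrid in grids:
    for idx, arr in enumerate(inputs_unique):
        if igrid.size == arr.size and np.allclose(igrid, arr):
            inputs_idx.append(idx)
            break
    else:
        inputs_idx.append(len(inputs_unique))
        inputs_unique.append(igrid)

print(inputs_idx)                             # [0, 0, 1]: three datasets, two unique grids
print([[g.shape[1] for g in inputs_unique]])  # [[2, 2]]: the sizes fed to the split layer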

def _model_generation(self, xinput, pdf_models, partition, partition_idx):
"""
Fills the three dictionaries (``training``, ``validation``, ``experimental``)
with the ``model`` entry
Expand All @@ -298,7 +367,7 @@ def _model_generation(self, pdf_models, partition, partition_idx):
as they are never trained, but this is needed by some backends
in order to run evaluate on them.
Before entering this function the dictionaries contain a list of inputs
Before entering this function we have the input of the model
and a list of outputs, but they are not connected.
This function connects inputs with outputs by injecting the PDF.
At this point we have a PDF model that takes an input (1, None, 1)
@@ -316,8 +385,16 @@ def _model_generation(self, pdf_models, partition, partition_idx):
Parameters
----------
xinput: InputInfo
a tuple containing the input layer (with all values of x), and the information
(in the form of a splitting layer and a list of indices) to distribute
the results of the PDF (PDF(xgrid)) among the different observables
pdf_models: list(n3fit.backend.MetaModel)
a list of models that produce PDF values
partition: dict
Only active during k-folding, information about the partition to be fitted
partition_idx: int
Index of the partition
Returns
-------
@@ -326,61 +403,26 @@ def _model_generation(self, pdf_models, partition, partition_idx):
"""
log.info("Generating the Model")

# In the case of pineappl models all fktables ask for the same grid in x
# and so the input can be simplified to be a single grid for all dataset
# instead of a concatenation that gets splitted afterwards
# However, this is not a _strict_ requirement for pineappl so the solution below
# aims to be completely general
# Detailed:
# let's assume an input [x1, x1, x1, x2, x2, x3]
# where each xi is a different grid, this will be broken into two lists:
# [x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset)
# The pdf will then be evaluated to concatenate([x1,x2,x3]) and then split (x1, x2, x3)
# Then each of the experiment, looking at the indexes, will receive one of the 3 PDFs
# The decision whether two grids (x1 and x1) are really the same is decided below
inputs_unique = []
inputs_idx = []
for igrid in self.input_list:
for idx, arr in enumerate(inputs_unique):
if igrid.size == arr.size and np.allclose(igrid, arr):
inputs_idx.append(idx)
break
else:
inputs_idx.append(len(inputs_unique))
inputs_unique.append(igrid)

# Concatenate the unique inputs
input_arr = np.concatenate(inputs_unique, axis=1).T
if self._scaler:
# Apply feature scaling if given
input_arr = self._scaler(input_arr)
input_layer = op.numpy_to_input(input_arr)

# For multireplica fits:
# The trainable part of the n3fit framework is a concatenation of all PDF models
# each model, in the NNPDF language, corresponds to a different replica
all_replicas_pdf = []
for pdf_model in pdf_models:
# The input to the full model also works as the input to the PDF model
# We apply the Model as Layers and save the output (full_pdf) for later
full_model_input_dict, full_pdf = pdf_model.apply_as_layer({"pdf_input": input_layer})
full_model_input_dict, full_pdf = pdf_model.apply_as_layer(
{"pdf_input": xinput.input}
)

all_replicas_pdf.append(full_pdf)
# Note that all models share the same symbolic input so we take as input the last
# full_model_input_dict in the loop

full_pdf_per_replica = op.stack(all_replicas_pdf, axis=-1)

# The PDF model was called with a concatenation of all inputs
# now the output needs to be splitted so that each experiment takes its corresponding input
sp_ar = [[i.shape[1] for i in inputs_unique]]
sp_kw = {"axis": 1}
splitting_layer = op.as_layer(op.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split")
splitted_pdf_unique = splitting_layer(full_pdf_per_replica)
split_pdf_unique = xinput.split(full_pdf_per_replica)

# Now reorganize the unique PDFs so that each experiment receives its corresponding PDF
splitted_pdf = [splitted_pdf_unique[i] for i in inputs_idx]

split_pdf = [split_pdf_unique[i] for i in xinput.idx]
# If we are in a kfolding partition, select which datasets are out
training_mask = validation_mask = experimental_mask = [None]
if partition and partition["datasets"]:
@@ -394,14 +436,14 @@ def _model_generation(self, pdf_models, partition, partition_idx):

# Training and validation leave out the kfold dataset
# experimental leaves out the negation
output_tr = _pdf_injection(splitted_pdf, self.training["output"], training_mask)
output_tr = _pdf_injection(split_pdf, self.training["output"], training_mask)
training = MetaModel(full_model_input_dict, output_tr)

# Validation skips integrability and the "true" chi2 also skips positivity,
# so we must only use the corresponding subset of PDF functions
val_pdfs = []
exp_pdfs = []
for partial_pdf, obs in zip(splitted_pdf, self.training["output"]):
for partial_pdf, obs in zip(split_pdf, self.training["output"]):
if not obs.positivity and not obs.integrability:
val_pdfs.append(partial_pdf)
exp_pdfs.append(partial_pdf)
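As an aside, a hedged numpy sketch of the stack-then-split bookkeeping performed above; the per-replica output shape (batch, n_x, n_flavours) and the grid sizes are assumptions:

# Illustrative sketch only, not part of the commit
import numpy as np

n_x, n_flav, n_replicas = 6, 3, 2
replica_outputs = [np.ones((1, n_x, n_flav)) for _ in range(n_replicas)]

# op.stack(..., axis=-1): all replica PDFs stacked along a trailing axis
full_pdf_per_replica = np.stack(replica_outputs, axis=-1)  # shape (1, 6, 3, 2)

# xinput.split: cut the x axis back into the unique grids, here of sizes [2, 4]
split_pdf_unique = np.split(full_pdf_per_replica, [2], axis=1)

# xinput.idx: distribute the unique pieces among the observables
split_pdf = [split_pdf_unique[i] for i in [0, 0, 1]]
print([p.shape for p in split_pdf])  # [(1, 2, 3, 2), (1, 2, 3, 2), (1, 4, 3, 2)]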
Expand Down Expand Up @@ -823,6 +865,9 @@ def hyperparametrizable(self, params):
n3pdfs = []
exp_models = []

# Generate the grid in x; note that it is the same for all partitions
xinput = self._xgrid_generation()

### Training loop
for k, partition in enumerate(self.kpartitions):
# Each partition of the kfolding needs to have its own separate model
@@ -845,7 +890,7 @@ def hyperparametrizable(self, params):

# Model generation joins all the different observable layers
# together with the pdf models generated above
models = self._model_generation(pdf_models, partition, k)
models = self._model_generation(xinput, pdf_models, partition, k)

# Only after model generation, apply a possible weight file
if self.model_file:

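Stepping back, a hedged sketch of the control flow that results from this commit: the x-grid is generated once and shared, while the models are rebuilt for every k-folding partition. The function below is illustrative; the trainer and the model builder are stand-ins:

# Illustrative sketch only, not part of the commit
def hyperparametrizable_flow(trainer, build_pdf_models):
    xinput = trainer._xgrid_generation()  # built once: same x-grid for all partitions
    models_per_fold = []
    for k, partition in enumerate(trainer.kpartitions):
        pdf_models = build_pdf_models()   # fresh replica models for this fold
        models_per_fold.append(trainer._model_generation(xinput, pdf_models, partition, k))
    return models_per_fold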