Commit
Merge pull request #1644 from NNPDF/separate_xgrid_from_model
Separate xgrid and model generation within model trainer
Zaharid committed Dec 7, 2022
2 parents e5e702d + e7050c4 commit 74f9413
Showing 1 changed file with 90 additions and 45 deletions.
135 changes: 90 additions & 45 deletions n3fit/src/n3fit/model_trainer.py
@@ -9,6 +9,7 @@
between iterations while at the same time keeping the number of redundant calls to a minimum
"""
import logging
from collections import namedtuple
from itertools import zip_longest
import numpy as np
from scipy.interpolate import PchipInterpolator
@@ -32,6 +33,8 @@
# Every how many epochs the integrability Lagrange Multiplier is increased
PUSH_INTEGRABILITY_EACH = 100

# See ModelTrainer::_xgrid_generation for the definition of each field and how they are generated
InputInfo = namedtuple("InputInfo", ["input", "split", "idx"])
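For orientation, a minimal, self-contained sketch of how the three fields of ``InputInfo`` are consumed downstream (plain Python stands in for the backend layers; the values are invented):

# Illustrative sketch only, not part of the commit
from collections import namedtuple

InputInfo = namedtuple("InputInfo", ["input", "split", "idx"])

# Pretend the concatenation holds two unique x-grids of two points each,
# shared among three observables; ``split`` undoes the concatenation
info = InputInfo(
    input=[0.1, 0.5, 0.2, 0.9],            # concatenation of the unique grids
    split=lambda pdf: [pdf[:2], pdf[2:]],  # cuts back into the unique pieces
    idx=[0, 0, 1],                         # observable -> unique-grid index
)
unique_pdfs = info.split(["f(0.1)", "f(0.5)", "f(0.2)", "f(0.9)"])
pdfs_per_observable = [unique_pdfs[i] for i in info.idx]  # one entry per observable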

def _pdf_injection(pdf_layers, observables, masks):
"""
@@ -289,7 +292,73 @@ def _fill_the_dictionaries(self):
self.training["expdata"].append(integ_dict["expdata"])
self.training["integdatasets"].append(integ_dict["name"])

def _model_generation(self, pdf_models, partition, partition_idx):
def _xgrid_generation(self):
"""
Generates the full x-grid pertaining to the complete set of observables to be fitted.
To first approximation, the full x-grid is a concatenation of all x-grid requested by
all fk-tables.
In the case of pineappl models all fktables ask for the same grid in x
and so the input can be simplified to be a single grid for all (or most) datasets.
However, this is not a _strict_ requirement for pineappl and was not a requirement before
so the solution below must be kept general enough.
Detailed implementation of the union of xgrids:
let's assume an input [x1, x1, x1, x2, x2, x3]
where each xi is a different grid, this will be broken into two lists:
[x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset)
The pdf will then be evaluated to concatenate([x1,x2,x3]) and then split (x1, x2, x3)
Then each of the experiment, looking at the indexes, will receive one of the 3 PDFs
The decision whether two grids (x1 and x1) are really the same is decided below
The necessary information to redistribute the x-grid is held by a ``InputInfo`` tuple
which is returned by this function.
Returns
------
Instance of ``InputInfo`` containing the input information necessary for the PDF model:
- input:
backend input layer with an array attached which is a concatenation of the unique
inputs of the Model
two inputs are the same if and only if they have the same shape, values and order
- split:
backend layer which splits the aforementioned concatenation back into the separate
unique inputs, to be applied after the PDF is called
- idx:
indices of the observables to which the split PDF must be distributed
"""
log.info("Generating the input grid")

inputs_unique = []
inputs_idx = []
for igrid in self.input_list:
for idx, arr in enumerate(inputs_unique):
if igrid.size == arr.size and np.allclose(igrid, arr):
inputs_idx.append(idx)
break
else:
inputs_idx.append(len(inputs_unique))
inputs_unique.append(igrid)

# Concatenate the unique inputs
input_arr = np.concatenate(inputs_unique, axis=1).T
if self._scaler:
# Apply feature scaling if given
input_arr = self._scaler(input_arr)
input_layer = op.numpy_to_input(input_arr)

# The PDF model will be called with a concatenation of all inputs
# and the output then needs to be split so that each experiment takes its corresponding piece
sp_ar = [[i.shape[1] for i in inputs_unique]]
sp_kw = {"axis": 1}
sp_layer = op.as_layer(
op.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split"
)

return InputInfo(input_layer, sp_layer, inputs_idx)
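For illustration, a runnable sketch of the deduplication loop above on a toy input; the (flavours, n_x) grid shapes are an assumption and plain numpy arrays stand in for the fktable grids:

# Illustrative sketch only, not part of the commit
import numpy as np

# Toy x-grids: the first two are numerically identical, the third differs
grids = [np.array([[0.1, 0.5]]), np.array([[0.1, 0.5]]), np.array([[0.2, 0.9]])]

inputs_unique, inputs_idx = [], []
for igrid in grids:
    for idx, arr in enumerate(inputs_unique):
        if igrid.size == arr.size and np.allclose(igrid, arr):
            inputs_idx.append(idx)
            break
    else:
        inputs_idx.append(len(inputs_unique))
        inputs_unique.append(igrid)

print(inputs_idx)                             # [0, 0, 1]: three datasets, two unique grids
print([[g.shape[1] for g in inputs_unique]])  # [[2, 2]]: the sizes fed to the split layer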

def _model_generation(self, xinput, pdf_models, partition, partition_idx):
"""
Fills the three dictionaries (``training``, ``validation``, ``experimental``)
with the ``model`` entry
Expand All @@ -298,7 +367,7 @@ def _model_generation(self, pdf_models, partition, partition_idx):
as they are never trained, but this is needed by some backends
in order to run evaluate on them.
Before entering this function the dictionaries contain a list of inputs
Before entering this function we have the input of the model
and a list of outputs, but they are not connected.
This function connects inputs with outputs by injecting the PDF.
At this point we have a PDF model that takes an input (1, None, 1)
@@ -316,8 +385,16 @@ def _model_generation(self, pdf_models, partition, partition_idx):
Parameters
----------
xinput: InputInfo
a tuple containing the input layer (with all values of x), and the information
(in the form of a splitting layer and a list of indices) to distribute
the results of the PDF (PDF(xgrid)) among the different observables
pdf_models: list(n3fit.backend.MetaModel)
a list of models that produce PDF values
partition: dict
Only active during k-folding, information about the partition to be fitted
partition_idx: int
Index of the partition
Returns
-------
@@ -326,61 +403,26 @@ def _model_generation(self, pdf_models, partition, partition_idx):
"""
log.info("Generating the Model")

# In the case of pineappl models all fktables ask for the same grid in x
# and so the input can be simplified to be a single grid for all dataset
# instead of a concatenation that gets splitted afterwards
# However, this is not a _strict_ requirement for pineappl so the solution below
# aims to be completely general
# Detailed:
# let's assume an input [x1, x1, x1, x2, x2, x3]
# where each xi is a different grid, this will be broken into two lists:
# [x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset)
# The pdf will then be evaluated to concatenate([x1,x2,x3]) and then split (x1, x2, x3)
# Then each of the experiment, looking at the indexes, will receive one of the 3 PDFs
# The decision whether two grids (x1 and x1) are really the same is decided below
inputs_unique = []
inputs_idx = []
for igrid in self.input_list:
for idx, arr in enumerate(inputs_unique):
if igrid.size == arr.size and np.allclose(igrid, arr):
inputs_idx.append(idx)
break
else:
inputs_idx.append(len(inputs_unique))
inputs_unique.append(igrid)

# Concatenate the unique inputs
input_arr = np.concatenate(inputs_unique, axis=1).T
if self._scaler:
# Apply feature scaling if given
input_arr = self._scaler(input_arr)
input_layer = op.numpy_to_input(input_arr)

# For multireplica fits:
# The trainable part of the n3fit framework is a concatenation of all PDF models
# each model, in the NNPDF language, corresponds to a different replica
all_replicas_pdf = []
for pdf_model in pdf_models:
# The input to the full model also works as the input to the PDF model
# We apply the Model as Layers and save the output (full_pdf) for later
full_model_input_dict, full_pdf = pdf_model.apply_as_layer({"pdf_input": input_layer})
full_model_input_dict, full_pdf = pdf_model.apply_as_layer(
{"pdf_input": xinput.input}
)

all_replicas_pdf.append(full_pdf)
# Note that all models share the same symbolic input so we take as input the last
# full_model_input_dict in the loop

full_pdf_per_replica = op.stack(all_replicas_pdf, axis=-1)

# The PDF model was called with a concatenation of all inputs
# now the output needs to be splitted so that each experiment takes its corresponding input
sp_ar = [[i.shape[1] for i in inputs_unique]]
sp_kw = {"axis": 1}
splitting_layer = op.as_layer(op.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split")
splitted_pdf_unique = splitting_layer(full_pdf_per_replica)
split_pdf_unique = xinput.split(full_pdf_per_replica)

# Now reorganize the unique PDFs so that each experiment receives its corresponding PDF
splitted_pdf = [splitted_pdf_unique[i] for i in inputs_idx]

split_pdf = [split_pdf_unique[i] for i in xinput.idx]
# If we are in a kfolding partition, select which datasets are out
training_mask = validation_mask = experimental_mask = [None]
if partition and partition["datasets"]:
@@ -394,14 +436,14 @@ def _model_generation(self, pdf_models, partition, partition_idx):

# Training and validation leave out the kfold dataset
# experimental leaves out the negation
output_tr = _pdf_injection(splitted_pdf, self.training["output"], training_mask)
output_tr = _pdf_injection(split_pdf, self.training["output"], training_mask)
training = MetaModel(full_model_input_dict, output_tr)

# Validation skips integrability and the "true" chi2 also skips positivity,
# so we must only use the corresponding subset of PDF functions
val_pdfs = []
exp_pdfs = []
for partial_pdf, obs in zip(splitted_pdf, self.training["output"]):
for partial_pdf, obs in zip(split_pdf, self.training["output"]):
if not obs.positivity and not obs.integrability:
val_pdfs.append(partial_pdf)
exp_pdfs.append(partial_pdf)
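As an aside, a hedged numpy sketch of the stack-then-split bookkeeping performed above; the per-replica output shape (batch, n_x, n_flavours) and the grid sizes are assumptions:

# Illustrative sketch only, not part of the commit
import numpy as np

n_x, n_flav, n_replicas = 6, 3, 2
replica_outputs = [np.ones((1, n_x, n_flav)) for _ in range(n_replicas)]

# op.stack(..., axis=-1): all replica PDFs stacked along a trailing axis
full_pdf_per_replica = np.stack(replica_outputs, axis=-1)  # shape (1, 6, 3, 2)

# xinput.split: cut the x axis back into the unique grids, here of sizes [2, 4]
split_pdf_unique = np.split(full_pdf_per_replica, [2], axis=1)

# xinput.idx: distribute the unique pieces among the observables
split_pdf = [split_pdf_unique[i] for i in [0, 0, 1]]
print([p.shape for p in split_pdf])  # [(1, 2, 3, 2), (1, 2, 3, 2), (1, 4, 3, 2)]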
Expand Down Expand Up @@ -823,6 +865,9 @@ def hyperparametrizable(self, params):
n3pdfs = []
exp_models = []

# Generate the grid in x; note that it is the same for all partitions
xinput = self._xgrid_generation()

### Training loop
for k, partition in enumerate(self.kpartitions):
# Each partition of the kfolding needs to have its own separate model
@@ -845,7 +890,7 @@ def hyperparametrizable(self, params):

# Model generation joins all the different observable layers
# together with the pdf models generated above
models = self._model_generation(pdf_models, partition, k)
models = self._model_generation(xinput, pdf_models, partition, k)

# Only after model generation, apply a possible weight file
if self.model_file:

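Stepping back, a hedged sketch of the control flow that results from this commit: the x-grid is generated once and shared, while the models are rebuilt for every k-folding partition. The function below is illustrative; the trainer and the model builder are stand-ins:

# Illustrative sketch only, not part of the commit
def hyperparametrizable_flow(trainer, build_pdf_models):
    xinput = trainer._xgrid_generation()  # built once: same x-grid for all partitions
    models_per_fold = []
    for k, partition in enumerate(trainer.kpartitions):
        pdf_models = build_pdf_models()   # fresh replica models for this fold
        models_per_fold.append(trainer._model_generation(xinput, pdf_models, partition, k))
    return models_per_fold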