From 0ad3fc0e64cf012868793d0a9f5180f024ddb63c Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Fri, 19 Aug 2022 16:38:16 +0200
Subject: [PATCH 1/3] evaluate only unique xgrids

---
 n3fit/src/n3fit/layers/observable.py |  6 +--
 n3fit/src/n3fit/model_gen.py         | 54 ++++++++++++++++--------
 n3fit/src/n3fit/model_trainer.py     | 62 ++++++++++++++++++----------
 3 files changed, 81 insertions(+), 41 deletions(-)

diff --git a/n3fit/src/n3fit/layers/observable.py b/n3fit/src/n3fit/layers/observable.py
index 7ac2f08df8..739d7c775b 100644
--- a/n3fit/src/n3fit/layers/observable.py
+++ b/n3fit/src/n3fit/layers/observable.py
@@ -4,7 +4,7 @@
 from n3fit.backends import operations as op
 
 
-def _is_unique(list_of_arrays):
+def is_unique(list_of_arrays):
     """Check whether the list of arrays more than one different arrays"""
     the_first = list_of_arrays[0]
     for i in list_of_arrays[1:]:
@@ -52,13 +52,13 @@ def __init__(self, fktable_data, fktable_arr, operation_name, nfl=14, **kwargs):
             self.fktables.append(op.numpy_to_tensor(fk))
 
         # check how many xgrids this dataset needs
-        if _is_unique(xgrids):
+        if is_unique(xgrids):
             self.splitting = None
         else:
             self.splitting = [i.shape[1] for i in xgrids]
 
         # check how many basis this dataset needs
-        if _is_unique(basis) and _is_unique(xgrids):
+        if is_unique(basis) and is_unique(xgrids):
             self.all_masks = [self.gen_mask(basis[0])]
             self.many_masks = False
         else:
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index fa0688110d..70fd091345 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -9,11 +9,13 @@
 """
 
+from itertools import zip_longest
 from dataclasses import dataclass
 import numpy as np
 from n3fit.msr import msr_impose
 from n3fit.layers import DIS, DY, ObsRotation, losses
 from n3fit.layers import Preprocessing, FkRotation, FlavourToEvolution
+from n3fit.layers.observable import is_unique
 from n3fit.backends import MetaModel, Input
 from n3fit.backends import operations as op
 
@@ -23,7 +25,7 @@
 
 @dataclass
 class ObservableWrapper:
-    """Wrapper to generate the observable layer once the PDF model is prepared
+    """Wraps many observables into an experimental layer once the PDF model is prepared
     It can take normal datasets or Lagrange-multiplier-like datasets
     (such as positivity or integrability)
     """
@@ -59,8 +61,10 @@ def _generate_loss(self, mask=None):
         return loss
 
     def _generate_experimental_layer(self, pdf):
-        """Generates the experimental layer from the PDF"""
-        # First split the layer into the different datasets (if needed!)
+        """Generate the experimental layer by feeding to each observable its PDF.
+        In the most general case, each observable might need a PDF evaluated on a different xgrid;
+        the input PDF is evaluated on all the points that the experiment needs and is then split accordingly.
+        """
         if len(self.dataset_xsizes) > 1:
             splitting_layer = op.as_layer(
                 op.split,
@@ -68,12 +72,12 @@ def _generate_experimental_layer(self, pdf):
                 op_kwargs={"axis": 1},
                 name=f"{self.name}_split",
             )
-            split_pdf = splitting_layer(pdf)
+            sp_pdf = splitting_layer(pdf)
+            output_layers = [obs(p) for obs, p in zip(self.observables, sp_pdf)]
         else:
-            split_pdf = [pdf]
-        # Every obs gets its share of the split
-        output_layers = [obs(p_pdf) for p_pdf, obs in zip(split_pdf, self.observables)]
-        # Concatenate all datasets (so that experiments are one single entity)
+            output_layers = [obs(pdf) for obs in self.observables]
+
+        # Finally concatenate all observables (so that experiments are one single entity)
         ret = op.concatenate(output_layers, axis=2)
         if self.rotation is not None:
             ret = self.rotation(ret)
@@ -89,11 +93,16 @@ def observable_generator(
     spec_dict, positivity_initial=1.0, integrability=False
 ):  # pylint: disable=too-many-locals
     """
-    This function generates the observable model for each experiment.
+    This function generates the observable models for each experiment.
     These are models which takes as input a PDF tensor (1 x size_of_xgrid x flavours) and outputs
-    the result of the observable for each contained dataset (n_points,)
+    the result of the observable for each contained dataset (n_points,).
+
+    In summary, the model has the following structure:
+        One experiment layer, made of any number of observable layers.
+        Observable layers, corresponding to commondata datasets
+        and made of any number of fktables (and an operation on them).
 
-    An experiment contains an fktable, which is loaded by the convolution layer
+    An observable contains an fktable, which is loaded by the convolution layer
     (be it hadronic or DIS) and a inv covmat which loaded by the loss.
 
     This function also outputs three "output objects" (which are functions that generate layers)
 
@@ -129,10 +138,10 @@ def observable_generator(
     """
     spec_name = spec_dict["name"]
     dataset_xsizes = []
+    model_inputs = []
     model_obs_tr = []
     model_obs_vl = []
     model_obs_ex = []
-    model_inputs = []
     # The first step is to compute the observable for each of the datasets
     for dataset in spec_dict["datasets"]:
         # Get the generic information of the dataset
@@ -193,20 +202,31 @@ def observable_generator(
             name=f"val_{dataset_name}",
         )
 
-        # To know how many xpoints we compute we are duplicating functionality from obs_layer
+        # If the observable layer found that all input grids are equal, the splitting will be None
+        # otherwise the different xgrids need to be stored separately
+        # Note: for pineappl grids, obs_layer_tr.splitting should always be None
         if obs_layer_tr.splitting is None:
-            xgrid = dataset.fktables_data[0].xgrid.reshape(1, -1)
+            xgrid = dataset.fktables_data[0].xgrid
             model_inputs.append(xgrid)
-            dataset_xsizes.append(xgrid.shape[1])
+            dataset_xsizes.append(len(xgrid))
         else:
-            xgrids = [i.xgrid.reshape(1, -1) for i in dataset.fktables_data]
+            xgrids = [i.xgrid for i in dataset.fktables_data]
             model_inputs += xgrids
-            dataset_xsizes.append(sum([i.shape[1] for i in xgrids]))
+            dataset_xsizes.append(sum([len(i) for i in xgrids]))
 
         model_obs_tr.append(obs_layer_tr)
         model_obs_vl.append(obs_layer_vl)
         model_obs_ex.append(obs_layer_ex)
 
+    # Check whether all xgrids of all observables in this experiment are equal
+    # if so, simplify the model input
+    if is_unique(model_inputs):
+        model_inputs = model_inputs[0:1]
+        dataset_xsizes = dataset_xsizes[0:1]
+
+    # Reshape all input arrays to be (1, nx)
+    model_inputs = np.concatenate(model_inputs).reshape(1, -1)
+
     full_nx = sum(dataset_xsizes)
     if spec_dict["positivity"]:
         out_positivity = ObservableWrapper(
diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index db47d74e5d..9bd75912a3 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -12,7 +12,7 @@
 from itertools import zip_longest
 import numpy as np
 from scipy.interpolate import PchipInterpolator
-import n3fit.model_gen as model_gen
+from n3fit import model_gen
 from n3fit.backends import MetaModel, clear_backend_state, callbacks
 from n3fit.backends import operations as op
 from n3fit.stopping import Stopping
@@ -182,7 +182,6 @@ def __init__(
 
         # Initialize the dictionaries which contain all fitting information
         self.input_list = []
-        self.input_sizes = []
         self.training = {
             "output": [],
             "expdata": [],
@@ -312,8 +311,8 @@ def _model_generation(self, pdf_models, partition, partition_idx):
         and output (1, masked_ndata) where masked_ndata can be the training/validation
         or the experimental mask (in which cased masked_ndata == ndata).
         Several models can be fitted at once by passing a list of models with a shared input
-        this function will give the same input to every model and will concatenate the output at the end
-        so that the final output of the model is (1, None, 14, n) (with n=number of parallel models)
+        so that every model receives the same input and the output will be concatenated at the end;
+        the final output of the model is then (1, None, 14, n) (with n=number of parallel models).
 
         Parameters
         ----------
         """
         log.info("Generating the Model")
 
-        # Construct the input array that will be given to the pdf
-        input_arr = np.concatenate(self.input_list, axis=1).T
+        # In the case of pineappl models all fktables ask for the same grid in x
+        # and so the input can be simplified to be a single grid for all datasets
+        # instead of a concatenation that gets split afterwards
+        # However, this is not a _strict_ requirement for pineappl so the solution below
+        # aims to be completely general
+        # Detailed:
+        # let's assume an input [x1, x1, x1, x2, x2, x3]
+        # where each xi is a different grid; this will be broken into two lists:
+        # [x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset)
+        # The pdf will then be evaluated to concatenate([x1,x2,x3]) and then split (x1, x2, x3)
+        # Then each of the experiments, looking at the indexes, will receive one of the 3 PDFs
+
+        # Clean the input list
+        inputs_hash = []
+        inputs_unique = []
+        inputs_idx = []
+        for input_grid in self.input_list:
+            ghash = hash(tuple(input_grid.flatten()))
+            if ghash not in inputs_hash:
+                inputs_hash.append(ghash)
+                inputs_unique.append(input_grid)
+            inputs_idx.append(inputs_hash.index(ghash))
+
+        # Concatenate the unique inputs
+        input_arr = np.concatenate(inputs_unique, axis=1).T
         if self._scaler:
             # Apply feature scaling if given
             input_arr = self._scaler(input_arr)
         input_layer = op.numpy_to_input(input_arr)
 
-        # The trainable part of the n3fit framework is a concatenation of all PDF models
-        # each model, in the NNPDF language, corresponds to a different replica
+        # For multireplica fits:
+        #   The trainable part of the n3fit framework is a concatenation of all PDF models
+        #   each model, in the NNPDF language, corresponds to a different replica
         all_replicas_pdf = []
         for pdf_model in pdf_models:
             # The input to the full model also works as the input to the PDF model
@@ -348,13 +371,15 @@ def _model_generation(self, pdf_models, partition, partition_idx):
 
         full_pdf_per_replica = op.stack(all_replicas_pdf, axis=-1)
 
-        # The input layer was a concatenation of all experiments
-        # the output of the pdf on input_layer will be thus a concatenation
-        # we need now to split the output on a different array per experiment
-        sp_ar = [self.input_sizes]
+        # The PDF model was called with a concatenation of all inputs
+        # now the output needs to be split so that each experiment takes its corresponding input
+        sp_ar = [[i.shape[1] for i in inputs_unique]]
         sp_kw = {"axis": 1}
         splitting_layer = op.as_layer(op.split, op_args=sp_ar, op_kwargs=sp_kw, name="pdf_split")
-        splitted_pdf = splitting_layer(full_pdf_per_replica)
+        splitted_pdf_unique = splitting_layer(full_pdf_per_replica)
+
+        # Now reorganize the unique PDFs so that each experiment receives its corresponding PDF
+        splitted_pdf = [splitted_pdf_unique[i] for i in inputs_idx]
 
         # If we are in a kfolding partition, select which datasets are out
         training_mask = validation_mask = experimental_mask = [None]
@@ -369,7 +394,6 @@ def _model_generation(self, pdf_models, partition, partition_idx):
 
         # Training and validation leave out the kofld dataset
         # experiment leaves out the negation
-
         output_tr = _pdf_injection(splitted_pdf, self.training["output"], training_mask)
         training = MetaModel(full_model_input_dict, output_tr)
 
@@ -412,7 +436,6 @@ def _reset_observables(self):
         or be obliterated when/if the backend state is reset
         """
         self.input_list = []
-        self.input_sizes = []
         for key in ["output", "posmultipliers", "integmultipliers"]:
["output", "posmultipliers", "integmultipliers"]: self.training[key] = [] self.validation[key] = [] @@ -467,8 +490,7 @@ def _generate_observables( exp_layer = model_gen.observable_generator(exp_dict) # Save the input(s) corresponding to this experiment - self.input_list += exp_layer["inputs"] - self.input_sizes.append(exp_layer["experiment_xsize"]) + self.input_list.append(exp_layer["inputs"]) # Now save the observable layer, the losses and the experimental data self.training["output"].append(exp_layer["output_tr"]) @@ -489,8 +511,7 @@ def _generate_observables( pos_layer = model_gen.observable_generator(pos_dict, positivity_initial=pos_initial) # The input list is still common - self.input_list += pos_layer["inputs"] - self.input_sizes.append(pos_layer["experiment_xsize"]) + self.input_list.append(pos_layer["inputs"]) # The positivity should be on both training and validation models self.training["output"].append(pos_layer["output_tr"]) @@ -516,8 +537,7 @@ def _generate_observables( integ_dict, positivity_initial=integ_initial, integrability=True ) # The input list is still common - self.input_list += integ_layer["inputs"] - self.input_sizes.append(integ_layer["experiment_xsize"]) + self.input_list.append(integ_layer["inputs"]) # The integrability all falls to the training self.training["output"].append(integ_layer["output_tr"]) From 6d3fdc70d19985fc0f34cb08e7e06f7741589133 Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Mon, 22 Aug 2022 13:42:21 +0200 Subject: [PATCH 2/3] Update n3fit/src/n3fit/model_gen.py --- n3fit/src/n3fit/model_gen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 70fd091345..a89ce19c19 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -9,7 +9,6 @@ """ -from itertools import zip_longest from dataclasses import dataclass import numpy as np from n3fit.msr import msr_impose From a5a669e536b2b0669643946b9184b7f02b064115 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Mon, 29 Aug 2022 15:54:22 +0200 Subject: [PATCH 3/3] check with allclose --- n3fit/src/n3fit/model_trainer.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 9bd75912a3..14a9724b34 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -337,18 +337,18 @@ def _model_generation(self, pdf_models, partition, partition_idx): # [x1, x2, x3] (unique grids) and [0,0,0,1,1,2] (index of the grid per dataset) # The pdf will then be evaluated to concatenate([x1,x2,x3]) and then split (x1, x2, x3) # Then each of the experiment, looking at the indexes, will receive one of the 3 PDFs - - # Clean the input list - inputs_hash = [] + # The decision whether two grids (x1 and x1) are really the same is decided below inputs_unique = [] inputs_idx = [] - for input_grid in self.input_list: - ghash = hash(tuple(input_grid.flatten())) - if ghash not in inputs_hash: - inputs_hash.append(ghash) - inputs_unique.append(input_grid) - inputs_idx.append(inputs_hash.index(ghash)) - + for igrid in self.input_list: + for idx, arr in enumerate(inputs_unique): + if igrid.size == arr.size and np.allclose(igrid, arr): + inputs_idx.append(idx) + break + else: + inputs_idx.append(len(inputs_unique)) + inputs_unique.append(igrid) + # Concatenate the unique inputs input_arr = np.concatenate(inputs_unique, axis=1).T if self._scaler: