diff --git a/doc/sphinx/source/n3fit/figures/plot_pdf.png b/doc/sphinx/source/n3fit/figures/plot_pdf.png new file mode 100644 index 0000000000..59d086a139 Binary files /dev/null and b/doc/sphinx/source/n3fit/figures/plot_pdf.png differ diff --git a/doc/sphinx/source/n3fit/methodology.rst b/doc/sphinx/source/n3fit/methodology.rst index 0eb3ed2362..8380d5526d 100644 --- a/doc/sphinx/source/n3fit/methodology.rst +++ b/doc/sphinx/source/n3fit/methodology.rst @@ -126,6 +126,56 @@ provide a faster convergence to the solution. .. important:: Parameters like the number of layers, nodes, activation functions are hyper-parameters that require tuning. + +To see the structure of the model, one can use Keras's ``plot_model`` function as illustrated in the script below. +See the `Keras documentation `_ for more details. + +.. code-block:: python + + from tensorflow.keras.utils import plot_model + from n3fit.model_gen import pdfNN_layer_generator + from validphys.api import API + + fit_info = API.fit(fit="NNPDF40_nnlo_as_01180_1000").as_input() + basis_info = fit_info["fitting"]["basis"] + + pdf_models = pdfNN_layer_generator( + nodes=[25, 20, 8], + activations=["tanh", "tanh", "linear"], + initializer_name="glorot_normal", + layer_type="dense", + flav_info=basis_info, + fitbasis="EVOL", + out=14, + seed=42, + dropout=0.0, + regularizer=None, + regularizer_args=None, + impose_sumrule="All", + scaler=None, + parallel_models=1, + ) + + pdf_model = pdf_models[0] + nn_model = pdf_model.get_layer("NN_0") + msr_model = pdf_model.get_layer("impose_msr") + models_to_plot = { + 'plot_pdf': pdf_model, + 'plot_nn': nn_model, + 'plot_msr': msr_model + } + + for name, model in models_to_plot.items(): + plot_model(model, to_file=f"./{name}.png", show_shapes=True) + + +This will produce for instance the plot of the PDF model below, and can also be used to plot the +neural network model, and the momentum sum rule model. + +.. image:: + figures/plot_pdf.png + + .. _preprocessing: Preprocessing diff --git a/n3fit/runcards/examples/developing_weights.h5 b/n3fit/runcards/examples/developing_weights.h5 index a337c5b6a4..542fca06f9 100644 Binary files a/n3fit/runcards/examples/developing_weights.h5 and b/n3fit/runcards/examples/developing_weights.h5 differ diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index 8cf1065849..efff803b92 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -23,20 +23,22 @@ equally operations are automatically converted to layers when used as such. 
""" +from typing import Optional + import numpy as np +import numpy.typing as npt import tensorflow as tf +from tensorflow.keras import backend as K +from tensorflow.keras.layers import Input from tensorflow.keras.layers import Lambda as keras_Lambda from tensorflow.keras.layers import multiply as keras_multiply from tensorflow.keras.layers import subtract as keras_subtract -from tensorflow.keras.layers import Input -from tensorflow.keras import backend as K - from validphys.convolution import OP def evaluate(tensor): - """ Evaluate input tensor using the backend """ + """Evaluate input tensor using the backend""" return K.eval(tensor) @@ -107,36 +109,29 @@ def numpy_to_tensor(ival, **kwargs): # f(x: tensor) -> y: tensor def batchit(x, batch_dimension=0, **kwarg): - """ Add a batch dimension to tensor x """ + """Add a batch dimension to tensor x""" return tf.expand_dims(x, batch_dimension, **kwarg) # layer generation -def numpy_to_input(numpy_array, no_reshape=False, name=None): +def numpy_to_input(numpy_array: npt.NDArray, name: Optional[str] = None): """ - Takes a numpy array and generates a Input layer. - By default it adds a batch dimension (of size 1) so that the shape of the layer - is that of the array + Takes a numpy array and generates an Input layer with the same shape, + but with a batch dimension (of size 1) added. Parameters ---------- numpy_array: np.ndarray - no_reshape: bool - if true, don't add batch dimension, take the first dimension of the array as the batch - name: bool + name: str name to give to the layer """ - if no_reshape: - batched_array = numpy_array - batch_size = numpy_array.shape[0] - shape = numpy_array.shape[1:] - else: - batched_array = np.expand_dims(numpy_array, 0) - batch_size = 1 - shape = numpy_array.shape - input_layer = Input(batch_size=batch_size, shape=shape, name=name) + batched_array = np.expand_dims(numpy_array, 0) + shape = list(numpy_array.shape) + # set the number of gridpoints to None, otherwise shapes don't show in model.summary + shape[0] = None + + input_layer = Input(batch_size=1, shape=shape, name=name) input_layer.tensor_content = batched_array - input_layer.original_shape = no_reshape return input_layer @@ -174,7 +169,7 @@ def op_gather_keep_dims(tensor, indices, axis=0, **kwargs): both eager and non-eager tensors """ if indices == -1: - indices = tensor.shape[axis]-1 + indices = tensor.shape[axis] - 1 def tmp(x): y = tf.gather(x, indices, axis=axis, **kwargs) @@ -189,6 +184,7 @@ def tmp(x): # f(x: tensor[s]) -> y: tensor # + # Generation operations # generate tensors of given shape/content @tf.function @@ -216,7 +212,7 @@ def many_replication(grid, replications, axis=0, **kwargs): # modify properties of the tensor like the shape or elements it has @tf.function def flatten(x): - """ Flatten tensor x """ + """Flatten tensor x""" return tf.reshape(x, (-1,)) @@ -240,7 +236,7 @@ def transpose(tensor, **kwargs): def stack(tensor_list, axis=0, **kwargs): - """ Stack a list of tensors + """Stack a list of tensors see full `docs `_ """ return tf.stack(tensor_list, axis=axis, **kwargs) @@ -280,8 +276,8 @@ def pdf_masked_convolution(raw_pdf, basis_mask): pdf_x_pdf: tf.tensor rank3 (len(mask_true), xgrid, xgrid, replicas) """ - if raw_pdf.shape[-1] == 1: # only one replica! - pdf = tf.squeeze(raw_pdf, axis=(0,-1)) + if raw_pdf.shape[-1] == 1: # only one replica! 
+ pdf = tf.squeeze(raw_pdf, axis=(0, -1)) luminosity = tensor_product(pdf, pdf, axes=0) lumi_tmp = K.permute_dimensions(luminosity, (3, 1, 2, 0)) pdf_x_pdf = batchit(boolean_mask(lumi_tmp, basis_mask), -1) @@ -309,6 +305,14 @@ def tensor_product(*args, **kwargs): return tf.tensordot(*args, **kwargs) +@tf.function +def pow(tensor, power): + """ + Computes the power of the tensor + """ + return tf.pow(tensor, power) + + @tf.function(experimental_relax_shapes=True) def op_log(o_tensor, **kwargs): """ diff --git a/n3fit/src/n3fit/layers/msr_normalization.py b/n3fit/src/n3fit/layers/msr_normalization.py index 51219ad73f..552f618b59 100644 --- a/n3fit/src/n3fit/layers/msr_normalization.py +++ b/n3fit/src/n3fit/layers/msr_normalization.py @@ -16,8 +16,7 @@ class MSR_Normalization(MetaLayer): _msr_enabled = False _vsr_enabled = False - def __init__(self, output_dim=14, mode="ALL", photons_contribution=None, **kwargs): - self._photons = photons_contribution + def __init__(self, output_dim=14, mode="ALL", **kwargs): if mode == True or mode.upper() == "ALL": self._msr_enabled = True self._vsr_enabled = True @@ -38,9 +37,9 @@ def __init__(self, output_dim=14, mode="ALL", photons_contribution=None, **kwarg op.scatter_to_one, op_kwargs={"indices": idx, "output_dim": output_dim} ) - super().__init__(**kwargs, name="normalizer") + super().__init__(**kwargs) - def call(self, pdf_integrated, ph_replica): + def call(self, pdf_integrated, photon_integral): """Imposes the valence and momentum sum rules: A_g = (1-sigma-photon)/g A_v = A_v24 = A_v35 = 3/V @@ -49,17 +48,24 @@ def call(self, pdf_integrated, ph_replica): A_v15 = 3/V_15 Note that both the input and the output are in the 14-flavours fk-basis + + Parameters + ---------- + pdf_integrated: (Tensor(1,None,14)) + the integrated PDF + photon_integral: (Tensor(1)): + the integrated photon, not included in PDF + + Returns + ------- + normalization_factor: Tensor(14) + The normalization factors per flavour. """ y = op.flatten(pdf_integrated) norm_constants = [] - if self._photons: - photon_integral = self._photons[ph_replica] - else: - photon_integral = 0.0 - if self._msr_enabled: - n_ag = [(1.0 - y[GLUON_IDX[0][0] - 1] - photon_integral) / y[GLUON_IDX[0][0]]] * len( + n_ag = [(1.0 - y[GLUON_IDX[0][0] - 1] - photon_integral[0]) / y[GLUON_IDX[0][0]]] * len( GLUON_IDX ) norm_constants += n_ag diff --git a/n3fit/src/n3fit/layers/preprocessing.py b/n3fit/src/n3fit/layers/preprocessing.py index 56278224bf..e5d45c7038 100644 --- a/n3fit/src/n3fit/layers/preprocessing.py +++ b/n3fit/src/n3fit/layers/preprocessing.py @@ -1,11 +1,10 @@ -from n3fit.backends import MetaLayer -from n3fit.backends import constraints +from n3fit.backends import MetaLayer, constraints from n3fit.backends import operations as op class Preprocessing(MetaLayer): """ - Applies preprocessing to the PDF. + Computes preprocessing factor for the PDF. This layer generates a factor (1-x)^beta*x^(1-alpha) where both beta and alpha are model paramters that can be trained. If feature scaling is used, the preprocessing @@ -21,7 +20,7 @@ class Preprocessing(MetaLayer): Parameters ---------- flav_info: list - list of dicts containing the information about the fitting of the preprocessing + list of dicts containing the information about the fitting of the preprocessing factor This corresponds to the `fitting::basis` parameter in the nnpdf runcard. 
The dicts can contain the following fields: `smallx`: range of alpha @@ -29,21 +28,18 @@ class Preprocessing(MetaLayer): `trainable`: whether these alpha-beta should be trained during the fit (defaults to true) large_x: bool - Whether large x preprocessing should be active + Whether large x preprocessing factor should be active seed: int seed for the initializer of the random alpha and beta values """ def __init__( - self, - flav_info=None, - seed=0, - initializer="random_uniform", - large_x=True, - **kwargs, + self, flav_info=None, seed=0, initializer="random_uniform", large_x=True, **kwargs, ): if flav_info is None: - raise ValueError("Trying to instantiate a preprocessing with no basis information") + raise ValueError( + "Trying to instantiate a preprocessing factor with no basis information" + ) self.flav_info = flav_info self.seed = seed self.output_dim = len(flav_info) diff --git a/n3fit/src/n3fit/layers/x_operations.py b/n3fit/src/n3fit/layers/x_operations.py index f3cc9c32e9..a71a42d93d 100644 --- a/n3fit/src/n3fit/layers/x_operations.py +++ b/n3fit/src/n3fit/layers/x_operations.py @@ -1,7 +1,7 @@ """ This module contains layers acting on the x-grid input of the NN - The three operations included are: + The two operations included are: - ``xDivide`` - ``xIntegrator`` @@ -9,18 +9,21 @@ for all flavours. The choice of flavours on which to act in a different way is given as an input argument. """ +from typing import List, Optional from n3fit.backends import MetaLayer from n3fit.backends import operations as op -BASIS_SIZE=14 +BASIS_SIZE = 14 + class xDivide(MetaLayer): """ - Divide some PDFs by x + Create tensor of either 1/x or ones depending on the flavour, + to be used to divide some PDFs by x by multiplying with the result. - By default it utilizes the 14-flavour FK basis and divides [v, v3, v8] - which corresponds to indices (3,4,5) from + By default it utilizes the 14-flavour FK basis and divides [v, v3, v8, v15] + which corresponds to indices (3, 4, 5, 6) from (photon, sigma, g, v, v3, v8, v15, v24, v35, t3, t8, t15, t24, t35) Parameters: @@ -28,27 +31,27 @@ class xDivide(MetaLayer): output_dim: int dimension of the pdf div_list: list - list of indices to be divided by X (by default [3,4,5]; [v, v3, v8] + list of indices to be divided by X (by default [3, 4, 5, 6]; [v, v3, v8, v15] """ - def __init__(self, output_dim=BASIS_SIZE, div_list=None, **kwargs): + def __init__( + self, output_dim: int = BASIS_SIZE, div_list: Optional[List[int]] = None, **kwargs + ): if div_list is None: div_list = [3, 4, 5, 6] self.output_dim = output_dim self.div_list = div_list super().__init__(**kwargs) + self.powers = [-1 if i in div_list else 0 for i in range(output_dim)] + def call(self, x): - out_array = [] - one = op.tensor_ones_like(x) - for i in range(self.output_dim): - if i in self.div_list: - res = one / x - else: - res = one - out_array.append(res) - out_tensor = op.concatenate(out_array) - return out_tensor + return op.pow(x, self.powers) + + def get_config(self): + config = super().get_config() + config.update({"output_dim": self.output_dim, "div_list": self.div_list}) + return config class xIntegrator(MetaLayer): diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index ca7844d12a..2bd898021b 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -10,6 +10,7 @@ """ from dataclasses import dataclass +from typing import Callable, List import numpy as np @@ -27,7 +28,8 @@ losses, ) from n3fit.layers.observable import is_unique -from n3fit.msr 
import msr_impose +from n3fit.msr import generate_msr_model_and_grid +from validphys.photon.compute import Photon # only used for type hint here @dataclass @@ -299,13 +301,13 @@ def observable_generator( # Network generation functions def generate_dense_network( - nodes_in, - nodes, - activations, - initializer_name="glorot_normal", - seed=0, - dropout_rate=0.0, - regularizer=None, + nodes_in: int, + nodes: int, + activations: List[str], + initializer_name: str = "glorot_normal", + seed: int = 0, + dropout_rate: float = 0.0, + regularizer: str = None, ): """ Generates a dense network @@ -355,7 +357,6 @@ def generate_dense_per_flavour_network( number_of_layers = len(nodes) current_seed = seed for i, (nodes_out, activation) in enumerate(zip(nodes, activations)): - initializers = [] for _ in range(basis_size): # select the initializer and move the seed @@ -396,29 +397,28 @@ def output_layer(ilayer): def pdfNN_layer_generator( - inp=2, - nodes=None, - activations=None, - initializer_name="glorot_normal", - layer_type="dense", - flav_info=None, - fitbasis="NN31IC", - out=14, - seed=None, - dropout=0.0, - regularizer=None, - regularizer_args=None, - impose_sumrule=None, - scaler=None, - parallel_models=1, - photons=None, + nodes: List[int] = None, + activations: List[str] = None, + initializer_name: str = "glorot_normal", + layer_type: str = "dense", + flav_info: dict = None, + fitbasis: str = "NN31IC", + out: int = 14, + seed: int = None, + dropout: float = 0.0, + regularizer: str = None, + regularizer_args: dict = None, + impose_sumrule: str = None, + scaler: Callable = None, + parallel_models: int = 1, + photons: Photon = None, ): # pylint: disable=too-many-locals """ Generates the PDF model which takes as input a point in x (from 0 to 1) and outputs a basis of 14 PDFs. It generates the preprocessing of the x into a set (x, log(x)), the arbitrary NN to fit the form of the PDF - and the preprocessing factors. + and the preprocessing factor. The funtional form of the output of this function is of: @@ -453,7 +453,7 @@ def pdfNN_layer_generator( A function is constructed that joins all those layers. The function takes a tensor as the input and applies all layers for NN in order. - 4. Create a preprocessing layer (that takes as input the same tensor x as the NN) + 4. Create a preprocessing factor layer (that takes as input the same tensor x as the NN) and multiply it to the NN. We have now: N(x)_{j} * x^{1-alpha_{j}} * (1-x)^{beta_{j}} @@ -476,8 +476,6 @@ def pdfNN_layer_generator( Parameters ---------- - inp: int - dimension of the xgrid. If inp=2, turns the x point into a (x, log(x)) pair nodes: list(int) list of the number of nodes per layer of the PDF NN. Default: [15,8] activation: list @@ -499,9 +497,10 @@ def pdfNN_layer_generator( rate of dropout layer by layer impose_sumrule: str whether to impose sumrules on the output pdf and which one to impose (All, MSR, VSR) - scaler: scaler + scaler: callable Function to apply to the input. 
If given the input to the model will be a (1, None, 2) tensor where dim [:,:,0] is scaled + When None, instead turn the x point into a (x, log(x)) pair parallel_models: int How many models should be trained in parallel photon: :py:class:`validphys.photon.compute.Photon` @@ -527,9 +526,6 @@ def pdfNN_layer_generator( if impose_sumrule is None: impose_sumrule = "All" - if scaler: - inp = 1 - if activations is None: activations = ["tanh", "linear"] elif callable(activations): @@ -544,135 +540,206 @@ def pdfNN_layer_generator( # The number of nodes in the last layer is equal to the number of fitted flavours last_layer_nodes = nodes[-1] # (== len(flav_info)) - # Generate the generic layers that will not depend on extra considerations - - # First prepare the input for the PDF model and any scaling if needed - placeholder_input = Input(shape=(None, 1), batch_size=1) + # Process input options. There are 2 options: + # 1. Scale the input + # 2. Concatenate log(x) to the input + use_feature_scaling = scaler is not None - subtract_one = False - process_input = Lambda(lambda x: x) - input_x_eq_1 = [1.0] # When scaler is active we also want to do the subtraction of large x # TODO: make it its own option (i.e., one could want to use this without using scaler) - if scaler: - # change the input domain [0,1] -> [-1,1] - process_input = Lambda(lambda x: 2 * x - 1) - subtract_one = True - input_x_eq_1 = scaler([1.0])[0] - placeholder_input = Input(shape=(None, 2), batch_size=1) - elif inp == 2: - # If the input is of type (x, logx) - # create a x --> (x, logx) layer to preppend to everything - process_input = Lambda(lambda x: op.concatenate([x, op.op_log(x)], axis=-1)) - - model_input = {"pdf_input": placeholder_input} + subtract_one = use_feature_scaling + + # Feature scaling happens before the pdf model and changes x->(scaler(x), x), + # so it adds an input dimension + pdf_input_dimensions = 2 if use_feature_scaling else 1 + # Adding of logs happens inside, but before the NN and adds a dimension there + nn_input_dimensions = 1 if use_feature_scaling else 2 + + # Define the main input + do_nothing = lambda x: x + if use_feature_scaling: + pdf_input = Input(shape=(None, pdf_input_dimensions), batch_size=1, name='scaledx_x') + process_input = do_nothing + extract_nn_input = Lambda(lambda x: op.op_gather_keep_dims(x, 0, axis=-1), name='x_scaled') + extract_original = Lambda(lambda x: op.op_gather_keep_dims(x, 1, axis=-1), name='x') + else: # add log(x) + pdf_input = Input(shape=(None, pdf_input_dimensions), batch_size=1, name='x') + process_input = Lambda(lambda x: op.concatenate([x, op.op_log(x)], axis=-1), name='x_logx') + extract_original = do_nothing + extract_nn_input = do_nothing + + model_input = {"pdf_input": pdf_input} + if subtract_one: - layer_x_eq_1 = op.numpy_to_input(np.array(input_x_eq_1).reshape(1, 1)) + input_x_eq_1 = [1.0] + if use_feature_scaling: + input_x_eq_1 = scaler(input_x_eq_1)[0] + # the layer that subtracts 1 from the NN output + subtract_one_layer = Lambda(op.op_subtract, name='subtract_one') + layer_x_eq_1 = op.numpy_to_input(np.array(input_x_eq_1).reshape(1, 1), name='x_eq_1') model_input["layer_x_eq_1"] = layer_x_eq_1 - # Evolution layer - layer_evln = FkRotation(input_shape=(last_layer_nodes,), output_dim=out) + # the layer that multiplies the NN output by the preprocessing factor + apply_preprocessing_factor = Lambda(op.op_multiply, name='prefactor_times_NN') # Photon layer - layer_photon = AddPhoton(photons=photons) + layer_photon = AddPhoton(photons=photons, 
name="add_photon") # Basis rotation - basis_rotation = FlavourToEvolution(flav_info=flav_info, fitbasis=fitbasis) + basis_rotation = FlavourToEvolution( + flav_info=flav_info, fitbasis=fitbasis, name="pdf_evolution_basis" + ) + + # Evolution layer + layer_evln = FkRotation(input_shape=(last_layer_nodes,), output_dim=out, name="pdf_FK_basis") # Normalization and sum rules if impose_sumrule: - sumrule_layer, integrator_input = msr_impose( + sumrule_layer, integrator_input = generate_msr_model_and_grid( mode=impose_sumrule, scaler=scaler, photons=photons ) model_input["integrator_input"] = integrator_input else: sumrule_layer = lambda x: x - # Now we need a trainable network per model to be trained in parallel + # Now we need a trainable network per replica to be trained in parallel pdf_models = [] - for i, layer_seed in enumerate(seed): - if layer_type == "dense": - reg = regularizer_selector(regularizer, **regularizer_args) - list_of_pdf_layers = generate_dense_network( - inp, - nodes, - activations, - initializer_name, - seed=layer_seed, - dropout_rate=dropout, - regularizer=reg, + + # Only these layers change from replica to replica: + nn_replicas = [] + preprocessing_factor_replicas = [] + for i_replica, replica_seed in enumerate(seed): + preprocessing_factor_replicas.append( + Preprocessing( + flav_info=flav_info, + input_shape=(1,), + name=f"preprocessing_factor_{i_replica}", + seed=replica_seed + number_of_layers, + large_x=not subtract_one, ) - elif layer_type == "dense_per_flavour": - # Define the basis size attending to the last layer in the network - # TODO: this information should come from the basis information - # once the basis information is passed to this class - list_of_pdf_layers = generate_dense_per_flavour_network( - inp, - nodes, - activations, - initializer_name, - seed=layer_seed, - basis_size=last_layer_nodes, + ) + nn_replicas.append( + generate_nn( + layer_type=layer_type, + input_dimensions=nn_input_dimensions, + nodes=nodes, + activations=activations, + initializer_name=initializer_name, + replica_seed=replica_seed, + dropout=dropout, + regularizer=regularizer, + regularizer_args=regularizer_args, + last_layer_nodes=last_layer_nodes, + name=f"NN_{i_replica}", ) - - def dense_me(x): - """Takes an input tensor `x` and applies all layers - from the `list_of_pdf_layers` in order""" - processed_x = process_input(x) - curr_fun = list_of_pdf_layers[0](processed_x) - - for dense_layer in list_of_pdf_layers[1:]: - curr_fun = dense_layer(curr_fun) - return curr_fun - - preproseed = layer_seed + number_of_layers - layer_preproc = Preprocessing( - flav_info=flav_info, - input_shape=(1,), - name=f"pdf_prepro_{i}", - seed=preproseed, - large_x=not subtract_one, ) - # Apply preprocessing and basis - def layer_fitbasis(x): - """The tensor x has a expected shape of (1, None, {1,2}) - where x[...,0] corresponds to the feature_scaled input and x[...,-1] the original input - """ - x_scaled = op.op_gather_keep_dims(x, 0, axis=-1) - x_original = op.op_gather_keep_dims(x, -1, axis=-1) - - nn_output = dense_me(x_scaled) - if subtract_one: - nn_at_one = dense_me(layer_x_eq_1) - nn_output = op.op_subtract([nn_output, nn_at_one]) - - ret = op.op_multiply([nn_output, layer_preproc(x_original)]) - if basis_rotation.is_identity(): - # if we don't need to rotate basis we don't want spurious layers - return ret - return basis_rotation(ret) - - # Rotation layer, changes from the 8-basis to the 14-basis - def layer_pdf(x): - return layer_evln(layer_fitbasis(x)) - - # Final PDF (apply 
normalization) - normalized_pdf = sumrule_layer(layer_pdf, i) - - # Photon layer, changes the photon from zero to non-zero - def apply_photon(x): - # if photon is None then the photon layer is not applied - if photons: - return layer_photon(normalized_pdf(x), i) - else: - return normalized_pdf(x) - - final_pdf = apply_photon + # All layers have been made, now we need to connect them, + # do this in a function so we can call it for both grids and each replica + # Since all layers are already made, they will be reused + def compute_unnormalized_pdf(x, neural_network, compute_preprocessing_factor): + # Preprocess the input grid + x_nn_input = extract_nn_input(x) + x_original = extract_original(x) + x_processed = process_input(x_nn_input) + + # Compute the neural network output + nn_output = neural_network(x_processed) + if subtract_one: + x_eq_1_processed = process_input(layer_x_eq_1) + nn_at_one = neural_network(x_eq_1_processed) + nn_output = subtract_one_layer([nn_output, nn_at_one]) + + # Compute the preprocessing factor and multiply + preprocessing_factor = compute_preprocessing_factor(x_original) + pref_nn = apply_preprocessing_factor([nn_output, preprocessing_factor]) + + # Apply basis rotation if needed + if not basis_rotation.is_identity(): + pref_nn = basis_rotation(pref_nn) + + # Transform to FK basis + pdf_unnormalized = layer_evln(pref_nn) + + return pdf_unnormalized + + # Finally compute the normalized PDFs for each replica + pdf_models = [] + for i_replica, (preprocessing_factor, nn) in enumerate( + zip(preprocessing_factor_replicas, nn_replicas) + ): + pdf_unnormalized = compute_unnormalized_pdf(pdf_input, nn, preprocessing_factor) + + if impose_sumrule: + pdf_integration_grid = compute_unnormalized_pdf( + integrator_input, nn, preprocessing_factor + ) + pdf_normalized = sumrule_layer( + { + "pdf_x": pdf_unnormalized, + "pdf_xgrid_integration": pdf_integration_grid, + "xgrid_integration": integrator_input, + # The photon is treated separately, need to get its integrals to normalize the pdf + "photon_integral": op.numpy_to_tensor( + 0.0 if not photons else photons.integral[i_replica] + ), + } + ) + pdf = pdf_normalized + else: + pdf = pdf_unnormalized + + if photons: + # Add in the photon component + pdf = layer_photon(pdf, i_replica) # Create the model - pdf_model = MetaModel( - model_input, final_pdf(placeholder_input), name=f"PDF_{i}", scaler=scaler - ) + pdf_model = MetaModel(model_input, pdf, name=f"PDF_{i_replica}", scaler=scaler) pdf_models.append(pdf_model) + return pdf_models + + +def generate_nn( + layer_type: str, + input_dimensions: int, + nodes: List[int], + activations: List[str], + initializer_name: str, + replica_seed: int, + dropout: float, + regularizer: str, + regularizer_args: dict, + last_layer_nodes: int, + name: str, +) -> MetaModel: + """ + Create the part of the model that contains all of the actual neural network + layers. + """ + common_args = { + 'nodes_in': input_dimensions, + 'nodes': nodes, + 'activations': activations, + 'initializer_name': initializer_name, + 'seed': replica_seed, + } + if layer_type == "dense": + reg = regularizer_selector(regularizer, **regularizer_args) + list_of_pdf_layers = generate_dense_network( + **common_args, dropout_rate=dropout, regularizer=reg + ) + elif layer_type == "dense_per_flavour": + list_of_pdf_layers = generate_dense_per_flavour_network( + **common_args, basis_size=last_layer_nodes + ) + + # Note: using a Sequential model would be more appropriate, but it would require + # creating a MetaSequential model. 
+ x = Input(shape=(None, input_dimensions), batch_size=1, name='xgrids_processed') + pdf = x + for layer in list_of_pdf_layers: + pdf = layer(pdf) + + model = MetaModel({'NN_input': x}, pdf, name=name) + return model diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 1f1a5a4011..2f77590a6e 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -13,13 +13,13 @@ import logging import numpy as np -from scipy.interpolate import PchipInterpolator from n3fit import model_gen from n3fit.backends import MetaModel, callbacks, clear_backend_state from n3fit.backends import operations as op import n3fit.hyper_optimization.penalties import n3fit.hyper_optimization.rewards +from n3fit.scaler import generate_scaler from n3fit.stopping import Stopping from n3fit.vpinterface import N3PDF from validphys.photon.compute import Photon @@ -472,6 +472,12 @@ def _model_generation(self, xinput, pdf_models, partition, partition_idx): if self.print_summary: training.summary() + pdf_model = training.get_layer("PDF_0") + pdf_model.summary() + nn_model = pdf_model.get_layer("NN_0") + nn_model.summary() + msr_model = pdf_model.get_layer("impose_msr") + msr_model.summary() models = { "training": training, @@ -600,53 +606,7 @@ def _generate_observables( # Store a reference to the interpolator as self._scaler if interpolation_points: - input_arr = np.concatenate(self.input_list, axis=1) - input_arr = np.sort(input_arr) - input_arr_size = input_arr.size - - # Define an evenly spaced grid in the domain [0,1] - # force_set_smallest is used to make sure the smallest point included in the scaling is - # 1e-9, to prevent trouble when saving it to the LHAPDF grid - force_set_smallest = input_arr.min() > 1e-9 - if force_set_smallest: - new_xgrid = np.linspace( - start=1 / input_arr_size, stop=1.0, endpoint=False, num=input_arr_size - ) - else: - new_xgrid = np.linspace(start=0, stop=1.0, endpoint=False, num=input_arr_size) - - # When mapping the FK xgrids onto our new grid, we need to consider degeneracies among - # the x-values in the FK grids - unique, counts = np.unique(input_arr, return_counts=True) - map_to_complete = [] - for cumsum_ in np.cumsum(counts): - # Make sure to include the smallest new_xgrid value, such that we have a point at - # x<=1e-9 - map_to_complete.append(new_xgrid[cumsum_ - counts[0]]) - map_to_complete = np.array(map_to_complete) - map_from_complete = unique - - # If needed, set feature_scaling(x=1e-9)=0 - if force_set_smallest: - map_from_complete = np.insert(map_from_complete, 0, 1e-9) - map_to_complete = np.insert(map_to_complete, 0, 0.0) - - # Select the indices of the points that will be used by the interpolator - onein = map_from_complete.size / (int(interpolation_points) - 1) - selected_points = [round(i * onein - 1) for i in range(1, int(interpolation_points))] - if selected_points[0] != 0: - selected_points = [0] + selected_points - map_from = map_from_complete[selected_points] - map_from = np.log(map_from) - map_to = map_to_complete[selected_points] - - try: - scaler = PchipInterpolator(map_from, map_to) - except ValueError: - raise ValueError( - "interpolation_points is larger than the number of unique " "input x-values" - ) - self._scaler = lambda x: np.concatenate([scaler(np.log(x)), x], axis=-1) + self._scaler = generate_scaler(self.input_list, interpolation_points) def _generate_pdf( self, @@ -886,9 +846,7 @@ def hyperparametrizable(self, params): # Initialize all photon classes for the different replicas: if 
self.lux_params: photons = Photon( - theoryid=self.theoryid, - lux_params=self.lux_params, - replicas=self.replicas, + theoryid=self.theoryid, lux_params=self.lux_params, replicas=self.replicas, ) else: photons = None @@ -960,11 +918,7 @@ def hyperparametrizable(self, params): for model in models.values(): model.compile(**params["optimizer"]) - passed = self._train_and_fit( - models["training"], - stopping_object, - epochs=epochs, - ) + passed = self._train_and_fit(models["training"], stopping_object, epochs=epochs,) if self.mode_hyperopt: # If doing a hyperparameter scan we need to keep track of the loss function diff --git a/n3fit/src/n3fit/msr.py b/n3fit/src/n3fit/msr.py index 9db718f285..17392d3022 100644 --- a/n3fit/src/n3fit/msr.py +++ b/n3fit/src/n3fit/msr.py @@ -2,15 +2,114 @@ The constraint module include functions to impose the momentum sum rules on the PDFs """ import logging +from typing import Callable, Optional import numpy as np +from n3fit.backends import Input, Lambda, MetaModel from n3fit.backends import operations as op from n3fit.layers import MSR_Normalization, xDivide, xIntegrator log = logging.getLogger(__name__) +def generate_msr_model_and_grid( + output_dim: int = 14, + mode: str = "ALL", + nx: int = int(2e3), + scaler: Optional[Callable] = None, + **kwargs +) -> MetaModel: + """ + Generates a model that applies the sum rules to the PDF. + + Parameters + ---------- + output_dim: int + Number of flavours of the output PDF + mode: str + Mode of sum rules to apply. It can be: + - "ALL": applies both the momentum and valence sum rules + - "MSR": applies only the momentum sum rule + - "VSR": applies only the valence sum rule + nx: int + Number of points of the integration grid + scaler: Scaler + Scaler to be applied to the PDF before applying the sum rules + + Returns + ------- + model: MetaModel + Model that applies the sum rules to the PDF + It takes as inputs: + - pdf_x: the PDF output of the model + - pdf_xgrid_integration: the PDF output of the model evaluated at the integration grid + - xgrid_integration: the integration grid + - photon_integral: the integrated photon contribution + It returns the PDF with the sum rules applied + xgrid_integration: dict + Dictionary with the integration grid, with: + - values: the integration grid + - input: the input layer of the integration grid + """ + # 0. Prepare input layers to MSR model + pdf_x = Input(shape=(None, output_dim), batch_size=1, name="pdf_x") + pdf_xgrid_integration = Input( + shape=(nx, output_dim), batch_size=1, name="pdf_xgrid_integration" + ) + + # 1. Generate the grid and weights that will be used to integrate + xgrid_integration, weights_array = gen_integration_input(nx) + # 1b If a scaler is provided, scale the input xgrid + if scaler: + xgrid_integration = scaler(xgrid_integration) + + # Turn into input layer. + xgrid_integration = op.numpy_to_input(xgrid_integration, name="integration_grid") + + # 1c Get the original grid + if scaler: + get_original = Lambda( + lambda x: op.op_gather_keep_dims(x, -1, axis=-1), name="x_original_integ" + ) + else: + get_original = lambda x: x + x_original = get_original(xgrid_integration) + + # 2. Divide the grid by x depending on the flavour + x_divided = xDivide()(x_original) + + # 3. Prepare the pdf for integration by dividing by x + pdf_integrand = Lambda(op.op_multiply, name="pdf_integrand")([x_divided, pdf_xgrid_integration]) + + # 4. Integrate the pdf + pdf_integrated = xIntegrator(weights_array, input_shape=(nx,))(pdf_integrand) + + # 5. 
THe input for the photon integral, will be set to 0 if no photons + photon_integral = Input(shape=(), batch_size=1, name='photon_integral') + + # 6. Compute the normalization factor + # For now set the photon component to None + normalization_factor = MSR_Normalization(output_dim, mode, name="msr_weights")( + pdf_integrated, photon_integral + ) + + # 7. Apply the normalization factor to the pdf + pdf_normalized = Lambda(lambda pdf_norm: pdf_norm[0] * pdf_norm[1], name="pdf_normalized")( + [pdf_x, normalization_factor] + ) + + inputs = { + "pdf_x": pdf_x, + "pdf_xgrid_integration": pdf_xgrid_integration, + "xgrid_integration": xgrid_integration, + "photon_integral": photon_integral, + } + model = MetaModel(inputs, pdf_normalized, name="impose_msr") + + return model, xgrid_integration + + def gen_integration_input(nx): """ Generates a np.array (shaped (nx,1)) of nx elements where the @@ -34,67 +133,3 @@ def gen_integration_input(nx): weights_array = np.array(weights).reshape(nx, 1) return xgrid, weights_array - - -def msr_impose(nx=int(2e3), mode='All', scaler=None, photons=None): - """ - This function receives: - Generates a function that applies a normalization layer to the fit. - - fit_layer: the 8-basis layer of PDF which we fit - The normalization is computed from the direct output of the NN (so the 7,8-flavours basis) - - final_layer: the 14-basis which is fed to the fktable - and it is applied to the input of the fktable (i.e., to the 14-flavours fk-basis). - It uses pdf_fit to compute the sum rule and returns a modified version of - the final_pdf layer with a normalisation by which the sum rule is imposed - - Parameters - ---------- - nx: int - number of points for the integration grid, default: 2000 - mode: str - what sum rules to compute (MSR, VSR or All), default: All - scaler: scaler - Function to apply to the input. If given the input to the model - will be a (1, None, 2) tensor where dim [:,:,0] is scaled - photon: :py:class:`validphys.photon.compute.Photon` - If given, gives the AddPhoton layer a function to compute the MSR component for the photon - """ - - # 1. Generate the fake input which will be used to integrate - xgrid, weights_array = gen_integration_input(nx) - # 1b If a scaler is provided, scale the input xgrid - if scaler: - xgrid = scaler(xgrid) - - # 2. Prepare the pdf for integration - # for that we need to multiply several flavours with 1/x - division_by_x = xDivide() - # 3. Now create the integration layer (the layer that will simply integrate, given some weight - integrator = xIntegrator(weights_array, input_shape=(nx,)) - - # 3.1 If a photon is given, compute the photon component of the MSR - photons_c = None - if photons: - photons_c = photons.integral - - # 4. Now create the normalization by selecting the right integrations - normalizer = MSR_Normalization(mode=mode, photons_contribution=photons_c) - - # 5. Make the xgrid array into a backend input layer so it can be given to the normalization - xgrid_input = op.numpy_to_input(xgrid, name="integration_grid") - # Finally prepare a function which will take as input the output of the PDF model - # and will return it appropiately normalized. 
- def apply_normalization(layer_pdf, ph_replica): - """ - layer_pdf: output of the PDF, unnormalized, ready for the fktable - """ - x_original = op.op_gather_keep_dims(xgrid_input, -1, axis=-1) - pdf_integrand = op.op_multiply([division_by_x(x_original), layer_pdf(xgrid_input)]) - normalization = normalizer(integrator(pdf_integrand), ph_replica) - - def ultimate_pdf(x): - return layer_pdf(x) * normalization - - return ultimate_pdf - - return apply_normalization, xgrid_input diff --git a/n3fit/src/n3fit/scaler.py b/n3fit/src/n3fit/scaler.py new file mode 100644 index 0000000000..2129f24d7f --- /dev/null +++ b/n3fit/src/n3fit/scaler.py @@ -0,0 +1,77 @@ +from typing import Callable, List, Optional + +import numpy as np +import numpy.typing as npt +from scipy.interpolate import PchipInterpolator + + +def generate_scaler( + input_list: List[npt.NDArray], interpolation_points: Optional[int] = None +) -> Callable: + """ + Generate the scaler function that applies feature scaling to the input data. + + Parameters + ---------- + input_list : list of numpy.ndarray + The list of input data arrays. + interpolation_points : int, optional + + Returns + ------- + _scaler : Callable + The scaler function that applies feature scaling to the input data. + """ + input_arr = np.concatenate(input_list, axis=1) + input_arr = np.sort(input_arr) + input_arr_size = input_arr.size + + # Define an evenly spaced grid in the domain [0,1] + # force_set_smallest is used to make sure the smallest point included in the scaling is + # 1e-9, to prevent trouble when saving it to the LHAPDF grid + force_set_smallest = input_arr.min() > 1e-9 + if force_set_smallest: + new_xgrid = np.linspace( + start=1 / input_arr_size, stop=1.0, endpoint=False, num=input_arr_size + ) + else: + new_xgrid = np.linspace(start=0, stop=1.0, endpoint=False, num=input_arr_size) + + # When mapping the FK xgrids onto our new grid, we need to consider degeneracies among + # the x-values in the FK grids + unique, counts = np.unique(input_arr, return_counts=True) + map_to_complete = [] + for cumsum_ in np.cumsum(counts): + # Make sure to include the smallest new_xgrid value, such that we have a point at + # x<=1e-9 + map_to_complete.append(new_xgrid[cumsum_ - counts[0]]) + map_to_complete = np.array(map_to_complete) + map_from_complete = unique + + # If needed, set feature_scaling(x=1e-9)=0 + if force_set_smallest: + map_from_complete = np.insert(map_from_complete, 0, 1e-9) + map_to_complete = np.insert(map_to_complete, 0, 0.0) + + # Select the indices of the points that will be used by the interpolator + onein = map_from_complete.size / (int(interpolation_points) - 1) + selected_points = [round(i * onein - 1) for i in range(1, int(interpolation_points))] + if selected_points[0] != 0: + selected_points = [0] + selected_points + map_from = map_from_complete[selected_points] + map_from = np.log(map_from) + map_to = map_to_complete[selected_points] + + try: + scaler = PchipInterpolator(map_from, map_to) + except ValueError as e: + raise ValueError( + "interpolation_points is larger than the number of unique input x-values" + ) from e + + def _scaler(x): + x_scaled = scaler(np.log(x)) + x_scaled = 2 * x_scaled - 1 + return np.concatenate([x_scaled, x], axis=-1) + + return _scaler diff --git a/n3fit/src/n3fit/tests/regressions/weights_1.h5 b/n3fit/src/n3fit/tests/regressions/weights_1.h5 index bcc79666b0..2c5b02e71e 100644 Binary files a/n3fit/src/n3fit/tests/regressions/weights_1.h5 and b/n3fit/src/n3fit/tests/regressions/weights_1.h5 differ diff --git 
a/n3fit/src/n3fit/tests/regressions/weights_2.h5 b/n3fit/src/n3fit/tests/regressions/weights_2.h5
index cd7b3d2d3c..e5a8adea86 100644
Binary files a/n3fit/src/n3fit/tests/regressions/weights_2.h5 and b/n3fit/src/n3fit/tests/regressions/weights_2.h5 differ
diff --git a/n3fit/src/n3fit/tests/test_xops.py b/n3fit/src/n3fit/tests/test_xops.py
new file mode 100644
index 0000000000..99ed67f64e
--- /dev/null
+++ b/n3fit/src/n3fit/tests/test_xops.py
@@ -0,0 +1,34 @@
+"""
+    Test the x operations
+"""
+import numpy as np
+
+from n3fit.layers import xDivide
+
+
+def test_xdivide_default():
+    """Check that the default xDivide works as expected"""
+    x_div = xDivide()
+    test_input = np.array([1, 2, 3], dtype=np.float32).reshape((1, 3, 1))
+    test_output = x_div(test_input)
+
+    expected_output = np.ones(shape=(1, 3, 14))
+    default_indices = [3, 4, 5, 6]
+    for i in default_indices:
+        expected_output[:, :, i] = 1 / test_input[:, :, 0]
+
+    np.testing.assert_allclose(test_output, expected_output, rtol=1e-05)
+
+
+def test_xdivide_indices():
+    """Check that xDivide with custom indices works as expected"""
+    custom_indices = [0, 1, 7]
+    x_div = xDivide(div_list=custom_indices)
+    test_input = np.array([1, 2, 3], dtype=np.float32).reshape((1, 3, 1))
+    test_output = x_div(test_input)
+
+    expected_output = np.ones(shape=(1, 3, 14))
+    for i in custom_indices:
+        expected_output[:, :, i] = 1 / test_input[:, :, 0]
+
+    np.testing.assert_allclose(test_output, expected_output, rtol=1e-05)
diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py
index 52cf001980..7e2e754e0f 100644
--- a/n3fit/src/n3fit/vpinterface.py
+++ b/n3fit/src/n3fit/vpinterface.py
@@ -224,10 +224,12 @@ def get_preprocessing_factors(self, replica=None):
         if replica is None:
             replica = 1
         # Replicas start counting in 1 so:
-        preprocessing_layers = self._models[replica - 1].get_layer_re(r"pdf_prepro_\d")
-        if len(preprocessing_layers) != 1:
+        preprocessing_layers = self._models[replica - 1].get_layer_re(r"preprocessing_factor_\d")
+        if len(preprocessing_layers) > 1:
             # We really don't want to fail at this point, but print a warning at least...
             log.warning("More than one preprocessing layer found within the model!")
+        elif len(preprocessing_layers) < 1:
+            log.warning("No preprocessing layer found within the model!")
         preprocessing_layer = preprocessing_layers[0]
 
         alphas_and_betas = None
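
The refactored ``numpy_to_input`` above no longer takes a ``no_reshape`` argument: it always prepends a batch
dimension of size 1 and leaves the number of grid points open (``None``) so that shapes show up in
``model.summary()``. A minimal sketch of the resulting layer, assuming TensorFlow 2.x in eager mode and the
module layout used in this diff:

.. code-block:: python

    import numpy as np

    from n3fit.backends import operations as op

    # A (50, 1) x-grid, as the integration grid in msr.py is shaped
    xgrid = np.linspace(1e-6, 1.0, 50).reshape(-1, 1)
    input_layer = op.numpy_to_input(xgrid, name="my_grid")

    # The symbolic shape keeps the number of grid points open: (1, None, 1)
    print(input_layer.shape)
    # The batched grid itself is attached to the layer for later use
    print(input_layer.tensor_content.shape)  # (1, 50, 1)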
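
``xDivide`` now builds a per-flavour exponent list once and applies a single ``op.pow`` call instead of
concatenating ``1/x`` and ones column by column. The sketch below reproduces the broadcasting idea in plain
TensorFlow, using the default indices ``[3, 4, 5, 6]`` of the 14-flavour FK basis (float exponents are used
here only to keep dtypes uniform):

.. code-block:: python

    import numpy as np
    import tensorflow as tf

    BASIS_SIZE = 14
    div_list = [3, 4, 5, 6]  # v, v3, v8, v15 in the 14-flavour FK basis
    powers = [-1.0 if i in div_list else 0.0 for i in range(BASIS_SIZE)]

    # A (1, N, 1) grid of x points, as in the new test_xops.py
    x = np.array([1.0, 2.0, 3.0], dtype=np.float32).reshape((1, 3, 1))
    # Broadcasting (1, 3, 1) against (14,) gives a (1, 3, 14) tensor of x**power
    result = tf.pow(x, powers)

    expected = np.ones((1, 3, BASIS_SIZE))
    for i in div_list:
        expected[:, :, i] = 1 / x[:, :, 0]
    np.testing.assert_allclose(result.numpy(), expected, rtol=1e-5)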
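
The sum rules are now applied through a standalone ``impose_msr`` model built by
``generate_msr_model_and_grid``, whose summary ``model_trainer`` also prints. A sketch of building and
inspecting it outside of a fit, assuming a working ``n3fit`` installation with the module layout of this diff
(the arguments shown are the defaults):

.. code-block:: python

    from n3fit.msr import generate_msr_model_and_grid

    # Returns the normalization model and the integration-grid input layer it uses
    msr_model, integration_grid = generate_msr_model_and_grid(mode="ALL", nx=2000)

    msr_model.summary()
    # The four inputs match the dictionary built in pdfNN_layer_generator:
    # pdf_x, pdf_xgrid_integration, xgrid_integration and photon_integral
    print([tensor.name for tensor in msr_model.inputs])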
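
Feature scaling has been factored out of ``model_trainer`` into ``n3fit.scaler.generate_scaler``. The sketch
below builds a scaler from two toy x-grids, which stand in for the FK-table grids collected in ``input_list``
(passing them as ``(1, N)`` arrays is an assumption of this example), and shows that the result returns
``(scaled_x, original_x)`` pairs with the scaled values mapped into ``[-1, 1]``:

.. code-block:: python

    import numpy as np

    from n3fit.scaler import generate_scaler

    # Two toy x-grids playing the role of the FK-table grids
    xgrids = [np.logspace(-6, 0, 40).reshape(1, -1), np.logspace(-5, 0, 30).reshape(1, -1)]
    scaler = generate_scaler(xgrids, interpolation_points=10)

    x = np.logspace(-6, 0, 20).reshape(-1, 1)
    scaled = scaler(x)

    print(scaled.shape)  # (20, 2): column 0 is the scaled input, column 1 the original x
    print(scaled[:, 0].min(), scaled[:, 0].max())  # both within [-1, 1]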