Commit: Merge replicas in Prefactor layer
Simplify handling of dropout

Factor out layer_generator in generate_dense_network

Refactor dense_per_flavor_network

Move setting of last nodes to generate_nn

Add constant arguments

Add constant arguments

Move dropout to generate_nn

Move concatenation of per_flavor layers into generate_nn

Make the two layer generators almost equal

Remove separate dense and dense_per_flavor functions

Add documentation.

Simplify per_flavor layer concatenation

Reverse order of loops over replicas and layers

Fixes for dropout

Fixes for per_flavour

Fix issue with copying over nodes for per_flavour layer

Fix seeds in per_flavour layer

Add error for combination of dropout with per_flavour layers

Add basis_size argument to per_flavour layer

Fix model_gen tests to use the new generate_nn in favor of the now-removed generate_dense and generate_dense_per_flavour

Allow for nodes to be a tuple

Move dropout, per_flavour check to checks

Clarify layer type check

Co-authored-by: Juan M. Cruz-Martinez <juacrumar@lairen.eu>

Clarify naming in nn_generator

Remove initializer_name argument

Clarify comment

Co-authored-by: Juan M. Cruz-Martinez <juacrumar@lairen.eu>

Add comment on shared layers

Rewrite comprehension over replica seeds

Add check on layer type

Merge prefactors into single layer

Add replica dimension to preprocessing factor in test

Update preprocessing layer in vpinterface

Remove assigning of weight slices

Simplify loading weights from file

Update regression data

Always return a single NNs model for all replicas, adjust weight getting and setting accordingly

Revert "Update regression data"

This reverts commit 6f79368.

Change structure of regression weights

Remove now unused postfix

Update regression weights

Give explicit shape to scatter_to_one

Update developing weights structure

Fix prefix typo

Add double ticks

Rename layer name constants

Use constants defined in metamodel.py for layer names

Explain need for is_stacked_single_replicas

Shorten line

Fix constant loading

Simplify get_replica_weights

NNs -> all_NNs

Clarify get_layer_replica_weights

Co-authored-by: Juan M. Cruz-Martinez <juacrumar@lairen.eu>

Clarify set_layer_replica_weights

Remove comment about python 3.11

Co-authored-by: Juan M. Cruz-Martinez <juacrumar@lairen.eu>

Fix typo in comment

Co-authored-by: Tanjona Rabemananjara <rrabeman@nikhef.nl>

Fix formatting in docstring

Co-authored-by: Tanjona Rabemananjara <rrabeman@nikhef.nl>

Reword docstring

Co-authored-by: Tanjona Rabemananjara <rrabeman@nikhef.nl>
APJansen and Radonirinaunimi committed Jan 26, 2024
1 parent f706be2 commit 8810e11
Showing 12 changed files with 202 additions and 161 deletions.

Binary file modified n3fit/runcards/examples/developing_weights.h5
25 changes: 14 additions & 11 deletions n3fit/src/n3fit/backends/__init__.py
@@ -1,20 +1,23 @@
-from n3fit.backends.keras_backend.internal_state import (
-    set_initial_state,
-    clear_backend_state,
-    set_eager
-)
+from n3fit.backends.keras_backend import callbacks, constraints, operations
 from n3fit.backends.keras_backend.MetaLayer import MetaLayer
-from n3fit.backends.keras_backend.MetaModel import MetaModel
+from n3fit.backends.keras_backend.MetaModel import (
+    NN_LAYER_ALL_REPLICAS,
+    NN_PREFIX,
+    PREPROCESSING_LAYER_ALL_REPLICAS,
+    MetaModel,
+)
 from n3fit.backends.keras_backend.base_layers import (
+    Concatenate,
     Input,
-    concatenate,
     Lambda,
     base_layer_selector,
+    concatenate,
     regularizer_selector,
-    Concatenate,
 )
-from n3fit.backends.keras_backend import operations
-from n3fit.backends.keras_backend import constraints
-from n3fit.backends.keras_backend import callbacks
+from n3fit.backends.keras_backend.internal_state import (
+    clear_backend_state,
+    set_eager,
+    set_initial_state,
+)
 
 print("Using Keras backend")
140 changes: 85 additions & 55 deletions n3fit/src/n3fit/backends/keras_backend/MetaModel.py
@@ -46,7 +46,8 @@
 }
 
 NN_PREFIX = "NN"
-PREPROCESSING_PREFIX = "preprocessing_factor"
+NN_LAYER_ALL_REPLICAS = "all_NNs"
+PREPROCESSING_LAYER_ALL_REPLICAS = "preprocessing_factor"
 
 # Some keys need to work for everyone
 for k, v in optimizers.items():
@@ -156,7 +157,7 @@ def perform_fit(self, x=None, y=None, epochs=1, **kwargs):
         of the model (the loss functions) to the partial losses.
         If the model was compiled with input and output data, they will not be passed through.
-        In this case by default the number of `epochs` will be set to 1
+        In this case by default the number of ``epochs`` will be set to 1
 
         ex:
             {'loss': [100], 'dataset_a_loss1' : [67], 'dataset_2_loss': [33]}
@@ -228,7 +229,7 @@ def compile(
     ):
         """
         Compile the model given an optimizer and a list of loss functions.
-        The optimizer must be one of those implemented in the `optimizer` attribute of this class.
+        The optimizer must be one of those implemented in the ``optimizer`` attribute of this class.
 
         Options:
             - A learning rate and a list of target outpout can be defined.
@@ -353,14 +354,10 @@ def get_replica_weights(self, i_replica):
             dict
                 dictionary with the weights of the replica
         """
-        NN_weights = [
-            tf.Variable(w, name=w.name) for w in self.get_layer(f"{NN_PREFIX}_{i_replica}").weights
-        ]
-        prepro_weights = [
-            tf.Variable(w, name=w.name)
-            for w in self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").weights
-        ]
-        weights = {NN_PREFIX: NN_weights, PREPROCESSING_PREFIX: prepro_weights}
+        weights = {}
+        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
+            layer = self.get_layer(layer_type)
+            weights[layer_type] = get_layer_replica_weights(layer, i_replica)
 
         return weights
 
@@ -378,10 +375,9 @@ def set_replica_weights(self, weights, i_replica=0):
             i_replica: int
                 the replica number to set, defaulting to 0
         """
-        self.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights[NN_PREFIX])
-        self.get_layer(f"{PREPROCESSING_PREFIX}_{i_replica}").set_weights(
-            weights[PREPROCESSING_PREFIX]
-        )
+        for layer_type in [NN_LAYER_ALL_REPLICAS, PREPROCESSING_LAYER_ALL_REPLICAS]:
+            layer = self.get_layer(layer_type)
+            set_layer_replica_weights(layer=layer, weights=weights[layer_type], i_replica=i_replica)
 
     def split_replicas(self):
         """
@@ -411,51 +407,85 @@ def load_identical_replicas(self, model_file):
         """
         From a single replica model, load the same weights into all replicas.
         """
-        weights = self._format_weights_from_file(model_file)
+        single_replica = self.single_replica_generator()
+        single_replica.load_weights(model_file)
+        weights = single_replica.get_replica_weights(0)
 
         for i_replica in range(self.num_replicas):
             self.set_replica_weights(weights, i_replica)
 
-    def _format_weights_from_file(self, model_file):
-        """Read weights from a .h5 file and format into a dictionary of tf.Variables"""
-        weights = {}
-
-        with h5py.File(model_file, 'r') as f:
-            # look at layers of the form NN_i and take the lowest i
-            i_replica = 0
-            while f"{NN_PREFIX}_{i_replica}" not in f:
-                i_replica += 1
-
-            weights[NN_PREFIX] = self._extract_weights(
-                f[f"{NN_PREFIX}_{i_replica}"], NN_PREFIX, i_replica
-            )
-            weights[PREPROCESSING_PREFIX] = self._extract_weights(
-                f[f"{PREPROCESSING_PREFIX}_{i_replica}"], PREPROCESSING_PREFIX, i_replica
-            )
-
-        return weights
-
-    def _extract_weights(self, h5_group, weights_key, i_replica):
-        """Extract weights from a h5py group, turning them into Tensorflow variables"""
-        weights = []
-
-        def append_weights(name, node):
-            if isinstance(node, h5py.Dataset):
-                weight_name = node.name.split("/", 2)[-1]
-                weight_name = weight_name.replace(f"{NN_PREFIX}_{i_replica}", f"{NN_PREFIX}_0")
-                weight_name = weight_name.replace(
-                    f"{PREPROCESSING_PREFIX}_{i_replica}", f"{PREPROCESSING_PREFIX}_0"
-                )
-                weights.append(tf.Variable(node[()], name=weight_name))
-
-        h5_group.visititems(append_weights)
-
-        # have to put them in the same order
-        weights_ordered = []
-        weights_model_order = [w.name for w in self.get_replica_weights(0)[weights_key]]
-        for w in weights_model_order:
-            for w_h5 in weights:
-                if w_h5.name == w:
-                    weights_ordered.append(w_h5)
-
-        return weights_ordered
+
+def is_stacked_single_replicas(layer):
+    """
+    Check if the layer consists of stacked single replicas (only happens for NN layers),
+    to determine how to extract single replica weights.
+
+    Parameters
+    ----------
+        layer: MetaLayer
+            the layer to check
+
+    Returns
+    -------
+        bool
+            True if the layer consists of stacked single replicas
+    """
+    if not isinstance(layer, MetaModel):
+        return False
+    return f"{NN_PREFIX}_0" in [sublayer.name for sublayer in layer.layers]
+
+
+def get_layer_replica_weights(layer, i_replica: int):
+    """
+    Get the weights for the given single replica ``i_replica``,
+    from a ``layer`` that contains the weights of all the replicas.
+
+    Note that the layer could be a complete NN with many separated sub_layers,
+    each of which contains weights for all replicas together.
+    This function separates the per-replica weights and returns the list of weights as if the
+    input ``layer`` were made of _only_ replica ``i_replica``.
+
+    Parameters
+    ----------
+        layer: MetaLayer
+            the layer to get the weights from
+        i_replica: int
+            the replica number
+
+    Returns
+    -------
+        weights: list
+            list of weights for the replica
+    """
+    if is_stacked_single_replicas(layer):
+        weights = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights
+    else:
+        weights = [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights]
+
+    return weights
+
+
+def set_layer_replica_weights(layer, weights, i_replica: int):
+    """
+    Set the weights for the given single replica ``i_replica``.
+
+    When the input ``layer`` contains weights for many replicas, ensures that
+    only those corresponding to replica ``i_replica`` are updated.
+
+    Parameters
+    ----------
+        layer: MetaLayer
+            the layer to set the weights for
+        weights: list
+            list of weights for the replica
+        i_replica: int
+            the replica number
+    """
+    if is_stacked_single_replicas(layer):
+        layer.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights)
+        return
+
+    full_weights = [w.numpy() for w in layer.weights]
+    for w_old, w_new in zip(full_weights, weights):
+        w_old[i_replica : i_replica + 1] = w_new
+
+    layer.set_weights(full_weights)
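
For reference, a minimal numpy sketch (not part of the commit; shapes invented) of the slicing convention the two helpers rely on: per-replica weights live along the first axis of each stacked weight tensor, so replica i is read and written through an [i : i + 1] slice.

import numpy as np

num_replicas, fan_in, fan_out = 3, 2, 4
stacked_kernel = np.random.rand(num_replicas, fan_in, fan_out)

# "get": the slice keeps the replica axis, mirroring w[i_replica : i_replica + 1]
replica_1 = stacked_kernel[1:2]  # shape (1, 2, 4)

# "set": assigning to the same slice updates only that replica in place
stacked_kernel[1:2] = np.zeros_like(replica_1)
assert stacked_kernel[1].sum() == 0 and stacked_kernel[0].sum() > 0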
5 changes: 4 additions & 1 deletion n3fit/src/n3fit/layers/msr_normalization.py
@@ -40,6 +40,7 @@ def __init__(self, mode: str = "ALL", replicas: int = 1, **kwargs):
         else:
             raise ValueError(f"Mode {mode} not accepted for sum rules")
 
+        self.replicas = replicas
         indices = []
         self.divisor_indices = []
         if self._msr_enabled:
@@ -83,6 +84,7 @@ def call(self, pdf_integrated, photon_integral):
         reshape = lambda x: op.transpose(x[0])
         y = reshape(pdf_integrated)
         photon_integral = reshape(photon_integral)
+
         numerators = []
 
         if self._msr_enabled:
@@ -96,8 +98,9 @@
         divisors = op.gather(y, self.divisor_indices, axis=0)
 
         # Fill in the rest of the flavours with 1
+        num_flavours = y.shape[0]
         norm_constants = op.scatter_to_one(
-            numerators / divisors, indices=self.indices, output_shape=y.shape
+            numerators / divisors, indices=self.indices, output_shape=(num_flavours, self.replicas)
        )
 
         return op.batchit(op.transpose(norm_constants), batch_dimension=1)
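
As a rough illustration (not part of the commit; scatter_to_one is an n3fit op and the values below are made up): the normalization constants are scattered into a tensor of ones whose shape is now spelled out as (num_flavours, replicas), since y carries a replica axis after the merge.

import numpy as np

num_flavours, replicas = 14, 3
indices = [1, 2]  # flavours subject to a sum rule
norm = np.full((len(indices), replicas), 0.5)  # hypothetical numerators / divisors

# scatter-to-one pattern: every flavour without a sum rule keeps a factor of 1
constants = np.ones((num_flavours, replicas))
constants[indices] = norm
assert constants.shape == (num_flavours, replicas)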
16 changes: 12 additions & 4 deletions n3fit/src/n3fit/layers/preprocessing.py
@@ -33,13 +33,16 @@ class Preprocessing(MetaLayer):
             Whether large x preprocessing factor should be active
         seed: int
             seed for the initializer of the random alpha and beta values
+        num_replicas: int (default 1)
+            The number of replicas
     """
 
     def __init__(
         self,
         flav_info: Optional[list] = None,
         seed: int = 0,
         large_x: bool = True,
+        num_replicas: int = 1,
         **kwargs,
     ):
         if flav_info is None:
@@ -49,6 +52,8 @@ def __init__(
         self.flav_info = flav_info
         self.seed = seed
         self.large_x = large_x
+        self.num_replicas = num_replicas
+
         self.alphas = []
         self.betas = []
         super().__init__(**kwargs)
@@ -87,7 +92,7 @@ def generate_weight(self, name: str, kind: str, dictionary: dict, set_to_zero: b
         # Generate the new trainable (or not) parameter
         newpar = self.builder_helper(
             name=name,
-            kernel_shape=(1,),
+            kernel_shape=(self.num_replicas, 1),
             initializer=initializer,
             trainable=trainable,
             constraint=constraint,
@@ -117,9 +122,12 @@ def call(self, x):
         Returns
         -------
-            prefactor: tensor(shape=[1,N,F])
+            prefactor: tensor(shape=[1,R,N,F])
         """
-        alphas = op.stack(self.alphas, axis=1)
-        betas = op.stack(self.betas, axis=1)
+        # weight tensors of shape (R, 1, F)
+        alphas = op.stack(self.alphas, axis=-1)
+        betas = op.stack(self.betas, axis=-1)
 
         x = op.batchit(x, batch_dimension=0)
 
         return x ** (1 - alphas) * (1 - x) ** betas
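
A small numpy sketch (not part of the commit; shapes invented) of the broadcasting in call(): x is batched to (1, 1, N, 1) while the stacked exponents have shape (R, 1, F), so the prefactor broadcasts to (1, R, N, F), i.e. one factor per replica R, grid point N and flavour F.

import numpy as np

R, N, F = 3, 10, 8  # replicas, x-grid points, flavours
x = np.linspace(0.01, 0.99, N).reshape(1, 1, N, 1)
alphas = np.random.rand(R, 1, F)
betas = np.random.rand(R, 1, F)

prefactor = x ** (1 - alphas) * (1 - x) ** betas
assert prefactor.shape == (1, R, N, F)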