From d2318a1387734810bf0fe0252b7f1a5c75e05570 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Wed, 16 Jul 2025 17:23:28 -0500 Subject: [PATCH 1/2] YAdd training v5.1 files --- mlperf_logging/benchmark_meta.py | 14 ++- mlperf_logging/compliance_checker/README.md | 12 +- .../training_5.1.0/closed_common.yaml | 2 +- .../training_5.1.0/closed_flux1.yaml | 77 +++++++++++++ .../training_5.1.0/closed_llama31_405b.yaml | 85 ++++++++++++++ .../training_5.1.0/closed_llama31_8b.yaml | 49 ++++++++ .../training_5.1.0/common.yaml | 4 +- .../training_5.1.0/open_common.yaml | 2 +- .../training_5.1.0/open_flux1.yaml | 32 ++++++ .../training_5.1.0/open_llama31_405b.yaml | 78 +++++++++++++ .../training_5.1.0/open_llama31_8b.yaml | 8 ++ mlperf_logging/mllog/constants.py | 2 + mlperf_logging/rcp_checker/rcp_checker.py | 2 +- .../training_5.1.0/rcps_flux1.json | 65 +++++++++++ .../training_5.1.0/rcps_llama31_405b.json | 106 ++++++++++++++++++ .../training_5.1.0/rcps_llama31_8b.json | 25 +++++ .../visualization_scripts/rcp_viewer.py | 2 +- mlperf_logging/result_summarizer/config.yaml | 4 +- 18 files changed, 555 insertions(+), 14 deletions(-) create mode 100644 mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml create mode 100644 mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_405b.yaml create mode 100644 mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_8b.yaml create mode 100644 mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml create mode 100644 mlperf_logging/compliance_checker/training_5.1.0/open_llama31_405b.yaml create mode 100644 mlperf_logging/compliance_checker/training_5.1.0/open_llama31_8b.yaml create mode 100644 mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json create mode 100644 mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json create mode 100644 mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json diff --git a/mlperf_logging/benchmark_meta.py b/mlperf_logging/benchmark_meta.py index d323050..cf7c3e3 100644 --- a/mlperf_logging/benchmark_meta.py +++ b/mlperf_logging/benchmark_meta.py @@ -20,6 +20,9 @@ 'rgat': 10, 'llama2_70b_lora': 10, 'llama31_405b': 3, + # TODO: Update with official values + 'llama31_8b': 10, + 'flux1': 10, }, 'hpc' : { @@ -143,7 +146,16 @@ 'llama2_70b_lora', 'rgat', 'llama31_405b' - ] + ], + '5.1': [ + 'llama31_8b', + 'dlrm_dcnv2', + 'retinanet', + 'flux1', + 'llama2_70b_lora', + 'rgat', + 'llama31_405b' + ] }, 'hpc': { diff --git a/mlperf_logging/compliance_checker/README.md b/mlperf_logging/compliance_checker/README.md index d9f3dee..523ce1b 100644 --- a/mlperf_logging/compliance_checker/README.md +++ b/mlperf_logging/compliance_checker/README.md @@ -12,7 +12,7 @@ To check a log file for compliance: By default, 5.1.0 training edition rules are used and the default config is set to `5.1.0/common.yaml`. This config will check all common keys and enqueue benchmark specific config to be checked as well. -Old training editions, still supported are 4.0.0, 3.1.0, 3.0.0, 2.1.0, 2.0.0, 1.1.0, 1.0.0, 0.7.0 and 0.6.0 +Old training editions, still supported are 5.0.0, 4.1.0 4.0.0, 3.1.0, 3.0.0, 2.1.0, 2.0.0, 1.1.0, 1.0.0, 0.7.0 and 0.6.0 To check hpc compliance rules (only 1.0.0 ruleset is supported), set --usage hpc --ruleset 1.0.0. @@ -26,17 +26,19 @@ As log examples use [NVIDIA's training logs](https://github.com/mlperf/training_ 5.1.0/closed_common.yaml - the common rules file for closed submissions. 
These rules apply to all benchmarks 5.1.0/open_common.yaml - the common rules file for open submissions. These rules apply to all benchmarks 5.1.0/closed_retinanet.yaml - Per-benchmark rules, closed submissions. - 5.1.0/closed_bert.yaml + 5.1.0/closed_llama31_8b.yaml + 5.1.0/closed_llama31_405b.yaml 5.1.0/closed_dlrm_dcnv2.yaml 5.1.0/closed_rgat.yaml 5.1.0/closed_llama2_70b_lora.yaml - 5.1.0/closed_flux.yaml + 5.1.0/closed_flux1.yaml 5.1.0/open_retinanet.yaml - Per-benchmark rules, open submissions. - 5.1.0/open_bert.yaml + 5.1.0/open_llama31_8b.yaml + 5.1.0/open_llama31_405b.yaml 5.1.0/open_dlrm_dcnv2.yaml 5.1.0/open_rgat.yaml 5.1.0/open_llama2_70b_lora.yaml - 5.1.0/open_flux.yaml + 5.1.0/open_flux1.yaml ### Existing config files for HPC submissions diff --git a/mlperf_logging/compliance_checker/training_5.1.0/closed_common.yaml b/mlperf_logging/compliance_checker/training_5.1.0/closed_common.yaml index 2c49169..8639eeb 100755 --- a/mlperf_logging/compliance_checker/training_5.1.0/closed_common.yaml +++ b/mlperf_logging/compliance_checker/training_5.1.0/closed_common.yaml @@ -2,7 +2,7 @@ - KEY: NAME: submission_benchmark REQ: EXACTLY_ONE - CHECK: " v['value'] in ['retinanet', 'stable_diffusion', 'dlrm_dcnv2', 'bert', 'rgat', 'llama2_70b_lora', 'flux'] " + CHECK: " v['value'] in ['retinanet', 'flux1', 'dlrm_dcnv2', 'llama31_8b', 'rgat', 'llama2_70b_lora', 'llama31_405b'] " POST: " enqueue_config('training_5.1.0/closed_{}.yaml'.format(v['value'])) " - KEY: diff --git a/mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml b/mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml new file mode 100644 index 0000000..984e9d0 --- /dev/null +++ b/mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml @@ -0,0 +1,77 @@ +# Stable diffusion uses two metrics, FID and CLIP. +# These metrics can be calculated offline, using different scripts +# and logged seperatly. 
Therefore, we create a virtual key +# called aggregated_eval_accuracy, which aggregates +# both metrics into a single log line + +- BEGIN: + CODE: | + from dataclasses import replace + agg_eval_lines = {} + for line in loglines: + if line.key == "eval_accuracy" and 'metric' in line.value['metadata']: + samples_count = line.value['metadata']['samples_count'] + if samples_count not in agg_eval_lines: + new_line = replace(line) # Make a copy + new_line.key = "aggregated_eval_accuracy" + new_line.full_string = "" # Not needed + new_line.lineno = -1 # Not needed + new_line.value = {'value': {'samples_count': samples_count}, 'metadata':{}} + agg_eval_lines[samples_count] = new_line + + agg_eval_lines[samples_count].timestamp = max(line.timestamp, agg_eval_lines[samples_count].timestamp) + agg_eval_lines[samples_count].value['value'][line.value['metadata']['metric']] = line.value['value'] + loglines.extend(agg_eval_lines.values()) + +- KEY: + NAME: global_batch_size + REQ: AT_LEAST_ONE + CHECK: " v['value'] >= 0 " + +- KEY: + NAME: opt_name + REQ: EXACTLY_ONE + CHECK: " v['value'] == 'adamw' " + +- KEY: + NAME: opt_adamw_beta_1 + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0.9 " + +- KEY: + NAME: opt_adamw_beta_2 + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0.95 " + +- KEY: + NAME: opt_adamw_epsilon + REQ: EXACTLY_ONE + CHECK: " v['value'] == 1e-08 " + +- KEY: + NAME: opt_adamw_weight_decay + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0.1 " + +- KEY: + NAME: opt_base_learning_rate + REQ: EXACTLY_ONE + CHECK: " v['value'] >= 0.0 " + +- KEY: + NAME: opt_learning_rate_warmup_steps + REQ: EXACTLY_ONE + CHECK: " v['value'] >= 0 " + +- KEY: + NAME: opt_gradient_clip_norm + REQ: EXACTLY_ONE + CHECK: " v['value'] == 1.0 " + +# TODO: Update with official metric name +- KEY: + NAME: averaged_validation_loss + REQ: AT_LEAST_ONE + CHECK: + - "'epoch_num' in v['metadata']" + ATLEAST_ONE_CHECK: "v['value'] <= 0.586 and v['value'] > 0.0" diff --git a/mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_405b.yaml b/mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_405b.yaml new file mode 100644 index 0000000..c47fd87 --- /dev/null +++ b/mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_405b.yaml @@ -0,0 +1,85 @@ +- KEY: + NAME: global_batch_size + REQ: EXACTLY_ONE + POST: > + s['global_batch_size'] = v['value'] + +- KEY: + NAME: max_sequence_length + REQ: EXACTLY_ONE + CHECK: " v['value'] == 8192 " + +- KEY: + NAME: opt_name + REQ: EXACTLY_ONE + CHECK: " v['value'] == 'adamw' " + +- KEY: + NAME: opt_base_learning_rate + REQ: EXACTLY_ONE + CHECK: " v['value'] * 1152 == s['global_batch_size'] * 8e-5 " + +- KEY: + NAME: opt_end_learning_rate + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_decay_steps + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_warmup_steps + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_decay_schedule + REQ: EXACTLY_ONE + CHECK: " v['value'] == 'cosine with linear warmup' " + +- KEY: + NAME: opt_adamw_beta_1 + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0.9 " + +- KEY: + NAME: opt_adamw_beta_2 + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0.95 " + +- KEY: + NAME: opt_adamw_epsilon + REQ: EXACTLY_ONE + CHECK: " v['value'] == 1e-05 " + +- KEY: + NAME: opt_adamw_weight_decay + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0.1 " + +- KEY: + NAME: opt_gradient_clip_norm + REQ: EXACTLY_ONE + CHECK: " v['value'] == 1.0 " + +- KEY: + NAME: gradient_accumulation_steps + REQ: EXACTLY_ONE + CHECK: " v['value'] > 0 " + +- KEY: + NAME: eval_samples + REQ: 
EXACTLY_ONE + CHECK: " v['value'] == 5760 " + +- KEY: + NAME: eval_accuracy + REQ: AT_LEAST_ONE + CHECK: + - "'samples_count' in v['metadata']" + ATLEAST_ONE_CHECK: "(v['value'] <= 5.6) and v['value'] > 0.0" + +- KEY: + NAME: init_checkpoint_step + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0 " + diff --git a/mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_8b.yaml b/mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_8b.yaml new file mode 100644 index 0000000..3619827 --- /dev/null +++ b/mlperf_logging/compliance_checker/training_5.1.0/closed_llama31_8b.yaml @@ -0,0 +1,49 @@ +- KEY: + NAME: global_batch_size + REQ: EXACTLY_ONE + POST: > + s['global_batch_size'] = v['value'] + +# TODO: Update with official compliance requirements +- KEY: + NAME: opt_base_learning_rate + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_lamb_epsilon + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_training_steps + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_warmup_steps + REQ: EXACTLY_ONE + +- KEY: + NAME: num_warmup_steps + REQ: EXACTLY_ONE + +- KEY: + NAME: start_warmup_step + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_lamb_beta_1 + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_lamb_beta_2 + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_lamb_weight_decay_rate + REQ: EXACTLY_ONE + +- KEY: + NAME: eval_accuracy + REQ: AT_LEAST_ONE + CHECK: + - "'epoch_num' in v['metadata']" + ATLEAST_ONE_CHECK: "(v['value'] >= 0.720) and v['value'] < 1.0" diff --git a/mlperf_logging/compliance_checker/training_5.1.0/common.yaml b/mlperf_logging/compliance_checker/training_5.1.0/common.yaml index cfdd5a6..360854c 100755 --- a/mlperf_logging/compliance_checker/training_5.1.0/common.yaml +++ b/mlperf_logging/compliance_checker/training_5.1.0/common.yaml @@ -107,13 +107,13 @@ NAME: epoch_start REQ: AT_LEAST_ONE_OR(block_start) CHECK: - - "'epoch_num' in v['metadata']" + - "('epoch_num' in v['metadata']) | ('samples_count' in v['metadata'])" - KEY: NAME: epoch_stop REQ: AT_LEAST_ONE_OR(block_stop) CHECK: - - "'epoch_num' in v['metadata']" + - "('epoch_num' in v['metadata']) | ('samples_count' in v['metadata'])" # making sure previous eval did print it's accuracy result - KEY: diff --git a/mlperf_logging/compliance_checker/training_5.1.0/open_common.yaml b/mlperf_logging/compliance_checker/training_5.1.0/open_common.yaml index 97abafc..41015a8 100644 --- a/mlperf_logging/compliance_checker/training_5.1.0/open_common.yaml +++ b/mlperf_logging/compliance_checker/training_5.1.0/open_common.yaml @@ -2,5 +2,5 @@ - KEY: NAME: submission_benchmark REQ: EXACTLY_ONE - CHECK: " v['value'] in ['retinanet', 'dlrm_dcnv2', 'bert', 'rgat', 'llama2_70b_lora', 'flux'] " + CHECK: " v['value'] in ['retinanet', 'flux1', 'dlrm_dcnv2', 'llama31_8b', 'rgat', 'llama2_70b_lora', 'llama31_405b'] " POST: " enqueue_config('training_5.1.0/open_{}.yaml'.format(v['value'])) " diff --git a/mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml b/mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml new file mode 100644 index 0000000..19a69fa --- /dev/null +++ b/mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml @@ -0,0 +1,32 @@ +# Stable diffusion uses two metrics, FID and CLIP. +# These metrics can be calculated offline, using different scripts +# and logged seperatly. 
Therefore, we create a virtual key +# called aggregated_eval_accuracy, which aggregates +# both metrics into a single log line + +- BEGIN: + CODE: | + from dataclasses import replace + agg_eval_lines = {} + for line in loglines: + if line.key == "eval_accuracy" and 'metric' in line.value['metadata']: + samples_count = line.value['metadata']['samples_count'] + if samples_count not in agg_eval_lines: + new_line = replace(line) # Make a copy + new_line.key = "aggregated_eval_accuracy" + new_line.full_string = "" # Not needed + new_line.lineno = -1 # Not needed + new_line.value = {'value': {'samples_count': samples_count}, 'metadata':{}} + agg_eval_lines[samples_count] = new_line + + agg_eval_lines[samples_count].timestamp = max(line.timestamp, agg_eval_lines[samples_count].timestamp) + agg_eval_lines[samples_count].value['value'][line.value['metadata']['metric']] = line.value['value'] + loglines.extend(agg_eval_lines.values()) + +# TODO: Update with official metric name +- KEY: + NAME: averaged_validation_loss + REQ: AT_LEAST_ONE + CHECK: + - "'epoch_num' in v['metadata']" + ATLEAST_ONE_CHECK: "v['value'] <= 0.586 and v['value'] > 0.0" diff --git a/mlperf_logging/compliance_checker/training_5.1.0/open_llama31_405b.yaml b/mlperf_logging/compliance_checker/training_5.1.0/open_llama31_405b.yaml new file mode 100644 index 0000000..0a29e8b --- /dev/null +++ b/mlperf_logging/compliance_checker/training_5.1.0/open_llama31_405b.yaml @@ -0,0 +1,78 @@ +- KEY: + NAME: global_batch_size + REQ: EXACTLY_ONE + POST: > + s['global_batch_size'] = v['value'] + +- KEY: + NAME: max_sequence_length + REQ: EXACTLY_ONE + CHECK: " v['value'] == 8192 " + +- KEY: + NAME: opt_name + REQ: EXACTLY_ONE + CHECK: " v['value'] == 'adamw' " + +- KEY: + NAME: opt_base_learning_rate + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_end_learning_rate + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_decay_steps + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_warmup_steps + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_learning_rate_decay_schedule + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_adamw_beta_1 + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_adamw_beta_2 + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_adamw_epsilon + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_adamw_weight_decay + REQ: EXACTLY_ONE + +- KEY: + NAME: opt_gradient_clip_norm + REQ: EXACTLY_ONE + +- KEY: + NAME: gradient_accumulation_steps + REQ: EXACTLY_ONE + CHECK: " v['value'] > 0 " + +- KEY: + NAME: eval_samples + REQ: EXACTLY_ONE + CHECK: " v['value'] == 5760 " + +- KEY: + NAME: eval_accuracy + REQ: AT_LEAST_ONE + CHECK: + - "'epoch_num' in v['metadata']" + ATLEAST_ONE_CHECK: "(v['value'] <= 5.6) and v['value'] > 0.0" + +- KEY: + NAME: init_checkpoint_step + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0 " + diff --git a/mlperf_logging/compliance_checker/training_5.1.0/open_llama31_8b.yaml b/mlperf_logging/compliance_checker/training_5.1.0/open_llama31_8b.yaml new file mode 100644 index 0000000..ff3f204 --- /dev/null +++ b/mlperf_logging/compliance_checker/training_5.1.0/open_llama31_8b.yaml @@ -0,0 +1,8 @@ + +# TODO: Update with official compliance requirements +- KEY: + NAME: eval_accuracy + REQ: AT_LEAST_ONE + CHECK: + - "'epoch_num' in v['metadata']" + ATLEAST_ONE_CHECK: "v['value'] < 1.0" diff --git a/mlperf_logging/mllog/constants.py b/mlperf_logging/mllog/constants.py index d272c1e..880a814 100644 --- a/mlperf_logging/mllog/constants.py +++ b/mlperf_logging/mllog/constants.py @@ -55,6 +55,8 @@ GNN = "gnn" RGAT = "rgat" LLAMA31_405B = "llama31_405b" +LLAMA31_8B = "llama31_8b" 
+FLUX1 = "flux1" # Constant values - model info ADAGRAD = "adagrad" diff --git a/mlperf_logging/rcp_checker/rcp_checker.py b/mlperf_logging/rcp_checker/rcp_checker.py index 3806a90..1fb028b 100644 --- a/mlperf_logging/rcp_checker/rcp_checker.py +++ b/mlperf_logging/rcp_checker/rcp_checker.py @@ -441,7 +441,7 @@ def _set_results_scaling(self, scale_factor, results_dir): def _eval_submission_record(self, rcp_record, subm_epochs, results_dir): '''Compare reference and submission convergence.''' - if self.ruleset == "5.0.0" and self.benchmark == "llama31_405b": + if self.ruleset in ["5.0.0", "5.1.0"] and self.benchmark == "llama31_405b": rcp_record['Max Speedup'] = rcp_record['RCP Mean'] / (rcp_record['Min Epochs'] - 46080) subm_epochs.sort() diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json new file mode 100644 index 0000000..e6c0fa8 --- /dev/null +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json @@ -0,0 +1,65 @@ +{ + "flux1_ref_1024": + { + "Benchmark": "flux1", + "Creator": "", + "When": "", + "Platform": "", + "BS": 1024, + "Hyperparams": { + "opt_adamw_beta_1": 0, + "opt_adamw_beta_2": 0, + "opt_adamw_epsilon": 0, + "opt_adamw_weight_decay": 0, + "opt_base_learning_rate": 0, + "opt_learning_rate_warmup_steps": 0 + }, + "Epochs to converge": [ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 + ] + }, + "flux1_ref_2048": + { + "Benchmark": "flux1", + "Creator": "", + "When": "", + "Platform": "", + "BS": 2048, + "Hyperparams": { + "opt_adamw_beta_1": 0, + "opt_adamw_beta_2": 0, + "opt_adamw_epsilon": 0, + "opt_adamw_weight_decay": 0, + "opt_base_learning_rate": 0, + "opt_learning_rate_warmup_steps": 0 + }, + "Epochs to converge": [ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 + ] + }, + "flux1_ref_4096": + { + "Benchmark": "flux1", + "Creator": "", + "When": "", + "Platform": "", + "BS": 4096, + "Hyperparams": { + "opt_adamw_beta_1": 0, + "opt_adamw_beta_2": 0, + "opt_adamw_epsilon": 0, + "opt_adamw_weight_decay": 0, + "opt_base_learning_rate": 0, + "opt_learning_rate_warmup_steps": 0 + }, + "Epochs to converge": [ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 + ] + } +} \ No newline at end of file diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json new file mode 100644 index 0000000..70adaf5 --- /dev/null +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json @@ -0,0 +1,106 @@ +{ + "llama31_405b_ref_1008": + { + "Benchmark": "llama31_405b", + "Creator": "NVIDIA", + "When": "Reference RCPs before 5.0 submission", + "Platform": "288xDGX-H100", + "BS": 1008, + "Hyperparams": { + "opt_base_learning_rate": 7e-05, + "opt_learning_rate_warmup_steps": 9143, + "gradient_accumulation_steps": 126 + }, + "Epochs to converge": [ + 324576,324576,324576, + 324576,324576,324576 + ] + }, + "llama31_405b_ref_1152": + { + "Benchmark": "llama31_405b", + "Creator": "NVIDIA", + "When": "Reference RCPs before 5.0 submission", + "Platform": "288xDGX-H100", + "BS": 1152, + "Hyperparams": { + "opt_base_learning_rate": 8e-05, + "opt_learning_rate_warmup_steps": 8000, + "gradient_accumulation_steps": 144 + }, + "Epochs to converge": [ + 322560,322560,322560, + 322560,322560,322560 + ] + }, + + "llama31_405b_ref_2304": + { + "Benchmark": "llama31_405b", + "Creator": "NVIDIA", + "When": "Reference RCPs before 5.0 submission", + "Platform": "288xDGX-H100", + "BS": 
2304, + "Hyperparams": { + "opt_base_learning_rate": 16e-05, + "opt_learning_rate_warmup_steps": 4000, + "gradient_accumulation_steps": 288 + }, + "Epochs to converge": [ + 368640,368640,368640, + 368640,414720,414720 + ] + }, + "llama31_405b_ref_4608": + { + "Benchmark": "llama31_405b", + "Creator": "NVIDIA", + "When": "Reference RCPs before 5.0 submission", + "Platform": "288xDGX-H100", + "BS": 4608, + "Hyperparams": { + "opt_base_learning_rate": 32e-05, + "opt_learning_rate_warmup_steps": 2000, + "gradient_accumulation_steps": 576 + }, + "Epochs to converge": [ + 460800,460800,506880, + 506880,506880,506880 + ] + }, + "llama31_405b_ref_6912": + { + "Benchmark": "llama31_405b", + "Creator": "NVIDIA", + "When": "Reference RCPs before 5.0 submission", + "Platform": "72xDGX-H100", + "BS": 6912, + "Hyperparams": { + "opt_base_learning_rate": 48e-05, + "opt_learning_rate_warmup_steps": 1334, + "gradient_accumulation_steps": 3456 + }, + "Epochs to converge": [ + 580608,580608,580608, + 628992,628992,628992 + ] + }, + "llama31_405b_ref_9216": + { + "Benchmark": "llama31_405b", + "Creator": "NVIDIA", + "When": "Reference RCPs before 5.0 submission", + "Platform": "288xDGX-H100", + "BS": 9216, + "Hyperparams": { + "opt_base_learning_rate": 64e-05, + "opt_learning_rate_warmup_steps": 1000, + "gradient_accumulation_steps": 1152 + }, + "Epochs to converge": [ + 645120,645120,691200, + 691200,737280,737280 + ] + } + } + \ No newline at end of file diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json new file mode 100644 index 0000000..bed1d1c --- /dev/null +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json @@ -0,0 +1,25 @@ +{ + + "llama31_8b_ref_X": + { + "Benchmark": "", + "Creator": "", + "When": "", + "Platform": "", + "BS": 0, + "Hyperparams": { + "opt_base_learning_rate": 0, + "opt_epsilon": 0, + "opt_learning_rate_training_steps": 0, + "num_warmup_steps": 0, + "start_warmup_step": 0, + "opt_lamb_beta_1": 0, + "opt_lamb_beta_2": 0, + "opt_lamb_weight_decay_rate": 0, + "gradient_accumulation_steps": 0 + }, + "Epochs to converge": [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + } +} diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index 1c4b968..cba24d7 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -22,7 +22,7 @@ def main(): parser.add_argument('--usage', type=str, default='training', choices=['training', 'hpc'], help="the WG that produced the benchmark") - parser.add_argument('--version', type=str, default='5.0.0', + parser.add_argument('--version', type=str, default='5.1.0', help='what version of the ruleset') parser.add_argument('--verbose', action='store_true') parser.add_argument('--unpruned', action='store_true', diff --git a/mlperf_logging/result_summarizer/config.yaml b/mlperf_logging/result_summarizer/config.yaml index e0306fa..897d29a 100644 --- a/mlperf_logging/result_summarizer/config.yaml +++ b/mlperf_logging/result_summarizer/config.yaml @@ -94,12 +94,12 @@ columns: llama31_405b: ["Benchmark results (minutes)", "LLM", "C4", "Llama31-405B"] default: [" ", " ", " "] "5.1.0": - bert: ["Benchmark results (minutes)", "NLP", "Wikipedia", "BERT"] dlrm_dcnv2: ["Benchmark results (minutes)", "Recommendation", "1TB Multihot Clickthrough", "DLRM DCNv2"] retinanet: ["Benchmark 
results (minutes)", "Object detection, light-weight", "OpenImages", "RetinaNet"] - flux: ["Benchmark results (minutes)", "Text to image", "CC12M and Coco-2014", "Flux"] + flux1: ["Benchmark results (minutes)", "Text to image", "CC12M and Coco-2014 for eval", "Flux1"] llama2_70b_lora: ["Benchmark results (minutes)", "LLM-Finetune", "SCROLSS Gov Report", "LLama2-70B-LoRA"] rgat: ["Benchmark results (minutes)", "Graph node classification", "IGBH-Full", "R-GAT"] + llama31_8b: ["Benchmark results (minutes)", "Small LLM", "C4", "Llama31-8b"] llama31_405b: ["Benchmark results (minutes)", "LLM", "C4", "Llama31-405B"] default: [" ", " ", " "] From 7ccd11ab07c689e4f29c46c348ccfa34ca7d8043 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Thu, 17 Jul 2025 17:09:43 -0500 Subject: [PATCH 2/2] Minor fixes and renames --- mlperf_logging/compliance_checker/README.md | 2 +- .../training_5.1.0/closed_flux.yaml | 41 --------- .../training_5.1.0/closed_flux1.yaml | 21 +---- .../training_5.1.0/open_flux.yaml | 6 -- .../training_5.1.0/open_flux1.yaml | 19 ---- mlperf_logging/rcp_checker/rcp_checker.py | 6 +- .../rcp_checker/training_5.1.0/rcps_flux.json | 65 ------------- .../training_5.1.0/rcps_flux1.json | 92 +++++++++---------- 8 files changed, 51 insertions(+), 201 deletions(-) delete mode 100644 mlperf_logging/compliance_checker/training_5.1.0/closed_flux.yaml delete mode 100644 mlperf_logging/compliance_checker/training_5.1.0/open_flux.yaml delete mode 100644 mlperf_logging/rcp_checker/training_5.1.0/rcps_flux.json diff --git a/mlperf_logging/compliance_checker/README.md b/mlperf_logging/compliance_checker/README.md index 523ce1b..48c6ed5 100644 --- a/mlperf_logging/compliance_checker/README.md +++ b/mlperf_logging/compliance_checker/README.md @@ -12,7 +12,7 @@ To check a log file for compliance: By default, 5.1.0 training edition rules are used and the default config is set to `5.1.0/common.yaml`. This config will check all common keys and enqueue benchmark specific config to be checked as well. -Old training editions, still supported are 5.0.0, 4.1.0 4.0.0, 3.1.0, 3.0.0, 2.1.0, 2.0.0, 1.1.0, 1.0.0, 0.7.0 and 0.6.0 +Old training editions, still supported are 5.0.0, 4.1.0, 4.0.0, 3.1.0, 3.0.0, 2.1.0, 2.0.0, 1.1.0, 1.0.0, 0.7.0 and 0.6.0 To check hpc compliance rules (only 1.0.0 ruleset is supported), set --usage hpc --ruleset 1.0.0. 
diff --git a/mlperf_logging/compliance_checker/training_5.1.0/closed_flux.yaml b/mlperf_logging/compliance_checker/training_5.1.0/closed_flux.yaml deleted file mode 100644 index d0ed330..0000000 --- a/mlperf_logging/compliance_checker/training_5.1.0/closed_flux.yaml +++ /dev/null @@ -1,41 +0,0 @@ -- KEY: - NAME: global_batch_size - REQ: EXACTLY_ONE - POST: > - s['global_batch_size'] = v['value'] - - -- KEY: - NAME: opt_learning_rate_warmup_steps - REQ: EXACTLY_ONE - -- KEY: - NAME: opt_base_learning_rate - REQ: EXACTLY_ONE - -- KEY: - NAME: opt_gradient_clip_norm - REQ: EXACTLY_ONE - -- KEY: - NAME: opt_adamw_weight_decay - REQ: EXACTLY_ONE - -- KEY: - NAME: opt_adamw_epsilon - REQ: EXACTLY_ONE - -- KEY: - NAME: opt_adamw_beta_1 - REQ: EXACTLY_ONE - -- KEY: - NAME: opt_adamw_beta_2 - REQ: EXACTLY_ONE - -- KEY: - NAME: eval_accuracy - REQ: AT_LEAST_ONE - CHECK: - - "'samples_count' in v['metadata']" - ATLEAST_ONE_CHECK: "(v['value'] <= 0.6) and v['value'] > 0.0" diff --git a/mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml b/mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml index 984e9d0..23955bc 100644 --- a/mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml +++ b/mlperf_logging/compliance_checker/training_5.1.0/closed_flux1.yaml @@ -4,25 +4,6 @@ # called aggregated_eval_accuracy, which aggregates # both metrics into a single log line -- BEGIN: - CODE: | - from dataclasses import replace - agg_eval_lines = {} - for line in loglines: - if line.key == "eval_accuracy" and 'metric' in line.value['metadata']: - samples_count = line.value['metadata']['samples_count'] - if samples_count not in agg_eval_lines: - new_line = replace(line) # Make a copy - new_line.key = "aggregated_eval_accuracy" - new_line.full_string = "" # Not needed - new_line.lineno = -1 # Not needed - new_line.value = {'value': {'samples_count': samples_count}, 'metadata':{}} - agg_eval_lines[samples_count] = new_line - - agg_eval_lines[samples_count].timestamp = max(line.timestamp, agg_eval_lines[samples_count].timestamp) - agg_eval_lines[samples_count].value['value'][line.value['metadata']['metric']] = line.value['value'] - loglines.extend(agg_eval_lines.values()) - - KEY: NAME: global_batch_size REQ: AT_LEAST_ONE @@ -70,7 +51,7 @@ # TODO: Update with official metric name - KEY: - NAME: averaged_validation_loss + NAME: eval_accuracy REQ: AT_LEAST_ONE CHECK: - "'epoch_num' in v['metadata']" diff --git a/mlperf_logging/compliance_checker/training_5.1.0/open_flux.yaml b/mlperf_logging/compliance_checker/training_5.1.0/open_flux.yaml deleted file mode 100644 index f732825..0000000 --- a/mlperf_logging/compliance_checker/training_5.1.0/open_flux.yaml +++ /dev/null @@ -1,6 +0,0 @@ -- KEY: - NAME: eval_accuracy - REQ: AT_LEAST_ONE - CHECK: - - "'samples_count' in v['metadata']" - ATLEAST_ONE_CHECK: "(v['value'] <= 0.6) and v['value'] > 0.0" diff --git a/mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml b/mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml index 19a69fa..4144e05 100644 --- a/mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml +++ b/mlperf_logging/compliance_checker/training_5.1.0/open_flux1.yaml @@ -4,25 +4,6 @@ # called aggregated_eval_accuracy, which aggregates # both metrics into a single log line -- BEGIN: - CODE: | - from dataclasses import replace - agg_eval_lines = {} - for line in loglines: - if line.key == "eval_accuracy" and 'metric' in line.value['metadata']: - samples_count = line.value['metadata']['samples_count'] 
- if samples_count not in agg_eval_lines: - new_line = replace(line) # Make a copy - new_line.key = "aggregated_eval_accuracy" - new_line.full_string = "" # Not needed - new_line.lineno = -1 # Not needed - new_line.value = {'value': {'samples_count': samples_count}, 'metadata':{}} - agg_eval_lines[samples_count] = new_line - - agg_eval_lines[samples_count].timestamp = max(line.timestamp, agg_eval_lines[samples_count].timestamp) - agg_eval_lines[samples_count].value['value'][line.value['metadata']['metric']] = line.value['value'] - loglines.extend(agg_eval_lines.values()) - # TODO: Update with official metric name - KEY: NAME: averaged_validation_loss diff --git a/mlperf_logging/rcp_checker/rcp_checker.py b/mlperf_logging/rcp_checker/rcp_checker.py index 1fb028b..c08f4ea 100644 --- a/mlperf_logging/rcp_checker/rcp_checker.py +++ b/mlperf_logging/rcp_checker/rcp_checker.py @@ -32,7 +32,7 @@ 'gnn': 10, 'rgat': 10, 'llama2_70b_lora': 10, - 'flux': 10, + 'flux1': 10, 'llama31_405b': 3, }, "hpc": { @@ -83,7 +83,7 @@ def read_submission_file(result_file, ruleset, use_train_samples): eval_metric = json.loads(eval_accuracy_str)["metadata"]["metric"] eval_score = json.loads(eval_accuracy_str)["value"] stable_diffusion_eval_results[eval_step][eval_metric] = eval_score - elif benchmark in {"llama2_70b_lora", "flux", "llama31_405b"} and ("eval_error" in str or "eval_accuracy" in str): + elif benchmark in {"llama2_70b_lora", "flux1", "llama31_405b"} and ("eval_error" in str or "eval_accuracy" in str): eval_accuracy_str = str conv_epoch = json.loads(eval_accuracy_str)["metadata"]["samples_count"] eval_score = json.loads(eval_accuracy_str)["value"] @@ -210,7 +210,7 @@ def _process_raw_rcp_data(self, raw_rcp_data): ''' processed_rcps = {} for record, record_contents in raw_rcp_data.items(): - conv_unit = "samples to converge" if record_contents['Benchmark'] in ['llama2_70b_lora', 'flux'] else "Epochs to converge" + conv_unit = "samples to converge" if record_contents['Benchmark'] in ['llama2_70b_lora', 'flux1'] else "Epochs to converge" processed_record = {'Benchmark': record_contents['Benchmark'], 'BS': record_contents['BS'], 'Hyperparams': record_contents['Hyperparams'], diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux.json deleted file mode 100644 index ba15ef9..0000000 --- a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "flux_ref_1024": { - "Benchmark": "flux", - "Creator": "NVIDIA", - "When": "Reference RCPs before v5.1", - "Platform": "8xDGX-B200", - "BS": 1024, - "Hyperparams": { - "opt_adamw_beta_1": 0.9, - "opt_adamw_beta_2": 0.95, - "opt_adamw_epsilon": 1e-8, - "opt_adamw_weight_decay": 0.1, - "opt_base_learning_rate": 2.0e-4, - "opt_learning_rate_warmup_steps": 0, - "opt_gradient_clip_norm": 1.0 - }, - "samples to converge": [ - 8912896, 8650752, 9437184, 8126464, 8388608, 9175040, 8650752, 8126464, - 8388608, 9961472, 7864320, 8126464, 9699328, 8650752, 9437184, 8912896, - 8388608, 9175040, 8126464, 9175040 - ] - }, - "flux_ref_2048": { - "Benchmark": "flux", - "Creator": "NVIDIA", - "When": "Reference RCPs before v5.1", - "Platform": "8xDGX-B200", - "BS": 2048, - "Hyperparams": { - "opt_adamw_beta_1": 0.9, - "opt_adamw_beta_2": 0.95, - "opt_adamw_epsilon": 1e-8, - "opt_adamw_weight_decay": 0.1, - "opt_base_learning_rate": 2.5e-4, - "opt_learning_rate_warmup_steps": 0, - "opt_gradient_clip_norm": 1.0 - }, - "samples to converge": [ - 11272192, 10223616, 11534336, 10747904, 
9699328, 10485760, 11010048, - 10223616, 11796480, 10485760, 10747904, 11272192, 9699328, 10485760, - 11534336, 9961472, 10485760, 10485760, 11272192, 11272192 - ] - }, - "flux_ref_4096": { - "Benchmark": "flux", - "Creator": "NVIDIA", - "When": "Reference RCPs before v5.1", - "Platform": "8xDGX-B200", - "BS": 4096, - "Hyperparams": { - "opt_adamw_beta_1": 0.9, - "opt_adamw_beta_2": 0.95, - "opt_adamw_epsilon": 1e-8, - "opt_adamw_weight_decay": 0.1, - "opt_base_learning_rate": 4.0e-4, - "opt_learning_rate_warmup_steps": 100, - "opt_gradient_clip_norm": 1.0 - }, - "samples to converge": [ - 15466496, 15728640, 15990784, 15466496, 15728640, 15466496, 14942208, - 14680064, 15728640, 15990784, 15990784, 15728640, 15728640, 16252928, - 14942208, 15728640, 16252928, 15204352, 16515072, 14942208 - ] - } -} diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json index e6c0fa8..3fb9815 100644 --- a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json @@ -1,65 +1,65 @@ { - "flux1_ref_1024": - { + "flux_ref_1024": { "Benchmark": "flux1", - "Creator": "", - "When": "", - "Platform": "", + "Creator": "NVIDIA", + "When": "Reference RCPs before v5.1", + "Platform": "8xDGX-B200", "BS": 1024, "Hyperparams": { - "opt_adamw_beta_1": 0, - "opt_adamw_beta_2": 0, - "opt_adamw_epsilon": 0, - "opt_adamw_weight_decay": 0, - "opt_base_learning_rate": 0, - "opt_learning_rate_warmup_steps": 0 + "opt_adamw_beta_1": 0.9, + "opt_adamw_beta_2": 0.95, + "opt_adamw_epsilon": 1e-8, + "opt_adamw_weight_decay": 0.1, + "opt_base_learning_rate": 2.0e-4, + "opt_learning_rate_warmup_steps": 0, + "opt_gradient_clip_norm": 1.0 }, - "Epochs to converge": [ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0 + "samples to converge": [ + 8912896, 8650752, 9437184, 8126464, 8388608, 9175040, 8650752, 8126464, + 8388608, 9961472, 7864320, 8126464, 9699328, 8650752, 9437184, 8912896, + 8388608, 9175040, 8126464, 9175040 ] }, - "flux1_ref_2048": - { + "flux_ref_2048": { "Benchmark": "flux1", - "Creator": "", - "When": "", - "Platform": "", + "Creator": "NVIDIA", + "When": "Reference RCPs before v5.1", + "Platform": "8xDGX-B200", "BS": 2048, "Hyperparams": { - "opt_adamw_beta_1": 0, - "opt_adamw_beta_2": 0, - "opt_adamw_epsilon": 0, - "opt_adamw_weight_decay": 0, - "opt_base_learning_rate": 0, - "opt_learning_rate_warmup_steps": 0 + "opt_adamw_beta_1": 0.9, + "opt_adamw_beta_2": 0.95, + "opt_adamw_epsilon": 1e-8, + "opt_adamw_weight_decay": 0.1, + "opt_base_learning_rate": 2.5e-4, + "opt_learning_rate_warmup_steps": 0, + "opt_gradient_clip_norm": 1.0 }, - "Epochs to converge": [ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0 + "samples to converge": [ + 11272192, 10223616, 11534336, 10747904, 9699328, 10485760, 11010048, + 10223616, 11796480, 10485760, 10747904, 11272192, 9699328, 10485760, + 11534336, 9961472, 10485760, 10485760, 11272192, 11272192 ] }, - "flux1_ref_4096": - { + "flux_ref_4096": { "Benchmark": "flux1", - "Creator": "", - "When": "", - "Platform": "", + "Creator": "NVIDIA", + "When": "Reference RCPs before v5.1", + "Platform": "8xDGX-B200", "BS": 4096, "Hyperparams": { - "opt_adamw_beta_1": 0, - "opt_adamw_beta_2": 0, - "opt_adamw_epsilon": 0, - "opt_adamw_weight_decay": 0, - "opt_base_learning_rate": 0, - "opt_learning_rate_warmup_steps": 0 + "opt_adamw_beta_1": 0.9, + "opt_adamw_beta_2": 0.95, + "opt_adamw_epsilon": 1e-8, + "opt_adamw_weight_decay": 
0.1, + "opt_base_learning_rate": 4.0e-4, + "opt_learning_rate_warmup_steps": 100, + "opt_gradient_clip_norm": 1.0 }, - "Epochs to converge": [ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0 + "samples to converge": [ + 15466496, 15728640, 15990784, 15466496, 15728640, 15466496, 14942208, + 14680064, 15728640, 15990784, 15990784, 15728640, 15728640, 16252928, + 14942208, 15728640, 16252928, 15204352, 16515072, 14942208 ] } -} \ No newline at end of file +}
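
For context on the rcp_checker.py change earlier in this series (extending the llama31_405b max-speedup relaxation from ruleset 5.0.0 to 5.1.0), here is a small worked example of the formula using the BS=1152 reference from rcps_llama31_405b.json. It assumes RCP Mean and Min Epochs are taken directly from the listed convergence points; the actual checker may additionally prune outliers before computing them.

    # Illustrative only: values come from the "llama31_405b_ref_1152" RCP entry.
    convergence_points = [322560] * 6          # samples to converge at BS = 1152
    rcp_mean = sum(convergence_points) / len(convergence_points)   # 322560.0
    min_epochs = min(convergence_points)                           # 322560

    # rcp_checker.py, for ruleset in {"5.0.0", "5.1.0"} and benchmark == "llama31_405b":
    max_speedup = rcp_mean / (min_epochs - 46080)                  # ~1.1667

    # i.e. roughly a 1.17x allowed speedup relative to the reference mean
    # convergence for this benchmark.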