Skip to content

Beamline neural network reconstruction training #171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 37 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
08586ff
Added files
simonge Sep 16, 2024
df1b38e
Update
simonge Oct 30, 2024
810cc3b
Make canvas name more useful
simonge Mar 11, 2025
0973e5d
Add beam generator macro
simonge Mar 11, 2025
1323fe0
Update analysis
simonge Apr 30, 2025
6329372
Add phasespace scripts
simonge May 2, 2025
67d765c
Change name of functors
simonge May 2, 2025
3afabcb
Remove image and acceptance benchmarks for now
simonge Jun 4, 2025
99633c2
Prepare for CI testing
simonge Jun 4, 2025
0deb701
Remove other benchmarks
simonge Jun 4, 2025
163341b
Add missing config.yml
simonge Jun 4, 2025
7a58355
Correct typo
simonge Jun 4, 2025
5cd62be
Re-enable other benchmarks and update cores
simonge Jun 17, 2025
312ca42
Add some pass fail checks
simonge Jun 18, 2025
02e0c0e
Set sim and analysis dir by variable
simonge Jun 18, 2025
e39d0f9
Revert exclusion of other benchmarks
simonge Jun 19, 2025
67d4206
Review suggestions
simonge Jun 19, 2025
81fad99
Snakefile: fix for out-of-tree running
veprbl Jun 19, 2025
6b21ff1
Snakefile restore indentation
veprbl Jun 19, 2025
b18bf3f
Add header to inputs too
simonge Jun 23, 2025
c7e268f
Add low-q2 phase space electron tests
simonge May 20, 2025
c964d40
Add acceptance sim
simonge Jun 19, 2025
8b98ed6
Make simulation run with correct range and 1000x faster
simonge Jun 20, 2025
31fe8ef
Add outputs from script
simonge Jun 20, 2025
ccddf53
Change code inputs to workflow source path
simonge Jun 23, 2025
f44d690
rename phasespace to acceptance
simonge Jun 23, 2025
4df2119
Remove unused code
simonge Jun 23, 2025
96e8b50
Define both simulations in the yml
simonge Jun 23, 2025
40d4fe8
Merge remote-tracking branch 'origin/master' into beamline_acceptance
simonge Jun 23, 2025
6878af8
Add entry fraction plot
simonge Jun 24, 2025
09f8681
Make filtering more robust
simonge Jun 24, 2025
40e1fa9
Change entry limit warning
simonge Jun 24, 2025
158c8ef
Add reconstruction training based on beampipe exit
simonge Jun 25, 2025
d937fdb
Update model and atempt to setup snakemake
simonge Jun 25, 2025
e5a4ab8
Fix snakemane rule and silence npsim info
simonge Jun 25, 2025
13030cc
Fix snakemake attribute
simonge Jun 25, 2025
a208cd7
Update benchmarks/beamline/Snakefile
simonge Jul 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions benchmarks/beamline/ProcessData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import uproot
import awkward as ak

def create_arrays(dataFiles,beamEnergy=18):

# List of branches to load
branches = ["features","targets"]

# Load data from concatenated list of files
data = uproot.concatenate([f"{file}:events" for file in dataFiles], branches, library="ak")

input_data = data["features"]
target_data = data["targets"]/beamEnergy

return input_data, target_data
141 changes: 141 additions & 0 deletions benchmarks/beamline/RegressionModel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class ProjectToX0Plane(nn.Module):
def forward(self, x):
# x shape: (batch, 6) -> [x, y, z, px, py, pz]
x0 = x[:, 0]
y0 = x[:, 1]
z0 = x[:, 2]
px = x[:, 3]
py = x[:, 4]
pz = x[:, 5]

# Avoid division by zero for px
eps = 1e-8
px_safe = torch.where(px.abs() < eps, eps * torch.sign(px) + eps, px)
t = -x0 / px_safe

y_proj = y0 + py * t
z_proj = z0 + pz * t

# Output: [y_proj, z_proj, px, pz]
return torch.stack([y_proj, z_proj, px, pz], dim=1)

class RegressionModel(nn.Module):
def __init__(self):
super(RegressionModel, self).__init__()
self.project_to_x0 = ProjectToX0Plane()
self.fc1 = nn.Linear(4, 512)
self.fc2 = nn.Linear(512, 64)
self.fc4 = nn.Linear(64, 3)

# Normalization parameters
self.input_mean = nn.Parameter(torch.zeros(4), requires_grad=False)
self.input_std = nn.Parameter(torch.ones(4), requires_grad=False)
self.output_mean = nn.Parameter(torch.zeros(3), requires_grad=False)
self.output_std = nn.Parameter(torch.ones(3), requires_grad=False)

def forward(self, x):
# Apply projection and normalization
x = self.project_to_x0(x)
x = (x - self.input_mean) / self.input_std

# Pass through the fully connected layers
x = self._core_forward(x)

# Denormalize outputs
x = x * self.output_std + self.output_mean
return x

def _core_forward(self, x):
# Core fully connected layers
x = torch.tanh(self.fc1(x))
x = torch.tanh(self.fc2(x))
x = self.fc4(x)
return x

def adapt(self, input_data, output_data):
# Compute normalization parameters from training data
self.input_mean.data = torch.tensor(input_data.mean(axis=0), dtype=torch.float32)
self.input_std.data = torch.tensor(input_data.std(axis=0), dtype=torch.float32)
self.output_mean.data = torch.tensor(output_data.mean(axis=0), dtype=torch.float32)
self.output_std.data = torch.tensor(output_data.std(axis=0), dtype=torch.float32)

def preprocess_data(model, data_loader):
inputs = data_loader.dataset.tensors[0]
targets = data_loader.dataset.tensors[1]

# Apply projection
projected_inputs = ProjectToX0Plane()(inputs)

# Compute normalization parameters
model.adapt(projected_inputs.detach().numpy(), targets.detach().numpy())

# Normalize inputs and targets
normalized_inputs = (projected_inputs - model.input_mean) / model.input_std
normalized_targets = (targets - model.output_mean) / model.output_std

# Replace the dataset with preprocessed data
data_loader.dataset.tensors = (normalized_inputs, normalized_targets)

def makeModel():
# Create the model
model = RegressionModel()
# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# Define the loss function
criterion = nn.MSELoss()

return model, optimizer, criterion

def trainModel(epochs, train_loader, val_loader):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model, optimizer, criterion = makeModel()
model.to(device)

# Verify that the model parameters are on the GPU
# for name, param in model.named_parameters():
# print(f"{name} is on {param.device}")

# Preprocess training and validation data
preprocess_data(model, train_loader)
preprocess_data(model, val_loader)

for epoch in range(epochs):
model.train()
running_loss = 0.0
for inputs, targets in train_loader:
# inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = model._core_forward(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
running_loss += loss.item() * inputs.size(0)

epoch_loss = running_loss / len(train_loader.dataset)
# print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")


# Validation step
model.eval()
val_loss = 0.0
with torch.no_grad():
for val_inputs, val_targets in val_loader:
# val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
val_outputs = model._core_forward(val_inputs)
val_loss += criterion(val_outputs, val_targets).item() * val_inputs.size(0)
# val_outputs = model(val_input)
# val_loss = criterion(val_outputs, val_target)

val_loss /= len(val_loader.dataset)

print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss}, Val Loss: {val_loss}")

return model
86 changes: 85 additions & 1 deletion benchmarks/beamline/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,27 @@ rule beamline_steering_sim:
--enableG4GPS \
--macroFile {input.macro} \
--compactFile $DETECTOR_PATH/epic_ip6_extended.xml \
--printLevel WARNING \
--outputFile {output} \
--physics.rangecut 1000000 \
"""

rule beamline_acceptance_sim:
input:
warmup="warmup/epic_ip6_extended.edm4hep.root",
macro=workflow.source_path("acceptanceGPS.mac"),
output:
SIMOUTDIR+"acceptanceTest{CAMPAIGN}.edm4hep.root",
shell:
"""
exec npsim \
--runType run \
--enableG4GPS \
--macroFile {input.macro} \
--compactFile $DETECTOR_PATH/epic_ip6_extended.xml \
--printLevel WARNING \
--outputFile {output} \
--physics.rangecut 1000000 \
"""

rule beamline_steering_analysis:
Expand All @@ -39,6 +59,65 @@ rule beamline_steering_analysis:
"{output.pipe_parameter_canvas}")'
"""

rule beamline_acceptance_analysis:
params:
xml=os.getenv("DETECTOR_PATH")+"/epic_ip6_extended.xml",
input:
script=workflow.source_path("acceptanceAnalysis.C"),
header=workflow.source_path("shared_functions.h"),
data=SIMOUTDIR+"acceptanceTest{CAMPAIGN}.edm4hep.root",
output:
rootfile=ANALYSISDIR+"acceptanceTestAnalysis{CAMPAIGN}.root",
beampipe_canvas=ANALYSISDIR+"acceptance_in_beampipe_{CAMPAIGN}.png",
etheta_canvas=ANALYSISDIR+"acceptance_energy_theta_{CAMPAIGN}.png",
etheta_acceptance_canvas=ANALYSISDIR+"acceptance_energy_theta_acceptance_{CAMPAIGN}.png",
entrys_canvas=ANALYSISDIR+"acceptance_entrys_{CAMPAIGN}.png",
shell:
"""
root -l -b -q '{input.script}("{input.data}", "{output.rootfile}", "{params.xml}", "{output.beampipe_canvas}","{output.etheta_canvas}","{output.etheta_acceptance_canvas}",
"{output.entrys_canvas}")'
"""

# Processes the simulation output data for training
rule beamline_steering_reconstruction_preperation:
input:
script=workflow.source_path("processData.C"),
data=SIMOUTDIR+"acceptanceTest{CAMPAIGN}.edm4hep.root",
output:
rootfile=ANALYSISDIR+"BeamlineSteeringReconstructionData{CAMPAIGN}.root",
shell:
"""
root -l -b -q '{input.script}("{input.data}", "{output.rootfile}")'
"""

# Trains a regression model to predict the MCParticle momentum from the B2eR beamline virtual tracker hits.
rule beamline_steering_reconstruction_training:
input:
script=workflow.source_path("SteeringRegression.py"),
scriptdata=workflow.source_path("ProcessData.py"),
scriptmodel=workflow.source_path("RegressionModel.py"),
data=ANALYSISDIR+"BeamlineSteeringReconstructionData{CAMPAIGN}.root",
output:
onnxfile=ANALYSISDIR+"BeamlineSteeringReconstruction{CAMPAIGN}.onnx",
shell:
"""
python {input.script} --dataFiles {input.data} --outModelFile {output.onnxfile}
"""

# Trains a regression model to predict the MCParticle momentum from the B2eR beamline virtual tracker hits.
rule beamline_steering_reconstruction_test:
input:
script=workflow.source_path("TestModel.py"),
data=ANALYSISDIR+"BeamlineSteeringReconstructionData{CAMPAIGN}.root",
model=ANALYSISDIR+"BeamlineSteeringReconstruction{CAMPAIGN}.onnx",
output:
directory("sim_output/beamline/analysis/NN_Test_{CAMPAIGN}/")
shell:
"""
mkdir {output}
python {input.script} --dataFiles {input.data} --modelFile {input.model} --outDir {output}
"""

rule beamline:
input:
ANALYSISDIR+"beamlineTestAnalysis{CAMPAIGN}.root",
Expand All @@ -47,7 +126,12 @@ rule beamline:
ANALYSISDIR+"y_py_{CAMPAIGN}.png",
ANALYSISDIR+"fitted_position_means_stdevs_{CAMPAIGN}.png",
ANALYSISDIR+"fitted_momentum_means_stdevs_{CAMPAIGN}.png",
ANALYSISDIR+"pipe_parameter_{CAMPAIGN}.png",
ANALYSISDIR+"pipe_parameter_{CAMPAIGN}.png",
ANALYSISDIR+"acceptanceTestAnalysis{CAMPAIGN}.root",
ANALYSISDIR+"acceptance_in_beampipe_{CAMPAIGN}.png",
ANALYSISDIR+"acceptance_energy_theta_{CAMPAIGN}.png",
ANALYSISDIR+"acceptance_energy_theta_acceptance_{CAMPAIGN}.png",
ANALYSISDIR+"acceptance_entrys_{CAMPAIGN}.png",
output:
directory("results/beamline/{CAMPAIGN}/")
shell:
Expand Down
55 changes: 55 additions & 0 deletions benchmarks/beamline/SteeringRegression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import torch
import argparse
from ProcessData import create_arrays
from torch.utils.data import DataLoader, TensorDataset
from RegressionModel import makeModel, trainModel

# Parse arguments
parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.')
parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files')
parser.add_argument('--outModelFile', type=str, default="regression_model.onnx", help='Output file for the trained model')
parser.add_argument('--batchSize', type=int, default=4096, help='Batch size for training')
parser.add_argument('--epochs', type=int, default=100, help='Number of epochs for training')
args = parser.parse_args()

input_data, target_data = create_arrays(args.dataFiles)

# print(f"Input data shape: {input_data.shape}")
# print(f"Target data shape: {target_data.shape}")

torch_input_data = torch.tensor(input_data)
torch_target_data = torch.tensor(target_data)

print(f"Input data shape: {torch_input_data.shape}")
print(f"Target data shape: {torch_target_data.shape}")

# Split data into training and validation sets
validation_fraction = 0.25
split_index = int(len(torch_input_data) * (1 - validation_fraction))

val_input_data = torch_input_data[split_index:]
val_target_data = torch_target_data[split_index:]
train_input_data = torch_input_data[:split_index]
train_target_data = torch_target_data[:split_index]

# Create TensorDatasets
train_dataset = TensorDataset(train_input_data, train_target_data)
val_dataset = TensorDataset(val_input_data, val_target_data)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=args.batchSize, shuffle=True )
val_loader = DataLoader(val_dataset, batch_size=args.batchSize, shuffle=False)

print(f"Training data: {len(train_input_data)} samples")

model = trainModel(args.epochs, train_loader, val_loader)

# Save the trained model to ONNX format

dummy_input = torch_input_data[0].unsqueeze(0) # Create a dummy input for the model

torch.onnx.export(model, dummy_input, args.outModelFile,
input_names=['input'], output_names=['output'],
dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})

print(f"Model has been saved to {args.outModelFile}")
Loading