diff --git a/benchmarks/beamline/ProcessData.py b/benchmarks/beamline/ProcessData.py new file mode 100644 index 00000000..7eb6ad1a --- /dev/null +++ b/benchmarks/beamline/ProcessData.py @@ -0,0 +1,15 @@ +import uproot +import awkward as ak + +def create_arrays(dataFiles,beamEnergy=18): + + # List of branches to load + branches = ["features","targets"] + + # Load data from concatenated list of files + data = uproot.concatenate([f"{file}:events" for file in dataFiles], branches, library="ak") + + input_data = data["features"] + target_data = data["targets"]/beamEnergy + + return input_data, target_data \ No newline at end of file diff --git a/benchmarks/beamline/RegressionModel.py b/benchmarks/beamline/RegressionModel.py new file mode 100644 index 00000000..02ce775f --- /dev/null +++ b/benchmarks/beamline/RegressionModel.py @@ -0,0 +1,141 @@ +import torch +import torch.nn as nn +import torch.optim as optim +import numpy as np + +class ProjectToX0Plane(nn.Module): + def forward(self, x): + # x shape: (batch, 6) -> [x, y, z, px, py, pz] + x0 = x[:, 0] + y0 = x[:, 1] + z0 = x[:, 2] + px = x[:, 3] + py = x[:, 4] + pz = x[:, 5] + + # Avoid division by zero for px + eps = 1e-8 + px_safe = torch.where(px.abs() < eps, eps * torch.sign(px) + eps, px) + t = -x0 / px_safe + + y_proj = y0 + py * t + z_proj = z0 + pz * t + + # Output: [y_proj, z_proj, px, pz] + return torch.stack([y_proj, z_proj, px, pz], dim=1) + +class RegressionModel(nn.Module): + def __init__(self): + super(RegressionModel, self).__init__() + self.project_to_x0 = ProjectToX0Plane() + self.fc1 = nn.Linear(4, 512) + self.fc2 = nn.Linear(512, 64) + self.fc4 = nn.Linear(64, 3) + + # Normalization parameters + self.input_mean = nn.Parameter(torch.zeros(4), requires_grad=False) + self.input_std = nn.Parameter(torch.ones(4), requires_grad=False) + self.output_mean = nn.Parameter(torch.zeros(3), requires_grad=False) + self.output_std = nn.Parameter(torch.ones(3), requires_grad=False) + + def forward(self, x): + # Apply projection and normalization + x = self.project_to_x0(x) + x = (x - self.input_mean) / self.input_std + + # Pass through the fully connected layers + x = self._core_forward(x) + + # Denormalize outputs + x = x * self.output_std + self.output_mean + return x + + def _core_forward(self, x): + # Core fully connected layers + x = torch.tanh(self.fc1(x)) + x = torch.tanh(self.fc2(x)) + x = self.fc4(x) + return x + + def adapt(self, input_data, output_data): + # Compute normalization parameters from training data + self.input_mean.data = torch.tensor(input_data.mean(axis=0), dtype=torch.float32) + self.input_std.data = torch.tensor(input_data.std(axis=0), dtype=torch.float32) + self.output_mean.data = torch.tensor(output_data.mean(axis=0), dtype=torch.float32) + self.output_std.data = torch.tensor(output_data.std(axis=0), dtype=torch.float32) + +def preprocess_data(model, data_loader): + inputs = data_loader.dataset.tensors[0] + targets = data_loader.dataset.tensors[1] + + # Apply projection + projected_inputs = ProjectToX0Plane()(inputs) + + # Compute normalization parameters + model.adapt(projected_inputs.detach().numpy(), targets.detach().numpy()) + + # Normalize inputs and targets + normalized_inputs = (projected_inputs - model.input_mean) / model.input_std + normalized_targets = (targets - model.output_mean) / model.output_std + + # Replace the dataset with preprocessed data + data_loader.dataset.tensors = (normalized_inputs, normalized_targets) + +def makeModel(): + # Create the model + model = RegressionModel() + # Define the optimizer + optimizer = optim.Adam(model.parameters(), lr=0.0001) + # Define the loss function + criterion = nn.MSELoss() + + return model, optimizer, criterion + +def trainModel(epochs, train_loader, val_loader): + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + model, optimizer, criterion = makeModel() + model.to(device) + + # Verify that the model parameters are on the GPU + # for name, param in model.named_parameters(): + # print(f"{name} is on {param.device}") + + # Preprocess training and validation data + preprocess_data(model, train_loader) + preprocess_data(model, val_loader) + + for epoch in range(epochs): + model.train() + running_loss = 0.0 + for inputs, targets in train_loader: + # inputs, targets = inputs.to(device), targets.to(device) + optimizer.zero_grad() + outputs = model._core_forward(inputs) + loss = criterion(outputs, targets) + loss.backward() + optimizer.step() + running_loss += loss.item() * inputs.size(0) + + epoch_loss = running_loss / len(train_loader.dataset) + # print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}") + + + # Validation step + model.eval() + val_loss = 0.0 + with torch.no_grad(): + for val_inputs, val_targets in val_loader: + # val_inputs, val_targets = val_inputs.to(device), val_targets.to(device) + val_outputs = model._core_forward(val_inputs) + val_loss += criterion(val_outputs, val_targets).item() * val_inputs.size(0) + # val_outputs = model(val_input) + # val_loss = criterion(val_outputs, val_target) + + val_loss /= len(val_loader.dataset) + + print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss}, Val Loss: {val_loss}") + + return model \ No newline at end of file diff --git a/benchmarks/beamline/Snakefile b/benchmarks/beamline/Snakefile index b9478bc6..75c524e7 100644 --- a/benchmarks/beamline/Snakefile +++ b/benchmarks/beamline/Snakefile @@ -13,7 +13,27 @@ rule beamline_steering_sim: --enableG4GPS \ --macroFile {input.macro} \ --compactFile $DETECTOR_PATH/epic_ip6_extended.xml \ + --printLevel WARNING \ --outputFile {output} \ + --physics.rangecut 1000000 \ + """ + +rule beamline_acceptance_sim: + input: + warmup="warmup/epic_ip6_extended.edm4hep.root", + macro=workflow.source_path("acceptanceGPS.mac"), + output: + SIMOUTDIR+"acceptanceTest{CAMPAIGN}.edm4hep.root", + shell: + """ + exec npsim \ + --runType run \ + --enableG4GPS \ + --macroFile {input.macro} \ + --compactFile $DETECTOR_PATH/epic_ip6_extended.xml \ + --printLevel WARNING \ + --outputFile {output} \ + --physics.rangecut 1000000 \ """ rule beamline_steering_analysis: @@ -39,6 +59,65 @@ rule beamline_steering_analysis: "{output.pipe_parameter_canvas}")' """ +rule beamline_acceptance_analysis: + params: + xml=os.getenv("DETECTOR_PATH")+"/epic_ip6_extended.xml", + input: + script=workflow.source_path("acceptanceAnalysis.C"), + header=workflow.source_path("shared_functions.h"), + data=SIMOUTDIR+"acceptanceTest{CAMPAIGN}.edm4hep.root", + output: + rootfile=ANALYSISDIR+"acceptanceTestAnalysis{CAMPAIGN}.root", + beampipe_canvas=ANALYSISDIR+"acceptance_in_beampipe_{CAMPAIGN}.png", + etheta_canvas=ANALYSISDIR+"acceptance_energy_theta_{CAMPAIGN}.png", + etheta_acceptance_canvas=ANALYSISDIR+"acceptance_energy_theta_acceptance_{CAMPAIGN}.png", + entrys_canvas=ANALYSISDIR+"acceptance_entrys_{CAMPAIGN}.png", + shell: + """ + root -l -b -q '{input.script}("{input.data}", "{output.rootfile}", "{params.xml}", "{output.beampipe_canvas}","{output.etheta_canvas}","{output.etheta_acceptance_canvas}", + "{output.entrys_canvas}")' + """ + +# Processes the simulation output data for training +rule beamline_steering_reconstruction_preperation: + input: + script=workflow.source_path("processData.C"), + data=SIMOUTDIR+"acceptanceTest{CAMPAIGN}.edm4hep.root", + output: + rootfile=ANALYSISDIR+"BeamlineSteeringReconstructionData{CAMPAIGN}.root", + shell: + """ + root -l -b -q '{input.script}("{input.data}", "{output.rootfile}")' + """ + +# Trains a regression model to predict the MCParticle momentum from the B2eR beamline virtual tracker hits. +rule beamline_steering_reconstruction_training: + input: + script=workflow.source_path("SteeringRegression.py"), + scriptdata=workflow.source_path("ProcessData.py"), + scriptmodel=workflow.source_path("RegressionModel.py"), + data=ANALYSISDIR+"BeamlineSteeringReconstructionData{CAMPAIGN}.root", + output: + onnxfile=ANALYSISDIR+"BeamlineSteeringReconstruction{CAMPAIGN}.onnx", + shell: + """ + python {input.script} --dataFiles {input.data} --outModelFile {output.onnxfile} + """ + +# Trains a regression model to predict the MCParticle momentum from the B2eR beamline virtual tracker hits. +rule beamline_steering_reconstruction_test: + input: + script=workflow.source_path("TestModel.py"), + data=ANALYSISDIR+"BeamlineSteeringReconstructionData{CAMPAIGN}.root", + model=ANALYSISDIR+"BeamlineSteeringReconstruction{CAMPAIGN}.onnx", + output: + directory("sim_output/beamline/analysis/NN_Test_{CAMPAIGN}/") + shell: + """ + mkdir {output} + python {input.script} --dataFiles {input.data} --modelFile {input.model} --outDir {output} + """ + rule beamline: input: ANALYSISDIR+"beamlineTestAnalysis{CAMPAIGN}.root", @@ -47,7 +126,12 @@ rule beamline: ANALYSISDIR+"y_py_{CAMPAIGN}.png", ANALYSISDIR+"fitted_position_means_stdevs_{CAMPAIGN}.png", ANALYSISDIR+"fitted_momentum_means_stdevs_{CAMPAIGN}.png", - ANALYSISDIR+"pipe_parameter_{CAMPAIGN}.png", + ANALYSISDIR+"pipe_parameter_{CAMPAIGN}.png", + ANALYSISDIR+"acceptanceTestAnalysis{CAMPAIGN}.root", + ANALYSISDIR+"acceptance_in_beampipe_{CAMPAIGN}.png", + ANALYSISDIR+"acceptance_energy_theta_{CAMPAIGN}.png", + ANALYSISDIR+"acceptance_energy_theta_acceptance_{CAMPAIGN}.png", + ANALYSISDIR+"acceptance_entrys_{CAMPAIGN}.png", output: directory("results/beamline/{CAMPAIGN}/") shell: diff --git a/benchmarks/beamline/SteeringRegression.py b/benchmarks/beamline/SteeringRegression.py new file mode 100644 index 00000000..50d30004 --- /dev/null +++ b/benchmarks/beamline/SteeringRegression.py @@ -0,0 +1,55 @@ +import torch +import argparse +from ProcessData import create_arrays +from torch.utils.data import DataLoader, TensorDataset +from RegressionModel import makeModel, trainModel + +# Parse arguments +parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.') +parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files') +parser.add_argument('--outModelFile', type=str, default="regression_model.onnx", help='Output file for the trained model') +parser.add_argument('--batchSize', type=int, default=4096, help='Batch size for training') +parser.add_argument('--epochs', type=int, default=100, help='Number of epochs for training') +args = parser.parse_args() + +input_data, target_data = create_arrays(args.dataFiles) + +# print(f"Input data shape: {input_data.shape}") +# print(f"Target data shape: {target_data.shape}") + +torch_input_data = torch.tensor(input_data) +torch_target_data = torch.tensor(target_data) + +print(f"Input data shape: {torch_input_data.shape}") +print(f"Target data shape: {torch_target_data.shape}") + +# Split data into training and validation sets +validation_fraction = 0.25 +split_index = int(len(torch_input_data) * (1 - validation_fraction)) + +val_input_data = torch_input_data[split_index:] +val_target_data = torch_target_data[split_index:] +train_input_data = torch_input_data[:split_index] +train_target_data = torch_target_data[:split_index] + +# Create TensorDatasets +train_dataset = TensorDataset(train_input_data, train_target_data) +val_dataset = TensorDataset(val_input_data, val_target_data) + +# Create DataLoaders +train_loader = DataLoader(train_dataset, batch_size=args.batchSize, shuffle=True ) +val_loader = DataLoader(val_dataset, batch_size=args.batchSize, shuffle=False) + +print(f"Training data: {len(train_input_data)} samples") + +model = trainModel(args.epochs, train_loader, val_loader) + +# Save the trained model to ONNX format + +dummy_input = torch_input_data[0].unsqueeze(0) # Create a dummy input for the model + +torch.onnx.export(model, dummy_input, args.outModelFile, + input_names=['input'], output_names=['output'], + dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}) + +print(f"Model has been saved to {args.outModelFile}") \ No newline at end of file diff --git a/benchmarks/beamline/TestModel.py b/benchmarks/beamline/TestModel.py new file mode 100644 index 00000000..dfd95931 --- /dev/null +++ b/benchmarks/beamline/TestModel.py @@ -0,0 +1,211 @@ +import onnxruntime as ort +import argparse +import numpy as np +from ProcessData import create_arrays +import matplotlib.pyplot as plt +from matplotlib.colors import LogNorm +from scipy.stats import norm +from scipy.optimize import curve_fit + +# Parse arguments +parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.') +parser.add_argument('--modelFile', type=str, default="regression_model.onnx", help='Path to the ONNX model file') +parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files') +parser.add_argument('--outDir', type=str, default=".", help='Output directory') +args = parser.parse_args() +modelFile = args.modelFile +dataFiles = args.dataFiles +outDir = args.outDir +outGraphFile = outDir + "/output_vs_target.png" +outGraphFile2 = outDir + "/output_vs_target2.png" +outGraphFile3 = outDir + "/output_vs_target3.png" +outGraphFile4 = outDir + "/output_vs_target4.png" + +input_data, target_data = create_arrays(dataFiles) + +target_data = np.array(target_data) + +# Load the ONNX model +session = ort.InferenceSession(modelFile) + +# Run the model on the input data +input_name = session.get_inputs()[0].name +output_name = session.get_outputs()[0].name +input_data = np.array(input_data,dtype=np.float32) +output = session.run([output_name], {input_name: input_data}) +output = np.array(output[0]) + +out_theta = np.arctan2(np.sqrt(output[:,0]**2 + output[:,1]**2),output[:,2]) +out_phi = np.arctan2(output[:,1],output[:,0]) +out_mag = np.sqrt(output[:,0]**2 + output[:,1]**2 + output[:,2]**2) +in_theta = np.arctan2(np.sqrt(target_data[:,0]**2 + target_data[:,1]**2),target_data[:,2]) +in_phi = np.arctan2(target_data[:,1],target_data[:,0]) +in_mag = np.sqrt(target_data[:,0]**2 + target_data[:,1]**2 + target_data[:,2]**2) + + +thetadiff = out_theta - in_theta +phidiff = out_phi - in_phi +# Move phidiff to within -pi/2 and pi/2 +phidiff = (phidiff + np.pi) % (2 * np.pi) - np.pi +magdiff = out_mag - in_mag + +diff = (target_data - output)/target_data +diffrange = [[-5,5],[-5,5],[-0.5,0.5]] +datarange = [[-0.02,0.02],[-0.02,0.02],[-1,0]] + +# Use the 'seismic' colormap +cmap = plt.get_cmap('seismic') + +# Creates histograms to compare the target and output data +fig, axs = plt.subplots(3, 3, figsize=(12, 12)) +for i in range(3): + # 2D histograms showing trends in the data + axs[0,i].hist2d(target_data[:,i], output[:,i], bins=100, range=[datarange[i],datarange[i]], cmap="seismic", norm=LogNorm(), label="Output vs Target") + axs[0,i].set_xlabel(f"Variable {i} Target") + axs[0,i].set_ylabel(f"Variable {i} Output") + + axs[1,i].hist(diff[:,i], bins=100, alpha=0.5, range=diffrange[i], label="Difference") + axs[1,i].set_xlabel(f"Variable {i} Difference") + axs[1,i].set_ylabel("Counts") + + axs[2,i].hist2d(target_data[:,i], diff[:,i], bins=100, range=[datarange[i],diffrange[i]], cmap="seismic", norm=LogNorm(), label="Difference vs Target") + axs[2,i].set_xlabel(f"Variable {i} Target") + axs[2,i].set_ylabel(f"Variable {i} Difference") + +plt.show() +plt.savefig(outGraphFile) + +# Creates histograms to compare theta, phi and mag target and output data +fig2, axs2 = plt.subplots(3, 3, figsize=(12, 12)) + +thetarange = [np.pi-0.01,np.pi] +phirange = [-np.pi,np.pi] +magrange = [0,1] + +thetadiffrange = [-0.01,0.01] +phidiffrange = [-np.pi,np.pi] +magdiffrange = [-0.1,0.1] + +# 2D histograms showing trends in the data +axs2[0,0].hist2d(out_theta, in_theta, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,0].set_xlabel("Theta Target") +axs2[0,0].set_ylabel("Theta Output") + +axs2[0,1].hist2d(out_phi, in_phi, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs2[0,1].set_xlabel("Phi Target") +axs2[0,1].set_ylabel("Phi Output") + +axs2[0,2].hist2d(out_mag, in_mag, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,2].set_xlabel("Mag Target") +axs2[0,2].set_ylabel("Mag Output") + +axs2[1,0].hist(thetadiff, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs2[1,0].set_xlabel("Theta Difference") +axs2[1,0].set_ylabel("Counts") + +axs2[1,1].hist(phidiff, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs2[1,1].set_xlabel("Phi Difference") +axs2[1,1].set_ylabel("Counts") + +axs2[1,2].hist(magdiff, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs2[1,2].set_xlabel("Mag Difference") +axs2[1,2].set_ylabel("Counts") + +axs2[2,0].hist2d(in_theta, thetadiff, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,0].set_xlabel("Theta Target") +axs2[2,0].set_ylabel("Theta Difference") + +axs2[2,1].hist2d(in_phi, phidiff, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs2[2,1].set_xlabel("Phi Target") +axs2[2,1].set_ylabel("Phi Difference") + +axs2[2,2].hist2d(in_mag, magdiff, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,2].set_xlabel("Mag Target") +axs2[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile2) + +# Create histograms where the theta value has been cut at less than 3.14 +fig3, axs3 = plt.subplots(3, 3, figsize=(12, 12)) + +out_theta_cut = out_theta[out_theta < 3.14] +in_theta_cut = in_theta[out_theta < 3.14] +thetadiff_cut = thetadiff[out_theta < 3.14] + +out_phi_cut = out_phi[out_theta < 3.14] +in_phi_cut = in_phi[out_theta < 3.14] +phidiff_cut = phidiff[out_theta < 3.14] + +out_mag_cut = out_mag[out_theta < 3.14] +in_mag_cut = in_mag[out_theta < 3.14] +magdiff_cut = magdiff[out_theta < 3.14] + +axs3[0,0].hist2d(out_theta_cut, in_theta_cut, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,0].set_xlabel("Theta Target") +axs3[0,0].set_ylabel("Theta Output") + +axs3[0,1].hist2d(out_phi_cut, in_phi_cut, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs3[0,1].set_xlabel("Phi Target") +axs3[0,1].set_ylabel("Phi Output") + +axs3[0,2].hist2d(out_mag_cut, in_mag_cut, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,2].set_xlabel("Mag Target") +axs3[0,2].set_ylabel("Mag Output") + +axs3[1,0].hist(thetadiff_cut, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs3[1,0].set_xlabel("Theta Difference") +axs3[1,0].set_ylabel("Counts") + +axs3[1,1].hist(phidiff_cut, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs3[1,1].set_xlabel("Phi Difference") +axs3[1,1].set_ylabel("Counts") + +axs3[1,2].hist(magdiff_cut, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs3[1,2].set_xlabel("Mag Difference") +axs3[1,2].set_ylabel("Counts") + +axs3[2,0].hist2d(in_theta_cut, thetadiff_cut, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,0].set_xlabel("Theta Target") +axs3[2,0].set_ylabel("Theta Difference") + +axs3[2,1].hist2d(in_phi_cut, phidiff_cut, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs3[2,1].set_xlabel("Phi Target") +axs3[2,1].set_ylabel("Phi Difference") + +axs3[2,2].hist2d(in_mag_cut, magdiff_cut, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,2].set_xlabel("Mag Target") +axs3[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile3) + +# Create plots where a Gaussian has been fitted to the data +# Function to fit a Gaussian and plot the results +def plot_gaussian_fit(ax, data, range, xlabel, ylabel): + def gaussian(x, mu, sigma, amplitude): + return amplitude * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + hist, bin_edges = np.histogram(data, bins=100, range=range, density=True) + bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 + + popt, _ = curve_fit(gaussian, bin_centers, hist, p0=[0, 0.01, 1]) + mu, sigma, amplitude = popt + print(f"mu={mu}, sigma={sigma}, amplitude={amplitude}") + + x = np.linspace(range[0], range[1], 100) + ax.plot(x, gaussian(x, *popt), 'k', linewidth=2) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.hist(data, bins=100, alpha=0.5, range=range, edgecolor='black', density=True) + ax.legend([f'Fit: $\mu$={mu:.5f}, $\sigma$={sigma:.5f}']) + +# Create histograms with Gaussian fits +fig4, axs4 = plt.subplots(3, 1, figsize=(8, 12)) + +plot_gaussian_fit(axs4[0], thetadiff, thetadiffrange, "Theta Difference", "Density") +plot_gaussian_fit(axs4[1], phidiff_cut, phidiffrange, "Phi Difference", "Density") +plot_gaussian_fit(axs4[2], magdiff, magdiffrange, "Mag Difference", "Density") + +plt.show() +plt.savefig(outGraphFile4) \ No newline at end of file diff --git a/benchmarks/beamline/acceptanceAnalysis.C b/benchmarks/beamline/acceptanceAnalysis.C new file mode 100644 index 00000000..5f530408 --- /dev/null +++ b/benchmarks/beamline/acceptanceAnalysis.C @@ -0,0 +1,255 @@ +// Script to plot the x and y positions and momentum of beamline particles as they pass through the magnets + +#include +#include "DD4hep/Detector.h" +#include "DDRec/CellIDPositionConverter.h" +#include "edm4hep/MCParticleCollection.h" +#include "edm4hep/SimTrackerHitCollection.h" +#include "edm4hep/SimCalorimeterHitCollection.h" +#include "edm4hep/Vector3f.h" +#include "edm4hep/Vector3d.h" +#include "ROOT/RDataFrame.hxx" +#include "ROOT/RDF/RInterface.hxx" +#include "ROOT/RVec.hxx" +#include "shared_functions.h" +#include "TCanvas.h" +#include "TStyle.h" + + +using RVecS = ROOT::VecOps::RVec; +using RNode = ROOT::RDF::RNode; + +int acceptanceAnalysis( TString inFile = "/scratch/EIC/G4out/beamline/acceptanceTest.edm4hep.root", + TString outFile = "output.root", + std::string compactName = "/home/simong/EIC/epic/install/share/epic/epic_ip6_extended.xml", + TString beampipeCanvasName = "acceptance_in_beampipe.png", + TString EThetaCanvasName = "acceptance_energy_theta.png", + TString EThetaAccCanvasName= "acceptance_energy_theta_acceptance.png", + TString entryFractionCanvasName = "acceptance_entries.png") { + + //Set ROOT style + gStyle->SetPadLeftMargin(0.1); // Set left margin + gStyle->SetPadRightMargin(0.0); // Set right margin + gStyle->SetPadTopMargin(0.0); // Set top margin + gStyle->SetPadBottomMargin(0.1);// Set bottom margin + gStyle->SetTitleAlign(13); + gStyle->SetTitleX(0.12); // Place the title on the top right + gStyle->SetTitleY(0.985); // Place the title on the top right + gStyle->SetTitleSize(0.08, "t"); + gStyle->SetTitleXOffset(1.0); // Adjust y-axis title offset + gStyle->SetTitleYOffset(1.0); // Adjust y-axis title offset + gStyle->SetOptStat(0); + + int pass = 0; + + //Set implicit multi-threading + // ROOT::EnableImplicitMT(); + + //Load the detector config + dd4hep::Detector& detector = dd4hep::Detector::getInstance(); + detector.fromCompact(compactName); + + ROOT::RDataFrame d0("events",inFile, {"BackwardsBeamlineHits"}); + RNode d1 = d0; + RVecS colNames = d1.GetColumnNames(); + + //Get total number of entries + int nEntries = d1.Count().GetValue(); + //Set number of entries to process + // d1 = d1.Range(0,1000); + + //Get the collection + std::string mcParticlesName = "MCParticles"; + std::string readoutName = "BackwardsBeamlineHits"; + + std::cout << "Running lazy RDataframe execution" << std::endl; + + if(Any(colNames==mcParticlesName)){ + + // Get theta and energy of the particles + d1 = d1.Define("theta",[](const vector& mcParticles) { + // Calculate theta from momentum components as angle from negative z axis + double px = mcParticles[0].momentum.x; + double py = mcParticles[0].momentum.y; + double pz = mcParticles[0].momentum.z; + double p = std::sqrt(px*px + py*py + pz*pz); + double theta = M_PI-std::acos(pz / p); // Angle from the z-axis + + return theta; + }, {mcParticlesName}) + .Define("energy",[](const vector& mcParticles) { + + //Calculate energy from mass and momentum + double mass = mcParticles[0].mass; + double px = mcParticles[0].momentum.x; + double py = mcParticles[0].momentum.y; + double pz = mcParticles[0].momentum.z; + double energy = std::sqrt(px*px + py*py + pz*pz + mass*mass); + + return energy; + }, {mcParticlesName}); + + } + else{ + std::cout << "Collection " << mcParticlesName << " not found in file" << std::endl; + return 1; + } + + int eBins = 100; + int thetaBins = 100; + + auto totalETheta = d1.Histo2D({"Energy vs Theta","Energy vs Theta",eBins,4,18,thetaBins,0,0.011},"energy","theta"); + + if(Any(colNames==readoutName)){ + + d1 = d1.Define("pipeID",getSubID("pipe",detector),{readoutName}) + .Define("endID",getSubID("end",detector),{readoutName}) + .Define("pipeParameters",getVolumeParametersFromCellID(detector),{readoutName}) + .Define("pipeRadius",[](const ROOT::VecOps::RVec& params) { + ROOT::VecOps::RVec radii; + for (const auto& param : params) { + radii.push_back(param.radius); + } + return radii; + }, {"pipeParameters"}); + + + //global x,y,z position and momentum + d1 = d1 .Define("NHits","BackwardsBeamlineHits.size()"); + + d1 = d1.Define("hitPosMom",globalToLocal(detector),{readoutName}) + .Define("xpos","hitPosMom[0]") + .Define("ypos","hitPosMom[1]") + .Define("zpos","hitPosMom[2]"); + + } + else{ + std::cout << "Collection " << readoutName << " not found in file" << std::endl; + return 1; + } + + // Calculate the maximum pipe radius for plotting + auto maxPipeRadius = 1.2*d1.Max("pipeRadius").GetValue(); + + std::cout << "Executing Analysis and creating histograms" << std::endl; + + //Create array of histogram results + std::map> hHistsxy; + std::map> hHistsETheta; + + + std::map> pipeRadii; + std::map filterEntries; + std::map filterAcceptanceIntegral; + + //Create histograms + for(int i=0; i<=7; i++){ + + std::string name = "pipeID"; + std::string str_i = std::to_string(i); + name += str_i; + auto filterDF = d1.Define("xposf","xpos[pipeID=="+str_i+"]") + .Define("yposf","ypos[pipeID=="+str_i+"]") + .Define("pipeRadiusf","pipeRadius[pipeID=="+str_i+"]"); + + + TString beamspotName = "Beamspot ID"+str_i+";x offset [cm]; y offset [cm]"; + TString xyname = name+";x Offset [cm]; y Offset [cm]"; + TString xname = name+";x Offset [cm]; x trajectory component"; + TString yname = name+";y Offset [cm]; y trajectory component"; + hHistsxy[name] = filterDF.Histo2D({beamspotName,beamspotName,400,-maxPipeRadius,maxPipeRadius,400,-maxPipeRadius,maxPipeRadius},"xposf","yposf"); + + auto extraFilterDF = filterDF.Filter("All(pipeID<"+std::to_string(i+1)+")&&Any(pipeID=="+std::to_string(i)+")" ); + TString EThetaName = "Energy vs Theta ID"+str_i+";Energy [GeV]; Theta [rad]"; + TString nameETheta = name+";Energy [GeV]; Theta [rad]"; + hHistsETheta[name] = extraFilterDF.Histo2D({EThetaName,EThetaName,eBins,4,18,thetaBins,0,0.011},"energy","theta"); + + //Parameters of the pipe + pipeRadii[name] = filterDF.Max("pipeRadiusf"); + // std::cout << "Pipe ID: " << name << " Radius: " << pipeRadii[name] << " " << filterDF.Min("pipeRadiusf").GetValue() << std::endl; + + } + + // Create histograms of the beamspot + TCanvas *cXY = new TCanvas("beamspot_canvas","beamspot_canvas",3000,1600); + cXY->Divide(4,2); + int i=1; + for(auto [name,h] : hHistsxy){ + // Get the pipe radius for this histogram + auto pipeRadius = pipeRadii[name].GetValue(); + cXY->cd(i++); + + h->Draw("col"); + //Draw cicle + TEllipse *circle = new TEllipse(0,0,pipeRadius); + circle->SetLineColor(kRed); + circle->SetLineWidth(2); + circle->SetFillStyle(0); + circle->Draw("same"); + + } + + // Cnavas for energy vs theta + TCanvas *cETheta = new TCanvas("energy_theta_canvas","energy_theta_canvas",3000,1600); + cETheta->Divide(4,2); + i=1; + for(auto [name,h] : hHistsETheta){ + cETheta->cd(i++); + h->Draw("colz"); + filterEntries[name] = h->GetEntries()/ nEntries; + } + + // Canvas for energy vs theta acceptance + TCanvas *cEThetaAcc = new TCanvas("energy_theta_acceptance_canvas","energy_theta_acceptance_canvas",3000,1600); + cEThetaAcc->Divide(4,2); + i=1; + for(auto [name,h] : hHistsETheta){ + cEThetaAcc->cd(i++); + h->Divide(totalETheta.GetPtr()); + h->Draw("colz"); + filterAcceptanceIntegral[name] = h->Integral()/ (eBins*thetaBins); // Normalize by the number of bins + } + + TH1F* hPipeEntries = CreateFittedHistogram("hNumberOfHits", + "Number of Hits per Pipe ID", + filterEntries, + {}, + "Pipe ID"); + TH1F* hPipeAcceptance = CreateFittedHistogram("hPipeAcceptance", + "Acceptance per Pipe ID", + filterAcceptanceIntegral, + {}, + "Pipe ID"); + + TCanvas *cPipeAcceptance = new TCanvas("cPipeAcceptance", "Pipe Acceptance", 1200, 400); + cPipeAcceptance->Divide(2, 1); + cPipeAcceptance->cd(1); + hPipeEntries->Draw(""); + cPipeAcceptance->cd(2); + hPipeAcceptance->Draw(""); + cPipeAcceptance->SetGrid(); + cPipeAcceptance->Update(); + + // Save 2D canvases as pngs + cXY->SaveAs(beampipeCanvasName); + cETheta->SaveAs(EThetaCanvasName); + cEThetaAcc->SaveAs(EThetaAccCanvasName); + cPipeAcceptance->SaveAs(entryFractionCanvasName); + + TFile *f = new TFile(outFile,"RECREATE"); + cXY->Write(); + cETheta->Write(); + cEThetaAcc->Write(); + cPipeAcceptance->Write(); + + f->Close(); + + std::cout << "Analysis complete. Results saved to " << outFile << std::endl; + return pass; + + // std::cout << "Saving events to file" << std::endl; + + // ROOT::RDF::RSnapshotOptions opts; + // opts.fMode = "UPDATE"; + // d1.Snapshot("events",outFile,{"pipeID","endID","pipeRadius","xpos","ypos","zpos","xmom","ymom","zmom","xpos_global","ypos_global","zpos_global","px_global","py_global","pz_global"},opts); +} diff --git a/benchmarks/beamline/acceptanceGPS.mac b/benchmarks/beamline/acceptanceGPS.mac new file mode 100644 index 00000000..5f87735e --- /dev/null +++ b/benchmarks/beamline/acceptanceGPS.mac @@ -0,0 +1,14 @@ + +/gps/ang/type beam2d +/gps/ang/sigma_x 0.011 rad # 11 mrad +/gps/ang/sigma_y 0.011 rad # 11 mrad +/gps/pos/type Beam +/gps/pos/sigma_x 0.119 mm +/gps/pos/sigma_y 0.0107 mm +/gps/particle e- +/gps/ene/type Lin +/gps/ene/gradient 0.00001 +/gps/ene/min 6 GeV +/gps/ene/max 18 GeV + +/run/beamOn 4000000 \ No newline at end of file diff --git a/benchmarks/beamline/beamlineAnalysis.C b/benchmarks/beamline/beamlineAnalysis.C index 851c4fd0..42525598 100644 --- a/benchmarks/beamline/beamlineAnalysis.C +++ b/benchmarks/beamline/beamlineAnalysis.C @@ -59,7 +59,7 @@ int beamlineAnalysis( TString inFile = "/scratch/EIC/G4out/beamline/b //Set number of entries to process // d1 = d1.Range(0,1000); int nEntries = d1.Count().GetValue(); - float acceptableLoss = 0.99; // Set the acceptable loss percentage to 1% + float acceptableLoss = 0.999; // Set the acceptable loss percentage to 0.1% float acceptableEntries = nEntries * acceptableLoss; //Get the collection @@ -171,28 +171,39 @@ int beamlineAnalysis( TString inFile = "/scratch/EIC/G4out/beamline/b std::map pipeZPos; std::map pipeRotation; - auto xmin = d1.Min("xpos").GetValue(); - auto xmax = d1.Max("xpos").GetValue(); - auto ymin = d1.Min("ypos").GetValue(); - auto ymax = d1.Max("ypos").GetValue(); - auto pxmin = d1.Min("xmom").GetValue(); - auto pxmax = d1.Max("xmom").GetValue(); - auto pymin = d1.Min("ymom").GetValue(); - auto pymax = d1.Max("ymom").GetValue(); + // Queue up all actions + auto xmin_ptr = d1.Min("xpos"); + auto xmax_ptr = d1.Max("xpos"); + auto ymin_ptr = d1.Min("ypos"); + auto ymax_ptr = d1.Max("ypos"); + auto pxmin_ptr = d1.Min("xmom"); + auto pxmax_ptr = d1.Max("xmom"); + auto pymin_ptr = d1.Min("ymom"); + auto pymax_ptr = d1.Max("ymom"); + + // Now trigger the event loop (only once) + auto xmin = xmin_ptr.GetValue(); + auto xmax = xmax_ptr.GetValue(); + auto ymin = ymin_ptr.GetValue(); + auto ymax = ymax_ptr.GetValue(); + auto pxmin = pxmin_ptr.GetValue(); + auto pxmax = pxmax_ptr.GetValue(); + auto pymin = pymin_ptr.GetValue(); + auto pymax = pymax_ptr.GetValue(); //Create histograms for(int i=0; i<=7; i++){ std::string name = "pipeID"; name += std::to_string(i); - auto filterDF = d1.Define("xposf","xpos["+std::to_string(i)+"]") - .Define("yposf","ypos["+std::to_string(i)+"]") - .Define("xmomf","xmom["+std::to_string(i)+"]") - .Define("ymomf","ymom["+std::to_string(i)+"]") - .Define("pipeRadiusf","pipeRadius["+std::to_string(i)+"]") - .Define("xdetf","xdet["+std::to_string(i)+"]") - .Define("zdetf","zdet["+std::to_string(i)+"]") - .Define("rotationf","rotation["+std::to_string(i)+"]"); + auto filterDF = d1.Define("xposf","xpos[pipeID=="+std::to_string(i)+"]") + .Define("yposf","ypos[pipeID=="+std::to_string(i)+"]") + .Define("xmomf","xmom[pipeID=="+std::to_string(i)+"]") + .Define("ymomf","ymom[pipeID=="+std::to_string(i)+"]") + .Define("pipeRadiusf","pipeRadius[pipeID=="+std::to_string(i)+"]") + .Define("xdetf","xdet[pipeID=="+std::to_string(i)+"]") + .Define("zdetf","zdet[pipeID=="+std::to_string(i)+"]") + .Define("rotationf","rotation[pipeID=="+std::to_string(i)+"]"); //Calculate Min and Max values @@ -292,6 +303,8 @@ int beamlineAnalysis( TString inFile = "/scratch/EIC/G4out/beamline/b } + + // Create histograms of the beamspot TCanvas *cXY = new TCanvas("beamspot_canvas","beamspot_canvas",3000,1600); cXY->Divide(4,2); @@ -301,7 +314,7 @@ int beamlineAnalysis( TString inFile = "/scratch/EIC/G4out/beamline/b // Check histogram entries if(h->GetEntries() < acceptableEntries){ std::cout << "Warning: Only " << h->GetEntries()/nEntries << " of particles contributing to histogram " << name - << " , which is below the accepted threshold of " << acceptableEntries << std::endl; + << " , which is below the accepted threshold of " << acceptableEntries/nEntries << std::endl; pass = 1; } diff --git a/benchmarks/beamline/beamlineGPS.mac b/benchmarks/beamline/beamlineGPS.mac index d3cdc21b..54267451 100644 --- a/benchmarks/beamline/beamlineGPS.mac +++ b/benchmarks/beamline/beamlineGPS.mac @@ -8,4 +8,4 @@ /gps/energy 18 GeV /gps/particle e- -/run/beamOn 10000 \ No newline at end of file +/run/beamOn 100000 \ No newline at end of file diff --git a/benchmarks/beamline/config.yml b/benchmarks/beamline/config.yml index ef6b84e4..1fc920dd 100644 --- a/benchmarks/beamline/config.yml +++ b/benchmarks/beamline/config.yml @@ -1,4 +1,4 @@ -sim:beamline: +sim:beamline:beamspot: extends: .det_benchmark stage: simulate script: @@ -6,11 +6,19 @@ sim:beamline: snakemake $SNAKEMAKE_FLAGS --cores 2 \ sim_output/beamline/beamlineTestlocal.edm4hep.root +sim:beamline:acceptance: + extends: .det_benchmark + stage: simulate + script: + - | + snakemake $SNAKEMAKE_FLAGS --cores 2 \ + sim_output/beamline/acceptanceTestlocal.edm4hep.root + bench:beamline: extends: .det_benchmark stage: benchmarks needs: - - ["sim:beamline"] + - ["sim:beamline:beamspot", "sim:beamline:acceptance"] script: - snakemake $SNAKEMAKE_FLAGS --cores 2 beamline_local diff --git a/benchmarks/beamline/processData.C b/benchmarks/beamline/processData.C new file mode 100644 index 00000000..e2262b31 --- /dev/null +++ b/benchmarks/beamline/processData.C @@ -0,0 +1,37 @@ +#include "ROOT/RDataFrame.hxx" +#include "TFile.h" +#include "TTree.h" +#include "edm4hep/MCParticle.h" +#include "edm4hep/SimTrackerHit.h" +#include + +void processData(const TString inputFile="/home/simong/EIC/detector_benchmarks_anl/sim_output/beamline/acceptanceTestcurrent.edm4hep.root", const TString outputFile="test.root", const int desired_cellID = 66757) { + // Define the branches to read + std::vector branches = { + "BackwardsBeamlineHits.cellID", + "BackwardsBeamlineHits.position.x", + "BackwardsBeamlineHits.position.y", + "BackwardsBeamlineHits.position.z", + "BackwardsBeamlineHits.momentum.x", + "BackwardsBeamlineHits.momentum.y", + "BackwardsBeamlineHits.momentum.z", + "MCParticles.momentum.x", + "MCParticles.momentum.y", + "MCParticles.momentum.z" + }; + + // Create a ROOT DataFrame to read the input files + ROOT::RDataFrame df("events", inputFile); + + //Filter on events with only a single MCParticle + auto filterDF = df.Filter("MCParticles.size()==1") + .Define("desiredHits", "BackwardsBeamlineHits[BackwardsBeamlineHits.cellID=="+std::to_string(desired_cellID)+"]") + .Filter("desiredHits.size()==1") + .Define("features", "std::array{static_cast(desiredHits[0].position.x), static_cast(desiredHits[0].position.y), static_cast(desiredHits[0].position.z), static_cast(desiredHits[0].momentum.x), static_cast(desiredHits[0].momentum.y), static_cast(desiredHits[0].momentum.z)}") + .Define("targets", "std::array{static_cast(MCParticles[0].momentum.x), static_cast(MCParticles[0].momentum.y), static_cast(MCParticles[0].momentum.z)}"); + + // Save the filtered data to a new ROOT file + filterDF.Snapshot("events", outputFile, {"features","targets"}); + + std::cout << "Filtered data saved to " << outputFile << std::endl; +} \ No newline at end of file