Skip to content

Commit

Permalink
tests for different radial transforms and exposing those options in MACE
Browse files Browse the repository at this point in the history
  • Loading branch information
RylieWeaver committed Oct 4, 2024
1 parent 6034c0c commit 8b9d52b
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 3 deletions.
5 changes: 3 additions & 2 deletions hydragnn/models/MACEStack.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,14 @@ class MACEStack(Base):
def __init__(
self,
r_max: float, # The cutoff radius for the radial basis functions and edge_index
radial_type: str, # The type of radial basis function to use
distance_transform: str, # The distance transform to use
num_bessel: int, # The number of radial bessel functions. This dictates the richness of radial information in message-passing.
max_ell: int, # Max l-type for CG-tensor product. Theoretically, there is no max l-type, but in practice, we need to truncate the CG-tensor product to keep tractable computation
node_max_ell: int, # Max l-type for node features
avg_num_neighbors: float,
num_polynomial_cutoff, # The polynomial cutoff function ensures that the function goes to zero at the cutoff radius smoothly. Same as envelope_exponent for DimeNet
correlation, # Used in the product basis block and *roughly* determines the richness of interaction in the n-body interaction of layer 'n'.
radial_type, # The type of radial basis function to use
*args,
**kwargs,
):
Expand Down Expand Up @@ -148,7 +149,7 @@ def __init__(
num_bessel=num_bessel,
num_polynomial_cutoff=num_polynomial_cutoff,
radial_type=radial_type,
distance_transform=None,
distance_transform=distance_transform,
)
self.node_embedding = LinearNodeEmbeddingBlock(
irreps_in=self.node_attr_irreps,
Expand Down
5 changes: 4 additions & 1 deletion hydragnn/models/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def create_model_config(
config["Architecture"]["num_after_skip"],
config["Architecture"]["num_radial"],
config["Architecture"]["radial_type"],
config["Architecture"]["distance_transform"],
config["Architecture"]["basis_emb_size"],
config["Architecture"]["int_emb_size"],
config["Architecture"]["out_emb_size"],
Expand Down Expand Up @@ -97,6 +98,7 @@ def create_model(
num_after_skip: int = None,
num_radial: int = None,
radial_type: str = None,
distance_transform: str = None,
basis_emb_size: int = None,
int_emb_size: int = None,
out_emb_size: int = None,
Expand Down Expand Up @@ -349,13 +351,14 @@ def create_model(
assert node_max_ell >= 1, "MACE requires node_max_ell >= 1."
model = MACEStack(
radius,
radial_type,
distance_transform,
num_radial,
max_ell,
node_max_ell,
avg_num_neighbors,
envelope_exponent,
correlation,
radial_type,
input_dim,
hidden_dim,
output_dim,
Expand Down
4 changes: 4 additions & 0 deletions hydragnn/utils/input_config_parsing/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ def update_config(config, train_loader, val_loader, test_loader):

if "radius" not in config["NeuralNetwork"]["Architecture"]:
config["NeuralNetwork"]["Architecture"]["radius"] = None
if "radial_type" not in config["NeuralNetwork"]["Architecture"]:
config["NeuralNetwork"]["Architecture"]["radial_type"] = None
if "distance_transform" not in config["NeuralNetwork"]["Architecture"]:
config["NeuralNetwork"]["Architecture"]["distance_transform"] = None
if "num_gaussians" not in config["NeuralNetwork"]["Architecture"]:
config["NeuralNetwork"]["Architecture"]["num_gaussians"] = None
if "num_filters" not in config["NeuralNetwork"]["Architecture"]:
Expand Down
1 change: 1 addition & 0 deletions tests/inputs/ci.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"model_type": "PNA",
"radius": 2.0,
"max_neighbours": 100,
"radial_type": "bessel",
"num_gaussians": 50,
"envelope_exponent": 5,
"int_emb_size": 64,
Expand Down
208 changes: 208 additions & 0 deletions tests/test_radial_transforms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
##############################################################################
# Copyright (c) 2024, Oak Ridge National Laboratory #
# All rights reserved. #
# #
# This file is part of HydraGNN and is distributed under a BSD 3-clause #
# license. For the licensing terms see the LICENSE file in the top-level #
# directory. #
# #
# SPDX-License-Identifier: BSD-3-Clause #
##############################################################################

import sys, os, json
import pytest

import torch

torch.manual_seed(97)
import shutil

import hydragnn, tests
from hydragnn.utils.input_config_parsing.config_utils import merge_config


# Main unit test function called by pytest wrappers.
## Adapted from test_graphs.py ... Currently, only the single head model json is tested, although the multihead functionality remains.
def unittest_train_model(
    model_type,
    radial_type,
    distance_transform,
    ci_input,
    use_lengths=True,
    overwrite_data=False,
    use_deepspeed=False,
    overwrite_config=None,
):
    """Train one model configuration end to end and assert its error thresholds.

    The JSON config ``tests/inputs/<ci_input>`` (relative to the current
    working directory) is loaded and its ``model_type``, ``radial_type`` and
    ``distance_transform`` architecture entries are overridden with the given
    arguments. On rank 0, missing raw datasets are generated with
    ``tests.deterministic_graph_data``. Training and prediction are then run
    through ``hydragnn`` and the resulting errors are compared against the
    per-model thresholds defined below.

    Args:
        model_type: Key written to ``Architecture.model_type``; also selects
            the threshold pair used for the assertions.
        radial_type: Value written to ``Architecture.radial_type``.
        distance_transform: Value written to
            ``Architecture.distance_transform``.
        ci_input: File name of the JSON config inside ``tests/inputs``.
        use_lengths: When true, ``Architecture.edge_features`` is set to
            ``["lengths"]``.
        overwrite_data: When true, existing raw dataset directories are
            removed and regenerated.
        use_deepspeed: Forwarded to ``hydragnn.run_training`` and
            ``hydragnn.run_prediction``.
        overwrite_config: Optional dict merged into the loaded config with
            ``merge_config``.

    Raises:
        AssertionError: If any per-head error, per-head sample MAE, or the
            total error is not below its threshold.
        ValueError: If an empty dataset directory must be populated for a
            split name other than "total", "train", "test" or "validate".
    """
    # Only the rank is needed here; the communicator size is not used.
    _, rank = hydragnn.utils.distributed.get_comm_size_and_rank()

    os.environ["SERIALIZED_DATA_PATH"] = os.getcwd()

    # Read in config settings and override model type.
    config_file = os.path.join(os.getcwd(), "tests/inputs", ci_input)
    with open(config_file, "r") as f:
        config = json.load(f)
    config["NeuralNetwork"]["Architecture"]["model_type"] = model_type
    config["NeuralNetwork"]["Architecture"]["radial_type"] = radial_type
    config["NeuralNetwork"]["Architecture"]["distance_transform"] = distance_transform

    # Overwrite config settings if provided
    if overwrite_config:
        config = merge_config(config, overwrite_config)

    # Aliases are taken only after the merge, because merge_config may hand
    # back a different object than the one that was loaded.
    architecture = config["NeuralNetwork"]["Architecture"]
    dataset_paths = config["Dataset"]["path"]

    # To test this locally, set ci.json as
    #   "Dataset": {
    #       ...
    #       "path": {
    #           "train": "serialized_dataset/unit_test_singlehead_train.pkl",
    #           "test": "serialized_dataset/unit_test_singlehead_test.pkl",
    #           "validate": "serialized_dataset/unit_test_singlehead_validate.pkl"}
    #       ...
    #
    # use pkl files if exist by default
    pkl_prefix = (
        os.environ["SERIALIZED_DATA_PATH"]
        + "/serialized_dataset/"
        + config["Dataset"]["name"]
    )
    for dataset_name in dataset_paths:
        if dataset_name == "total":
            pkl_file = pkl_prefix + ".pkl"
        else:
            pkl_file = pkl_prefix + "_" + dataset_name + ".pkl"
        if os.path.exists(pkl_file):
            dataset_paths[dataset_name] = pkl_file

    # In the unit test runs, it is found MFC favors graph-level features over node-level features, compared with other models;
    # hence here we decrease the loss weight coefficient for graph-level head in MFC.
    if model_type == "MFC" and ci_input == "ci_multihead.json":
        architecture["task_weights"][0] = 2

    # Only run with edge lengths for models that support them.
    if use_lengths:
        architecture["edge_features"] = ["lengths"]

    if rank == 0:
        num_samples_tot = 500
        # check if serialized pickle files or folders for raw files provided
        first_path = next(iter(dataset_paths.values()), "")
        pkl_input = first_path.endswith(".pkl")
        # only generate new datasets, if not pkl
        if not pkl_input:
            for dataset_name, data_path in dataset_paths.items():
                # rmtree raises on a missing directory, so only remove what exists.
                if overwrite_data and os.path.exists(data_path):
                    shutil.rmtree(data_path)
                os.makedirs(data_path, exist_ok=True)
                if os.listdir(data_path):
                    # Data already present: nothing to generate for this split.
                    continue
                if dataset_name == "total":
                    num_samples = num_samples_tot
                elif dataset_name == "train":
                    num_samples = int(
                        num_samples_tot
                        * config["NeuralNetwork"]["Training"]["perc_train"]
                    )
                elif dataset_name in ("test", "validate"):
                    # The remainder after the training share is split evenly.
                    num_samples = int(
                        num_samples_tot
                        * (1 - config["NeuralNetwork"]["Training"]["perc_train"])
                        * 0.5
                    )
                else:
                    # Fail loudly instead of reusing a stale or unbound count.
                    raise ValueError(
                        "Unknown dataset split name: " + str(dataset_name)
                    )
                tests.deterministic_graph_data(
                    data_path, number_configurations=num_samples
                )

    # Run Training
    hydragnn.run_training(config, use_deepspeed)

    (
        error,
        error_mse_task,
        true_values,
        predicted_values,
    ) = hydragnn.run_prediction(config, use_deepspeed)

    # Set RMSE and sample MAE error thresholds
    thresholds = {
        "SAGE": [0.20, 0.20],
        "PNA": [0.10, 0.10],
        "PNAPlus": [0.10, 0.10],
        "MFC": [0.20, 0.30],
        "GIN": [0.25, 0.20],
        "GAT": [0.60, 0.70],
        "CGCNN": [0.175, 0.175],
        "SchNet": [0.20, 0.20],
        "DimeNet": [0.50, 0.50],
        "EGNN": [0.20, 0.20],
        "MACE": [0.60, 0.70],
    }
    rmse_threshold, mae_threshold = thresholds[model_type]

    verbosity = 2

    # The loss module is stateless, so one instance serves every head.
    mae = torch.nn.L1Loss()

    for ihead, head_true in enumerate(true_values):
        error_head_mse = error_mse_task[ihead]
        error_str = str("{:.6f}".format(error_head_mse)) + " < " + str(rmse_threshold)
        hydragnn.utils.print.print_distributed(verbosity, "head: " + error_str)
        assert error_head_mse < rmse_threshold, "Head RMSE checking failed for " + str(
            ihead
        )

        head_pred = predicted_values[ihead]
        # Check individual samples
        sample_mean_abs_error = mae(head_true, head_pred)
        error_str = "{:.6f}".format(sample_mean_abs_error) + " < " + str(mae_threshold)
        # Report the MAE comparison too; it was previously built but never shown.
        hydragnn.utils.print.print_distributed(
            verbosity, "head sample MAE: " + error_str
        )
        assert sample_mean_abs_error < mae_threshold, "MAE sample checking failed!"

    # Check RMSE error
    error_str = str("{:.6f}".format(error)) + " < " + str(rmse_threshold)
    hydragnn.utils.print.print_distributed(verbosity, "total: " + error_str)
    assert error < rmse_threshold, "Total RMSE checking failed!" + str(error)


@pytest.mark.parametrize(
    "model_type",
    ["MACE"],
)
@pytest.mark.parametrize("basis_function", ["bessel", "gaussian", "chebyshev"])
@pytest.mark.parametrize("distance_transform", ["None", "Agnesi", "Soft"])
def pytest_train_model_transforms(
    model_type,
    basis_function,
    distance_transform,
    use_lengths=True,
    overwrite_data=False,
):
    """Run the training check for every radial basis / distance transform pair.

    Each parametrized combination is forwarded to ``unittest_train_model``
    together with the single-head ``ci.json`` config. ``basis_function`` is
    passed on as that function's ``radial_type`` argument.

    NOTE(review): the "None" entry is the literal string, not Python ``None``;
    presumably the model treats any unrecognised name as "no transform" --
    confirm against the MACE radial embedding code.
    """
    unittest_train_model(
        model_type=model_type,
        radial_type=basis_function,
        distance_transform=distance_transform,
        ci_input="ci.json",
        use_lengths=use_lengths,
        overwrite_data=overwrite_data,
    )

0 comments on commit 8b9d52b

Please sign in to comment.