FAENet++ #66

Open
wants to merge 126 commits into base: main
Commits (126)
6f81484
ewald message passing raw
Ramlaoui Dec 5, 2023
eaf80a4
notebook to compare activations between models
Ramlaoui Dec 14, 2023
3f444e7
quick addition of ewald to schnet
Ramlaoui Dec 14, 2023
07ba182
update activations notebook
Ramlaoui Feb 7, 2024
4b88715
added indications for future implementations
theosaulus Feb 7, 2024
23f2c6a
added dirty implementation of learned canonicalisation function
theosaulus Feb 7, 2024
3750d1c
removed comments
theosaulus Feb 7, 2024
38ae651
bug fix
theosaulus Feb 7, 2024
d8b80e3
modify configs for ewald
Ramlaoui Feb 16, 2024
e520dea
ewald model for schnet and faenet
Ramlaoui Feb 16, 2024
6ecd7d5
New tasks for S2EF relaxations
Ramlaoui Feb 16, 2024
6879391
update relaxations to current ocp updates
Ramlaoui Feb 17, 2024
56796e7
adapt for relaxations
Ramlaoui Feb 17, 2024
c758b0b
update logger name for ft
Ramlaoui Feb 17, 2024
1306850
update task
Ramlaoui Feb 17, 2024
7382cb5
add _unwrapped_model for relaxations
Ramlaoui Feb 23, 2024
5ce0486
fix results directory for the current config
Ramlaoui Feb 23, 2024
bb4d388
otf_graph in faenet class
Ramlaoui Feb 23, 2024
eceafc5
fix bug for relaxations metrics
Ramlaoui Feb 29, 2024
ef619f8
revoke debug options
Ramlaoui Feb 29, 2024
dda4c54
added orthonormalization
theosaulus Feb 29, 2024
4c38daa
modif sh
theosaulus Feb 29, 2024
9dcd071
use directories with debug, revert after
Ramlaoui Feb 29, 2024
0e2c35c
init separated UntrainedCanonicalisation class
theosaulus Feb 29, 2024
c6a09bf
currently not working - nodes with no edges
theosaulus Feb 29, 2024
da5ba0a
dive deeper in analysis
Ramlaoui Mar 20, 2024
e3589ef
change notebook structures + studies on mispredictions
Ramlaoui Mar 21, 2024
d33e514
added canonicalisation support - not running
theosaulus Mar 21, 2024
6e9dfc3
uniform canonicalisation calls
Ramlaoui Mar 21, 2024
7a16a32
fix bugs - now working
Ramlaoui Mar 21, 2024
4026dd3
add analysis notebooks
Ramlaoui Apr 4, 2024
cf974e0
better support of various equivariance methods - added vn point net
theosaulus Apr 4, 2024
b767541
reverted to original
theosaulus Apr 4, 2024
0eb02a7
comments cleaning
theosaulus Apr 4, 2024
cdb618d
add more systems taken into account for std plots
Ramlaoui Apr 5, 2024
57963ae
added support for learnable canonicalization functions
theosaulus Apr 5, 2024
e928ea5
add equiformer to models folder - not running (OOM)
Ramlaoui Apr 5, 2024
5dd6d78
equiformerv2 config
Ramlaoui Apr 5, 2024
7beb0ef
schedulers for equiformer_v2
Ramlaoui Apr 5, 2024
25cd6ef
draft for new vn deeper model
theosaulus Apr 11, 2024
b62034b
bug fix
theosaulus Apr 11, 2024
4acebcf
cleaning
theosaulus Apr 11, 2024
a26fcfb
minor changes
theosaulus Apr 11, 2024
3338f99
attempt to solve the oom problem - still occurring
theosaulus Apr 11, 2024
d2393bf
another attempt at solving the oom
theosaulus Apr 11, 2024
e933ca7
minor changes
theosaulus Apr 11, 2024
b02b4c1
modified model_forward in validate function to set it to inference mode
theosaulus Apr 12, 2024
36da25e
minor changes
theosaulus Apr 12, 2024
2c03afe
Merge branch 's2ef-relax' into equiformer
Ramlaoui Apr 12, 2024
ec35186
corrected cano_network initialization
theosaulus Apr 12, 2024
7feca9f
cleaning
theosaulus Apr 14, 2024
4f54c25
correcting cano_model optimization
theosaulus Apr 14, 2024
9d5060a
cleaning
theosaulus Apr 15, 2024
0a418c9
fixed invariance tests due to wrong device placement
theosaulus Apr 15, 2024
4b89353
initialization for dgcnn cano model
theosaulus Apr 15, 2024
350a4a6
temporary fix to avoid errors due to 2D cano
theosaulus Apr 15, 2024
e221810
fixed dgcnn model - working
theosaulus Apr 15, 2024
7aebabe
cleaning
theosaulus Apr 15, 2024
4ba9658
fix relaxation for faenet
Ramlaoui Apr 16, 2024
290c319
add flag for reloading config from run folder
Ramlaoui Apr 16, 2024
7368bd1
support reload_config flag
Ramlaoui Apr 16, 2024
5ae4a93
frame averaging to work on GPU (useful for relaxations)
Ramlaoui Apr 16, 2024
89b1e41
script to run relaxations
Ramlaoui Apr 16, 2024
c67bf99
scheduler to work with default equiformer config
Ramlaoui Apr 16, 2024
6f48800
fixed device when testing s2ef equivariances
theosaulus Apr 16, 2024
93d03d9
minor changes
theosaulus Apr 16, 2024
3d8aad8
fix default scheduler
Ramlaoui Apr 16, 2024
5e1f482
is2re for equiformer config
Ramlaoui Apr 16, 2024
67c2c10
large cleaning + added simple cano_method
theosaulus Apr 17, 2024
100a425
remove gpu-per-task because it crashes
Ramlaoui Apr 18, 2024
0e81854
sbatch file for new slurm updates
Ramlaoui Apr 23, 2024
7cd9647
update notebook for more comparisons
Ramlaoui May 10, 2024
e7abdab
Merge branch 'learn-cano-func' into equiformer
Ramlaoui May 10, 2024
c16b325
fix no equivariance module bug
Ramlaoui May 11, 2024
02dc54f
fix no equivariance module bug
Ramlaoui May 11, 2024
c9ffb9d
Merge branch 'learn-cano-func' into equiformer
Ramlaoui May 11, 2024
197dcf1
fix no equivariance module bug
Ramlaoui May 11, 2024
b58598e
Merge branch 'learn-cano-func' into equiformer
Ramlaoui May 11, 2024
5a80333
adjust model to work with faenet pipeline
Ramlaoui May 13, 2024
4d5f80c
use collater instead of from list to fix bugs
Ramlaoui May 13, 2024
76c4709
Merge branch 'learn-cano-func' into equiformer
Ramlaoui May 13, 2024
3529edb
collater usage fix
Ramlaoui May 15, 2024
ffaa7a3
Merge branch 'learn-cano-func' into equiformer
Ramlaoui May 15, 2024
a3af424
fix dataset when adsorbate is not specified
Ramlaoui May 15, 2024
e128db7
support for when cell is None
Ramlaoui May 15, 2024
9fca575
added support for Sign Equivariant SFA
theosaulus May 15, 2024
f7e02ef
fix support for qm7x
Ramlaoui May 15, 2024
2f24ee2
bug fix
theosaulus May 15, 2024
0909cff
debug
theosaulus May 16, 2024
8105827
centering pos before trained cano to have E(3) instead of O(3)
theosaulus May 19, 2024
ba951e6
testing signnet - not the right implem
theosaulus May 19, 2024
f011098
playing with VN pointnet
theosaulus May 19, 2024
24e38f9
minor changes
theosaulus May 19, 2024
774d86e
added support for QM7x and QM9
theosaulus May 19, 2024
38d3844
added support for sign_equiv_sfa, and renamed sign_inv_sfa appropriately
theosaulus May 19, 2024
54a3714
make relaxations work for new canonicalization
Ramlaoui May 20, 2024
9f03601
relaxation when model from cluster to cp_tmp
Ramlaoui May 20, 2024
eccdd7a
update plots order
Ramlaoui May 20, 2024
d9cf179
add sum of interaction layers
Ramlaoui May 20, 2024
cc7cc04
bug fix
theosaulus May 20, 2024
bb8c6f7
bug fix
theosaulus May 20, 2024
64887f6
fix fa when the positions are on gpu
Ramlaoui May 21, 2024
a3fd7dc
transforms for relaxation in the case of fa
Ramlaoui May 21, 2024
bc24537
update notebook
Ramlaoui May 21, 2024
88abff3
avoid splitting the batch when no trainable cano
Ramlaoui May 22, 2024
384f910
fix + gram schmidt
theosaulus May 28, 2024
48aec96
direct forces and support for qm7x sym
theosaulus May 28, 2024
5141584
minor changes
theosaulus May 28, 2024
1066e48
clean + added sign-inv E3-equiv net
theosaulus Jun 19, 2024
8b1a770
cleaning
theosaulus Jun 29, 2024
2cece77
added support for sign invariant E(3)-equivariant cano
theosaulus Jun 29, 2024
8eb0f73
add extra argument used on energy forward by the trainer
Ramlaoui Jun 30, 2024
c0d9200
allow to freeze backbone for transfer learning
Ramlaoui Jun 30, 2024
1f75cf1
linting
Ramlaoui Jun 30, 2024
8647ef2
revert debug case to default + lint
Ramlaoui Jun 30, 2024
2af5acd
Merge branch 'ewald-sum' into equiformer
Ramlaoui Jun 30, 2024
6bc378e
add ewald to dpp
Ramlaoui Jun 30, 2024
640166c
cleaning
theosaulus Jul 7, 2024
52eabe6
Merge branch 'equiformer' into learn-cano-func
Ramlaoui Jul 7, 2024
b750be1
Merge pull request #65 from RolnickLab/learn-cano-func
Ramlaoui Jul 7, 2024
4ead6db
support for ewald with dpp and qm9
Ramlaoui Jul 7, 2024
c40b5aa
fix memory leak for relaxations without transforms
Ramlaoui Jul 7, 2024
e9c39a8
add compatibility for old frame_averaging arguments in configs
Ramlaoui Jul 7, 2024
442be83
Merge pull request #57 from RolnickLab/equiformer
Ramlaoui Jul 7, 2024
52dc4cc
cleaning and comments
theosaulus Sep 3, 2024
5cfe47a
clean
theosaulus Sep 3, 2024
29 changes: 22 additions & 7 deletions configs/models/dpp.yaml
@@ -22,6 +22,21 @@ default:
phys_embeds: False # True
phys_hidden_channels: 0
energy_head: False # can be {False, weighted-av-initial-embeds, weighted-av-final-embeds}
######################################### Ewald message passing hyperparameters
use_atom_to_atom_mp: False
use_ewald: False
detach_ewald: True
ewald_hyperparams:
num_k_x: 2 # Number of k-points sampled along x (periodic systems)
num_k_y: 2 # Number of k-points sampled along y (periodic systems)
num_k_z: 5 # Number of k-points sampled along z (periodic systems)
downprojection_size: 8 # Size of linear bottleneck layer
num_hidden: 3 # Number of residuals in update function
# qm9
k_cutoff: 0.8 # Frequency cutoff [Å^-1]
delta_k: 0.2 # Voxel grid resolution [Å^-1]
num_k_rbf: 128 # Gaussian radial basis size (Fourier filter)
#########################################
optim:
batch_size: 4
eval_batch_size: 4
@@ -82,7 +97,7 @@ is2re:
s2ef:
default:
model:
regress_forces: "from_energy"
regress_forces: "direct"
force_decoder_type: "mlp" # can be {"" or "simple"} | only used if regress_forces is True
force_decoder_model_config:
simple:
@@ -127,19 +142,19 @@
# If the global batch size (num_gpus * batch_size) is modified
# the lr_milestones and warmup_steps need to be adjusted accordingly.
optim:
batch_size: 96
eval_batch_size: 96
batch_size: 48
eval_batch_size: 48
eval_every: 10000
num_workers: 8
lr_initial: 0.0001
lr_gamma: 0.1
lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
- 20833
- 31250
- 41666
- 10433
- 15500
- 21633
warmup_steps: 10416
warmup_factor: 0.2
max_epochs: 3
max_epochs: 15
force_coefficient: 50
energy_coefficient: 1
energy_grad_coefficient: 5
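For orientation, the periodic Ewald hyperparameters above (num_k_x, num_k_y, num_k_z) bound an integer grid of reciprocal-lattice points, while k_cutoff, delta_k and num_k_rbf parameterise the aperiodic (qm9) variant. A minimal, hypothetical sketch of how such a k-point grid could be enumerated from a unit cell is shown below; the function name and signature are illustrative, not the repository's actual implementation.

import itertools
import torch

def build_k_grid(cell: torch.Tensor, num_k_x: int, num_k_y: int, num_k_z: int) -> torch.Tensor:
    # Illustrative sketch: enumerate reciprocal-lattice vectors k = nx*b1 + ny*b2 + nz*b3
    # with |n_i| <= num_k_i, skipping the zero-frequency term.
    # `cell` is the (3, 3) matrix whose rows are the lattice vectors.
    recip = 2 * torch.pi * torch.linalg.inv(cell).T  # rows are the reciprocal vectors b_i
    k_points = []
    for nx, ny, nz in itertools.product(
        range(-num_k_x, num_k_x + 1),
        range(-num_k_y, num_k_y + 1),
        range(-num_k_z, num_k_z + 1),
    ):
        if nx == ny == nz == 0:
            continue
        k_points.append(nx * recip[0] + ny * recip[1] + nz * recip[2])
    return torch.stack(k_points)  # (num_k, 3)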
86 changes: 86 additions & 0 deletions configs/models/equiformer_v2.yaml
@@ -0,0 +1,86 @@
# includes:
# - configs/s2ef/2M/base.yml

# trainer: equiformerv2_forces
default:
model:
name: equiformer_v2

use_pbc: True
regress_forces: True
otf_graph: True
max_neighbors: 20
max_radius: 12.0
max_num_elements: 90

num_layers: 12
sphere_channels: 128
attn_hidden_channels: 64 # [64, 96] This determines the hidden size of message passing. Do not necessarily use 96.
num_heads: 8
attn_alpha_channels: 64 # Not used when `use_s2_act_attn` is True.
attn_value_channels: 16
ffn_hidden_channels: 128
norm_type: 'layer_norm_sh' # ['rms_norm_sh', 'layer_norm', 'layer_norm_sh']

lmax_list: [6]
mmax_list: [2]
grid_resolution: 18 # [18, 16, 14, None] For `None`, simply comment this line.

num_sphere_samples: 128

edge_channels: 128
use_atom_edge_embedding: True
share_atom_edge_embedding: False # If `True`, `use_atom_edge_embedding` must be `True` and the atom edge embedding will be shared across all blocks.
distance_function: 'gaussian'
num_distance_basis: 512 # not used

attn_activation: 'silu'
use_s2_act_attn: False # [False, True] Switch between attention after S2 activation or the original EquiformerV1 attention.
use_attn_renorm: True # Attention re-normalization. Used for ablation study.
ffn_activation: 'silu' # ['silu', 'swiglu']
use_gate_act: False # [True, False] Switch between gate activation and S2 activation
use_grid_mlp: True # [False, True] If `True`, use projecting to grids and performing MLPs for FFNs.
use_sep_s2_act: True # Separable S2 activation. Used for ablation study.

alpha_drop: 0.1 # [0.0, 0.1]
drop_path_rate: 0.05 # [0.0, 0.05]
proj_drop: 0.0

weight_init: 'uniform' # ['uniform', 'normal']

optim:
batch_size: 1 # 6
eval_batch_size: 1 # 6
load_balancing: atoms
num_workers: 8
lr_initial: 0.0004 # [0.0002, 0.0004], eSCN uses 0.0008 for batch size 96

optimizer: AdamW
weight_decay: 0.001
scheduler: LambdaLR
lambda_type: cosine
warmup_factor: 0.2
warmup_epochs: 0.1
lr_min_factor: 0.01 #

max_epochs: 30
force_coefficient: 100
energy_coefficient: 2
clip_grad_norm: 100
ema_decay: 0.999
loss_energy: mae
loss_force: l2mae

eval_every: 5000

s2ef:
default: {}

2M: {}

is2re:
default:
model:
regress_forces: False

all: {}
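For reference, the LambdaLR / cosine scheduler settings above (warmup_factor, warmup_epochs, lr_min_factor) describe a warmup-then-cosine multiplier on lr_initial. A hedged sketch of such a multiplier is given below; the exact curve implemented by the trainer may differ.

import math

def cosine_lr_lambda(step, warmup_steps, total_steps, warmup_factor=0.2, lr_min_factor=0.01):
    # Sketch of a LambdaLR multiplier: linear warmup from warmup_factor to 1,
    # then cosine decay from 1 down to lr_min_factor.
    # Usable as: LambdaLR(optimizer, lr_lambda=lambda s: cosine_lr_lambda(s, ...))
    if step < warmup_steps:
        alpha = step / max(1, warmup_steps)
        return warmup_factor * (1.0 - alpha) + alpha
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return lr_min_factor + 0.5 * (1.0 - lr_min_factor) * (1.0 + math.cos(math.pi * progress))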
24 changes: 22 additions & 2 deletions configs/models/faenet.yaml
@@ -1,6 +1,13 @@
default:
cano_args:
equivariance_module: "" # "", fa, untrained_cano, trained_cano, sign_equiv_sfa, trained_sign_inv_sfa, untrained_sign_inv_sfa, trained_sign_inv_sfa_E3, untrained_sign_inv_sfa_E3
cano_type: "3D" # "2D", "3D", "DA", ""
cano_method: "" # "", pointnet, simple (= 0 hidden layer pointnet), dgcnn
# Frame averaging specific arguments
fa_method: "random" # {"", all, random, det, se3-all, se3-random, se3-det}
# Legacy FA arguments:
frame_averaging: "" # {"2D", "3D", "DA", ""}
fa_method: "" # {"", all, randon, det, se3-all, se3-randon, se3-det}
fa_method: "" # {"", all, random, det, se3-all, se3-random, se3-det}
model:
name: faenet
act: swish
@@ -41,6 +48,19 @@ default:
res_updown:
hidden_channels: 128
norm: batch1d # batch1d, layer or null
######################################### Ewald message passing hyperparameters
use_ewald: False
ewald_hyperparams:
num_k_x: 1 # Number of k-points sampled along x (periodic systems)
num_k_y: 1 # Number of k-points sampled along y (periodic systems)
num_k_z: 3 # Number of k-points sampled along z (periodic systems)
downprojection_size: 8 # Size of linear bottleneck layer
num_hidden: 3 # Number of residuals in update function
# params for qm9
k_cutoff: 0.4 # Frequency cutoff [Å^-1]
delta_k: 0.2 # Voxel grid resolution [Å^-1]
num_k_rbf: 48 # Gaussian radial basis size (Fourier filter)
#########################################
optim:
batch_size: 256
eval_batch_size: 256
@@ -69,7 +89,7 @@ is2re:
default:
graph_rewiring: remove-tag-0
frame_averaging: "2D" # {"2D", "3D", "DA", ""}
fa_method: "se3-random" # {"", all, randon, det, se3-all, se3-randon, se3-det}
fa_method: "se3-random" # {"", all, random, det, se3-all, se3-random, se3-det}
# *** Important note ***
# The total number of gpus used for this run was 1.
# If the global batch size (num_gpus * batch_size) is modified
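The new cano_args block selects between frame averaging and the learned or untrained canonicalisation modules added in this PR. As a rough sketch of the deterministic 3D case only (the flavour behind cano_type: "3D" with a deterministic fa_method), a frame can be taken from a PCA of the centred positions; the actual code additionally handles the sign ambiguities that frame averaging averages over, the SE(3) variants, and the trainable networks (pointnet, dgcnn), none of which are shown here.

import torch

def deterministic_3d_frame(pos: torch.Tensor) -> torch.Tensor:
    # Illustrative sketch: centre the positions, diagonalise their covariance,
    # and express the positions in the resulting eigenbasis.
    centered = pos - pos.mean(dim=0, keepdim=True)   # remove translations
    cov = centered.T @ centered / centered.shape[0]  # (3, 3) covariance
    _, eigvecs = torch.linalg.eigh(cov)              # columns are the frame axes
    return centered @ eigvecs                        # canonicalised coordinates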
36 changes: 35 additions & 1 deletion configs/models/painn.yaml
@@ -19,7 +19,41 @@ is2re:
s2ef:
default: {}
200k: {}
2M: {}
2M:
model:
name: painn
hidden_channels: 512
num_layers: 6
num_rbf: 128
cutoff: 12.0
max_neighbors: 50
# scale_file: configs/s2ef/all/painn/painn_nb6_scaling_factors.pt
regress_forces: True
direct_forces: True
use_pbc: True

optim:
batch_size: 32
eval_batch_size: 32
load_balancing: atoms
eval_every: 5000
num_workers: 2
optimizer: AdamW
optimizer_params:
amsgrad: True
weight_decay: 0. # 2e-6 (TF weight decay) / 1e-4 (lr) = 2e-2
lr_initial: 1.e-4
lr_gamma: 0.8
scheduler: ReduceLROnPlateau
mode: min
factor: 0.8
patience: 3
max_epochs: 80
force_coefficient: 100
energy_coefficient: 1
ema_decay: 0.999
clip_grad_norm: 10

20M: {}
all: {}

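For readability, the new 2M optim block corresponds roughly to the following PyTorch objects (a sketch under the assumption that the keys map one-to-one onto torch arguments; the trainer's actual wiring may differ).

import torch

def build_painn_optim(model, cfg):
    # Sketch: map the YAML optim block above onto AdamW + ReduceLROnPlateau.
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=cfg["lr_initial"],                        # 1.e-4
        weight_decay=cfg["weight_decay"],            # 0.
        amsgrad=cfg["optimizer_params"]["amsgrad"],  # True
    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode=cfg["mode"],          # "min"
        factor=cfg["factor"],      # 0.8
        patience=cfg["patience"],  # 3
    )
    return optimizer, scheduler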
13 changes: 13 additions & 0 deletions configs/models/schnet.yaml
@@ -16,6 +16,19 @@ default:
phys_embeds: False # True
phys_hidden_channels: 0
energy_head: False # can be {False, weighted-av-initial-embeds, weighted-av-final-embeds, random}
######################################### Ewald message passing hyperparameters
use_ewald: False
ewald_hyperparams:
num_k_x: 1 # Number of k-points sampled along x (periodic systems)
num_k_y: 1 # Number of k-points sampled along y (periodic systems)
num_k_z: 3 # Number of k-points sampled along z (periodic systems)
downprojection_size: 8 # Size of linear bottleneck layer
num_hidden: 3 # Number of residuals in update function
# params for qm9
k_cutoff: 0.4 # Frequency cutoff [Å^-1]
delta_k: 0.2 # Voxel grid resolution [Å^-1]
num_k_rbf: 48 # Gaussian radial basis size (Fourier filter)
#########################################
optim:
batch_size: 64
eval_batch_size: 64
13 changes: 12 additions & 1 deletion configs/models/tasks/s2ef.yaml
@@ -11,14 +11,25 @@ default:
grad_input: atomic forces
train_on_free_atoms: True
eval_on_free_atoms: True
relax_dataset:
# path to lmdb of systems to be relaxed (uses same lmdbs as is2re)
src: /network/scratch/s/schmidtv/ocp/datasets/ocp/is2re/all/val_id/
write_pos: True
relaxation_steps: 300
relax_opt:
maxstep: 0.04
memory: 50
damping: 1.0
alpha: 70.0
traj_dir: "trajectories" # specify directory you wish to log the entire relaxations, suppress otherwise
normalizer: null
mode: train
optim:
optimizer: AdamW
model:
otf_graph: False
max_num_neighbors: 40
regress_forces: from_energy # can be in{ "from_energy", "direct", "direct_with_gradient_target" }
regress_forces: direct_with_gradient_target # can be in{ "from_energy", "direct", "direct_with_gradient_target" }
dataset:
default_val: val_id
train:
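The switch from from_energy to direct_with_gradient_target changes how forces are obtained. As a hedged illustration (the model call below is a hypothetical stand-in for the real forward signature): in from_energy mode forces come from differentiating the predicted energy with respect to positions, while direct modes predict them with a separate head, and direct_with_gradient_target additionally supervises that head with gradient targets.

import torch

def forces_from_energy(model, pos, batch):
    # Sketch of the "from_energy" mode: forces are the negative gradient of the
    # predicted energy with respect to atomic positions, obtained via autograd.
    pos = pos.detach().requires_grad_(True)
    energy = model(pos, batch)  # per-system energy, shape (num_systems,)
    forces = -torch.autograd.grad(energy.sum(), pos, create_graph=True)[0]
    return forces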
10 changes: 3 additions & 7 deletions mila/sbatch.py
@@ -41,7 +41,7 @@
conda activate {env}
fi
{wandb_offline}
srun --gpus-per-task=1 --output={output} {python_command}
srun --output={output} {python_command}
"""


@@ -234,9 +234,7 @@ def load_sbatch_args_from_dir(dir):
k, v = (
line[2:]
if line.startswith("--")
else line[1:]
if line.startswith("-")
else line
else line[1:] if line.startswith("-") else line
).split("=")
sbatch_args[k] = v
args = {
@@ -280,9 +278,7 @@ def load_sbatch_args_from_dir(dir):
modules = (
[]
if not args.modules
else args.modules.split(",")
if isinstance(args.modules, str)
else args.modules
else args.modules.split(",") if isinstance(args.modules, str) else args.modules
)
if args.verbose:
args.pretty_print()
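Both conditional-expression hunks above are formatter-style re-layouts of the same logic; no behaviour changes. The first one's logic, stripping a leading "--" or "-" from a recorded SBATCH option and then splitting it on "=", is equivalent to this small flattened sketch (the helper name is illustrative):

def parse_sbatch_option(line: str):
    # Flattened form of the nested conditional expression in load_sbatch_args_from_dir:
    # drop a leading "--" or "-", then split the option into key and value.
    if line.startswith("--"):
        line = line[2:]
    elif line.startswith("-"):
        line = line[1:]
    key, value = line.split("=")
    return key, value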