Added Docker with GPU support for inference and minor improvements #2

Open · wants to merge 6 commits into base: main

3 changes: 2 additions & 1 deletion .gitignore
@@ -1 +1,2 @@
/log
/log
__pycache__
16 changes: 16 additions & 0 deletions Dockerfile
@@ -0,0 +1,16 @@
# Use NVIDIA PyTorch image as the base
FROM nvcr.io/nvidia/pytorch:22.03-py3

# Install the pinned PyTorch build
RUN conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.6 -c pytorch -c conda-forge

# Install pinned PyTorch Geometric packages built against CUDA 11.6 (cu116)
RUN pip install torch-scatter==2.0.9 torch-sparse==0.6.14 torch-cluster==1.6.0 torch-spline-conv==1.2.1 torch-geometric==2.1.0 -f https://data.pyg.org/whl/torch-1.12.0+cu116.html

# Copy the requirements.txt file into the container
COPY requirements.txt .
RUN pip install -r requirements.txt

# Set the default command to bash
CMD ["/bin/bash"]
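
A quick sanity check of the resulting image (a sketch, assuming it is tagged `amp-net:latest` as in the README steps below) is to confirm that PyTorch can see the GPU inside the container:

```bash
# Build the image from the repository root (Dockerfile and requirements.txt must be present)
docker build -t amp-net:latest .

# Print the PyTorch version and whether CUDA is visible inside the container
docker run --rm --gpus all amp-net:latest \
  python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
```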

81 changes: 80 additions & 1 deletion README.md
@@ -15,7 +15,8 @@ Membranes MDPI, 2022.<br><br>
<sup>3 </sup> Grupo de Diseño de Productos y Procesos (GDPP), Department of Chemical and Food Engineering, Universidad de los Andes, Bogota 111711, Colombia.<br/>

## Installation
The following steps are required in order to run AMPs-Net:<br />

The following steps are required to run AMPs-Net in a local Conda environment:<br />

```bash
$ export PATH=/usr/local/cuda-11.0/bin:$PATH
@@ -27,6 +28,84 @@ $ conda activate amps_env
$ bash amps_env.sh
```
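
To verify the environment before running inference, a minimal check (assuming `amps_env.sh` installs PyTorch and PyTorch Geometric, as the Dockerfile added in this PR does) is:

```bash
conda activate amps_env
# Print library versions and whether a CUDA device is visible
python -c "import torch, torch_geometric; print(torch.__version__, torch_geometric.__version__, torch.cuda.is_available())"
```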

You should then be able to run inference with the binary classification model:
```bash
./run_inference_AMP.sh \
-m <model_inference_path> \
-b <batch_size> \
-d <device> \
-n <num_metadata> \
-f <file_name_inference>
```

or using the multilabel model:

```bash
./run_inference_multilabel.sh \
-m <model_inference_path> \
-b <batch_size> \
-d <device> \
-n <num_metadata> \
-f <file_name_inference>
```

Command parameters:

- **model_inference_path**: path to the model checkpoints used for inference, either `checkpoints/BINARY_AMPs` or `checkpoints/MULTILABEL_AMPs`.
- **batch_size**: number of samples per batch loaded by the torch DataLoader.
- **device**: GPU device number to use, usually 0.
- **num_metadata**: number of metadata features for the graph representation.
- **file_name_inference**: name of the CSV file in the `Inference` folder to run inference on, e.g. `Example.csv`.
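
For example, assuming the pretrained binary checkpoints have been downloaded to `checkpoints/BINARY_AMPs` and `Example.csv` is in the `Inference` folder, a local run could look like:

```bash
./run_inference_AMP.sh \
  -m checkpoints/BINARY_AMPs \
  -b 4 \
  -d 0 \
  -n 8 \
  -f Example.csv
```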

## Installation with Docker

You will need a machine with a CUDA-compatible GPU and the NVIDIA Container Toolkit installed.

Please follow these steps:

1. Install [Docker](https://www.docker.com/).
    * Install the
      [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
      for GPU support.
    * Set up
      [Docker as a non-root user](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user).

2. Clone this repository and `cd` into it.

```bash
git clone https://github.com/BCV-Uniandes/AMPs-Net
cd AMPs-Net
```

3. Prepare your inference files in the `Inference` folder; see the [Usage](#Usage) section.

4. Build the Docker image:

```bash
docker build -t amp-net:latest .
```

5. Run inference with the binary classification model, e.g., using the provided `Example.csv` file:

```bash
docker run \
-it --rm --gpus all \
-v "$(pwd)":/workspace \
amp-net:latest \
bash -c "/workspace/run_inference_AMP.sh -m checkpoints/BINARY_AMPs -b 4 -d 0 -n 8 -f Example.csv"
```

6. Run inference with the multilabel model, e.g., using the provided `Example.csv` file:

```bash
docker run \
-it --rm --gpus all \
-v "$(pwd)":/workspace \
amp-net:latest \
bash -c "/workspace/run_inference_multilabel.sh -m checkpoints/MULTILABEL_AMPs -b 4 -d 0 -n 8 -f Example.csv"
```

7. Check the predictions in the `Inference` folder.
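
Based on `inference.py` in this PR, the binary model writes its predictions to `Inference/AMPs/` and the multilabel model to `Inference/MultiLabel/`, so a quick check could be:

```bash
ls Inference/AMPs/ Inference/MultiLabel/
```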

## Models
We provide pretrained models for download at the following [link](http://157.253.243.19/AMPs-Net/).
The models were last updated on 01/01/2023.
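
For `inference.py` to find the weights, the downloaded checkpoints are expected to follow the layout below (inferred from the paths built in `eval()`; the exact folder names under the download link may differ):

```
checkpoints/
└── BINARY_AMPs/                        # or MULTILABEL_AMPs
    ├── Fold1/model_ckpt/Checkpoint.pth
    ├── Fold2/model_ckpt/Checkpoint.pth
    └── ...                             # one folder per fold; four folds are loaded in eval()
```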
39 changes: 23 additions & 16 deletions inference.py
@@ -1,5 +1,5 @@
import torch
from torch_geometric.data import DataLoader
from torch_geometric.loader import DataLoader
from model.model import DeeperGCN
from tqdm import tqdm
from args import ArgsInit
@@ -10,6 +10,19 @@
import pandas as pd
import os

def get_device(args):
    if torch.cuda.is_available():
        device_count = torch.cuda.device_count()
        if args.device < device_count:
            device = torch.device(f"cuda:{args.device}")
        else:
            print(f"Warning: Specified CUDA device {args.device} is not available. Using device 0 instead.")
            device = torch.device("cuda:0")
    else:
        print("CUDA is not available. Falling back to CPU.")
        device = torch.device("cpu")
    return device


@torch.no_grad()
def eval(model, device, loader, num_classes, args, target=None):
@@ -27,7 +40,7 @@ def eval(model, device, loader, num_classes, args, target=None):
print("------Copying model 4---------")
prop_predictor4 = copy.deepcopy(model)

test_model_path = "./log/" + args.save
test_model_path = args.save

test_model_path1 = test_model_path + "/Fold1/model_ckpt/Checkpoint.pth"
test_model_path2 = test_model_path + "/Fold2/model_ckpt/Checkpoint.pth"
@@ -36,16 +49,16 @@

# LOAD MODELS
print("------- Loading weights----------")
prop_predictor1.load_state_dict(torch.load(test_model_path1)["model_state_dict"])
prop_predictor1.load_state_dict(torch.load(test_model_path1, map_location=device)["model_state_dict"])
prop_predictor1.to(device)

prop_predictor2.load_state_dict(torch.load(test_model_path2)["model_state_dict"])
prop_predictor2.load_state_dict(torch.load(test_model_path2, map_location=device)["model_state_dict"])
prop_predictor2.to(device)

prop_predictor3.load_state_dict(torch.load(test_model_path3)["model_state_dict"])
prop_predictor3.load_state_dict(torch.load(test_model_path3, map_location=device)["model_state_dict"])
prop_predictor3.to(device)

prop_predictor4.load_state_dict(torch.load(test_model_path4)["model_state_dict"])
prop_predictor4.load_state_dict(torch.load(test_model_path4, map_location=device)["model_state_dict"])
prop_predictor4.to(device)

# METHOD.EVAL
@@ -101,14 +114,8 @@ def main():

args = ArgsInit().args

if args.use_gpu:
device = (
torch.device("cuda:" + str(args.device))
if torch.cuda.is_available()
else torch.device("cpu")
)
else:
device = torch.device("cpu")
device = get_device(args)
print(f"Using device: {device}")

if args.binary:
args.nclasses = 2
@@ -164,9 +171,9 @@ def main():
inference_results = pd.DataFrame.from_dict(save_item)

if args.binary:
saving_dir = "./Inference/AMPs/"
saving_dir = "Inference/AMPs/"
elif args.multilabel:
saving_dir = "./Inference/MultiLabel/"
saving_dir = "Inference/MultiLabel/"

if not os.path.exists(saving_dir):
os.makedirs(saving_dir, exist_ok=True)
7 changes: 7 additions & 0 deletions requirements.txt
@@ -0,0 +1,7 @@
rdkit==2024.3.3
modlamp==4.3.0
ogb==1.3.6
tqdm==4.63.0
h5py==3.11.0
scipy==1.9.0
numpy==1.24.4
38 changes: 31 additions & 7 deletions run_inference_AMP.sh
100644 → 100755
@@ -1,7 +1,31 @@
INFERENCE_MODEL=''
BS=4
DEVICE=7
NUM_METADATA=8
FILE_INFERENCE='Example.csv'
python inference.py --device $DEVICE --use_gpu --conv_encode_edge --block res+ --gcn_aggr softmax --learn_t --t 1.0 --dropout 0.2 --batch_size $BS --file_infe $FILE_INFERENCE --metadata --num_metadata 8 --binary
python filter_peptides_descriptors.py --file_name $FILE_INFERENCE
#!/bin/bash

# Function to display usage message
usage() {
echo "Usage: $0 -m INFERENCE_MODEL -b BS -d DEVICE -n NUM_METADATA -f FILE_INFERENCE"
exit 1
}

# Parse command line arguments
while getopts m:b:d:n:f: flag
do
case "${flag}" in
m) INFERENCE_MODEL=${OPTARG};;
b) BS=${OPTARG};;
d) DEVICE=${OPTARG};;
n) NUM_METADATA=${OPTARG};;
f) FILE_INFERENCE=${OPTARG};;
*) usage;;
esac
done

# Check if all mandatory arguments are provided
if [ -z "$INFERENCE_MODEL" ] || [ -z "$BS" ] || [ -z "$DEVICE" ] || [ -z "$NUM_METADATA" ] || [ -z "$FILE_INFERENCE" ]; then
usage
fi

# Run inference script
python inference.py --device $DEVICE --use_gpu --conv_encode_edge --block res+ --gcn_aggr softmax --learn_t --t 1.0 --dropout 0.2 --batch_size $BS --file_infe $FILE_INFERENCE --metadata --num_metadata $NUM_METADATA --binary --save $INFERENCE_MODEL

# Run filter peptides descriptors script
python filter_peptides_descriptors.py --file_name $FILE_INFERENCE
33 changes: 27 additions & 6 deletions run_inference_multilabel.sh
100644 → 100755
@@ -1,7 +1,28 @@
INFERENCE_MODEL='TEST_MULTILABEL_AMP'
BS=4
DEVICE=7
NUM_METADATA=8
FILE_INFERENCE='Example.csv'
python inference.py --device $DEVICE --use_gpu --conv_encode_edge --block res+ --gcn_aggr softmax --learn_t --t 1.0 --dropout 0.2 --batch_size $BS --save $INFERENCE_MODEL --file_infe $FILE_INFERENCE --metadata --num_metadata 8 --multilabel --nclasses 4
#!/bin/bash

# Function to display usage message
usage() {
echo "Usage: $0 -m INFERENCE_MODEL -b BS -d DEVICE -n NUM_METADATA -f FILE_INFERENCE"
exit 1
}

# Parse command line arguments
while getopts m:b:d:n:f: flag
do
case "${flag}" in
m) INFERENCE_MODEL=${OPTARG};;
b) BS=${OPTARG};;
d) DEVICE=${OPTARG};;
n) NUM_METADATA=${OPTARG};;
f) FILE_INFERENCE=${OPTARG};;
*) usage;;
esac
done

# Check if all mandatory arguments are provided
if [ -z "$INFERENCE_MODEL" ] || [ -z "$BS" ] || [ -z "$DEVICE" ] || [ -z "$NUM_METADATA" ] || [ -z "$FILE_INFERENCE" ]; then
usage
fi

# Run inference script
python inference.py --use_gpu --device $DEVICE --conv_encode_edge --block res+ --gcn_aggr softmax --learn_t --t 1.0 --dropout 0.2 --batch_size $BS --save $INFERENCE_MODEL --file_infe $FILE_INFERENCE --metadata --num_metadata $NUM_METADATA --multilabel --nclasses 4