diff --git a/gpus/.dockerignore b/gpus/.dockerignore
new file mode 100644
index 0000000..382f954
--- /dev/null
+++ b/gpus/.dockerignore
@@ -0,0 +1 @@
+workspace/
\ No newline at end of file
diff --git a/gpus/.gitignore b/gpus/.gitignore
new file mode 100644
index 0000000..ece6ca2
--- /dev/null
+++ b/gpus/.gitignore
@@ -0,0 +1 @@
+/workspace
\ No newline at end of file
diff --git a/gpus/Dockerfile b/gpus/Dockerfile
new file mode 100644
index 0000000..a274d64
--- /dev/null
+++ b/gpus/Dockerfile
@@ -0,0 +1,8 @@
+FROM nvidia/cuda:11.6.1-base-ubuntu20.04
+
+# Copy code
+COPY . /workspace
+RUN chmod +x /workspace/*.sh
+
+# Set working directory
+WORKDIR /workspace
\ No newline at end of file
diff --git a/gpus/README.md b/gpus/README.md
new file mode 100644
index 0000000..bbeb3aa
--- /dev/null
+++ b/gpus/README.md
@@ -0,0 +1,38 @@
+# GPUs example
+
+## Project setup
+
+An important requirement is that Docker and/or Singularity must be installed.
+
+```bash
+# Create Python environment and install MLCube with runners
+virtualenv -p python3 ./env && source ./env/bin/activate && pip install mlcube-docker mlcube-singularity
+# Fetch the gpus example from GitHub
+git clone https://github.com/mlcommons/mlcube_examples && cd ./mlcube_examples
+git fetch origin pull/68/head:feature/gpu_example && git checkout feature/gpu_example
+cd ./gpus/
+```
+
+## MLCube tasks
+
+There is only one task; it outputs the `CUDA_VISIBLE_DEVICES` variable along with the output of the `nvidia-smi` command:
+
+```shell
+mlcube run --task=check_gpus
+```
+
+You can modify the number of GPUs by editing the `accelerator_count` value inside the **mlcube.yaml** file.
+
+You can also override the number of GPUs at run time with the `--gpus` flag, for example:
+
+```shell
+mlcube run --task=check_gpus --gpus=2
+```
+
+### Singularity
+
+To run with Singularity, specify the platform when running the command:
+
+```shell
+mlcube run --task=check_gpus --platform=singularity
+```
diff --git a/gpus/check_gpus.sh b/gpus/check_gpus.sh
new file mode 100644
index 0000000..c9e1135
--- /dev/null
+++ b/gpus/check_gpus.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+LOG_DIR=${LOG_DIR:-"/"}
+
+# Handle MLCube parameters
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --log_dir=*)
+            LOG_DIR="${1#*=}"
+            ;;
+        *) ;;
+    esac
+    shift
+done
+
+echo "CUDA_VISIBLE_DEVICES $CUDA_VISIBLE_DEVICES" |& tee "$LOG_DIR/gpus.log"
+echo "NVIDIA_VISIBLE_DEVICES $NVIDIA_VISIBLE_DEVICES" |& tee -a "$LOG_DIR/gpus.log"
+nvidia-smi |& tee -a "$LOG_DIR/gpus.log"
+nvidia-smi --query-gpu=gpu_name,uuid --format=csv |& tee -a "$LOG_DIR/gpus.log"
diff --git a/gpus/mlcube.yaml b/gpus/mlcube.yaml
new file mode 100644
index 0000000..ea62eca
--- /dev/null
+++ b/gpus/mlcube.yaml
@@ -0,0 +1,24 @@
+name: check_gpus
+description: Check GPUs example
+authors:
+  - { name: "MLCommons Best Practices Working Group" }
+
+platform:
+  accelerator_count: 1
+
+docker:
+  # Image name.
+  image: dfjbtest/gpus_example:0.0.1
+  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
+  build_context: "./"
+  # Docker file name within docker build context, default is `Dockerfile`.
+  build_file: "Dockerfile"
+  # GPU arguments
+  gpu_args: "--gpus=1"
+
+tasks:
+  check_gpus:
+    entrypoint: ./check_gpus.sh
+    parameters:
+      outputs:
+        log_dir: logs/
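
As a reference for the `accelerator_count` note in the README above, here is a minimal sketch of what `mlcube.yaml` could look like when two GPUs are requested. The `accelerator_count` and `gpu_args` values below are edited for illustration only and are not part of this PR:

```yaml
# Sketch only: mlcube.yaml adjusted to request two GPUs (illustrative values).
name: check_gpus
description: Check GPUs example
authors:
  - { name: "MLCommons Best Practices Working Group" }

platform:
  accelerator_count: 2      # was 1; number of accelerators the task requests

docker:
  image: dfjbtest/gpus_example:0.0.1
  build_context: "./"
  build_file: "Dockerfile"
  gpu_args: "--gpus=2"      # was "--gpus=1"; forwarded to the Docker runner

tasks:
  check_gpus:
    entrypoint: ./check_gpus.sh
    parameters:
      outputs:
        log_dir: logs/
```

The same effect can be obtained ad hoc with `mlcube run --task=check_gpus --gpus=2`, which overrides the value in the file, as the README notes.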
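To cross-check what the container reports outside of the MLCube runners, the image can also be built and run directly. This is a sketch only, assuming Docker 19.03+ with the NVIDIA Container Toolkit on the host; the `/tmp` log location is chosen purely for illustration:

```shell
# Build the image from the gpus/ build context (tag taken from mlcube.yaml).
docker build -t dfjbtest/gpus_example:0.0.1 ./gpus

# Run the same check script with two GPUs exposed; the log is written to /tmp inside the container.
docker run --rm --gpus=2 dfjbtest/gpus_example:0.0.1 ./check_gpus.sh --log_dir=/tmp
```

With Singularity, the rough equivalent is to pull the image via a `docker://` URI and pass `--nv` so the host's NVIDIA devices and driver utilities are available inside the container.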