Merge pull request #277 from mlcommons/mlperf-inference
dev <- Mlperf inference
arjunsuresh committed Sep 22, 2024
2 parents 696655b + af7330f commit fd32ff6
Showing 30 changed files with 582 additions and 296 deletions.
18 changes: 7 additions & 11 deletions .github/workflows/test-mlperf-inference-gptj.yml
@@ -4,17 +4,16 @@
 name: MLPerf inference GPT-J
 
 on:
-  pull_request:
-    branches: [ "main1", "dev1" ]
+  push:
+    branches: [ "main", "dev", "mlperf-inference" ]
     paths:
       - '.github/workflows/test-mlperf-inference-gptj.yml'
      - '**'
      - '!**.md'
 
 jobs:
   build:
-
-    runs-on: ubuntu-latest
+    runs-on: [ self-hosted, linux, x64 ]
     strategy:
       fail-fast: false
       matrix:
@@ -23,15 +22,12 @@ jobs:
       precision: [ "bfloat16" ]
 
     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
     - name: Install dependencies
      run: |
-        python3 -m pip install cmind
+        source gh_action/bin/deactivate || python3 -m venv gh_action
+        source gh_action/bin/activate
+        export CM_REPOS=$HOME/GH_CM
         cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
     - name: Test MLPerf Inference GPTJ
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="cTuning" --model=gptj --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet
+        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
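For reference, the updated test step can be reproduced by hand. A minimal sketch, assuming a working cm installation; the backend value (pytorch) is an assumption substituted for the matrix variable, the remaining flags are taken from the step above:

    python3 -m pip install cmind
    cm pull repo gateoverflow@cm4mlops
    cm run script --tags=run-mlperf,inference,_submission,_short \
        --submitter="MLCommons" --docker --model=gptj-99 \
        --backend=pytorch --device=cuda --scenario=Offline \
        --test_query_count=1 --precision=bfloat16 --target_qps=1 \
        --quiet --docker_it=no --docker_dt=yes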
397 changes: 230 additions & 167 deletions automation/script/module.py

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions automation/script/module_misc.py
@@ -1524,7 +1524,7 @@ def dockerfile(i):
         if i.get('print_deps'):
             cm_input = {'action': 'run',
                         'automation': 'script',
-                        'tags': f'{tag_string}',
+                        'tags': f"""{i.get('tags')}""",
                         'print_deps': True,
                         'quiet': True,
                         'silent': True,
@@ -1557,7 +1557,7 @@ def dockerfile(i):
             'fake_run_option': fake_run_option,
             'comments': comments,
             'run_cmd': f'{run_cmd} --quiet',
-            'script_tags': f'{tag_string}',
+            'script_tags': f"""{i.get('tags')}""",
             'copy_files': docker_copy_files,
             'quiet': True,
             'env': env,
@@ -2037,11 +2037,11 @@ def docker(i):
             'image_repo': image_repo,
             'interactive': interactive,
             'mounts': mounts,
-            'image_name': 'cm-script-'+script_alias,
+            'image_name': i.get('docker_image_name', ''),
             # 'image_tag': script_alias,
             'image_tag_extra': image_tag_extra,
             'detached': detached,
-            'script_tags': f'{tag_string}',
+            'script_tags': f"""{i.get('tags')}""",
             'run_cmd': final_run_cmd,
             'v': i.get('v', False),
             'quiet': True,
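One practical effect of the image_name change above: the Docker image name can now be supplied by the caller instead of always being derived from the script alias. A hypothetical invocation, assuming the usual mapping of CLI flags to input keys (the flag name mirrors the docker_image_name key read above):

    cm docker script --tags=app,mlperf,inference --docker_image_name=my-mlperf-image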
10 changes: 7 additions & 3 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -533,6 +533,7 @@ deps:
   - tags: get,dataset,coco2014,_validation
     names:
     - coco2014-preprocessed
+    - coco2014-dataset
     enable_if_env:
       CM_MODEL:
       - stable-diffusion-xl
@@ -590,6 +591,9 @@ deps:
   - tags: generate,user-conf,mlperf,inference
     names:
     - user-conf-generator
+    skip_if_env:
+      CM_RUN_STATE_DOCKER:
+      - 'yes'
 
   # Install MLPerf loadgen
   - tags: get,loadgen
@@ -1232,7 +1236,7 @@ variations:
   float16:
     group: precision
     add_deps_recursive:
-      ml-model:
+      ml-model-float16:
         tags:
           _fp16
     env:
@@ -1243,9 +1247,9 @@ variations:
   bfloat16:
     group: precision
     add_deps_recursive:
-      ml-model:
+      ml-model-bfloat16:
         tags:
-          _fp16
+          _fp32
     env:
       CM_MLPERF_QUANTIZATION: off
       CM_MLPERF_MODEL_PRECISION: bfloat16
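As a usage sketch, a precision variation is selected with a _bfloat16 tag on the command line; with the change above it retags only the dependency named ml-model-bfloat16 rather than anything named ml-model. The model and backend tags here are illustrative:

    cm run script --tags=app,mlperf,inference,mlcommons,python,_gptj,_pytorch,_bfloat16 --quiet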
6 changes: 5 additions & 1 deletion script/app-mlperf-inference-nvidia/_cm.yaml
@@ -42,7 +42,6 @@ input_mapping:
   mlperf_conf: CM_MLPERF_CONF
   mode: CM_MLPERF_LOADGEN_MODE
   output_dir: CM_MLPERF_OUTPUT_DIR
-  performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT
   scenario: CM_MLPERF_LOADGEN_SCENARIO
   user_conf: CM_MLPERF_USER_CONF
   devices: CM_MLPERF_NVIDIA_HARNESS_DEVICES
@@ -477,6 +476,11 @@ variations:
       names:
       - scipy
       version: 1.10.1
+    - tags: get,generic-python-lib,_package.numpy
+      names:
+      - numpy
+      version_max: 1.22.99
+      version_max_usable: "1.22"
 
   sdxl,v4.1:
     deps:
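The version_max / version_max_usable pair caps numpy for this harness: if the resolver would pick anything newer than 1.22.99, CM falls back to installing the 1.22 series instead. A rough pip equivalent for reproducing the environment outside CM (an approximation, not what CM literally runs):

    python3 -m pip install "numpy<1.23"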
2 changes: 1 addition & 1 deletion script/app-mlperf-inference-nvidia/customize.py
@@ -361,7 +361,7 @@ def preprocess(i):
     if input_format:
         run_config += f" --input_format={input_format}"
 
-    performance_sample_count = env.get('CM_MLPERF_PERFORMANCE_SAMPLE_COUNT')
+    performance_sample_count = env.get('CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT')
     if performance_sample_count:
         run_config += f" --performance_sample_count={performance_sample_count}"
6 changes: 6 additions & 0 deletions script/app-mlperf-inference/_cm.yaml
@@ -1565,15 +1565,21 @@ docker:
   - tags: get,mlperf,inference,results,dir,local
     names:
     - get-mlperf-inference-results-dir
+    skip_if_env:
+      OUTPUT_BASE_DIR: [ on ]
   - tags: get,mlperf,inference,submission,dir,local
     names:
     - get-mlperf-inference-submission-dir
+    skip_if_env:
+      CM_MLPERF_INFERENCE_SUBMISSION_DIR: [ on ]
 
   pre_run_cmds:
     #- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update
     - cm pull repo
   mounts:
     - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}"
     - "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}"
+    - "${{ OUTPUT_BASE_DIR }}:${{ OUTPUT_BASE_DIR }}"
+    - "${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}:${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}"
     - "${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }}"
     - "${{ CM_CRITEO_PREPROCESSED_PATH }}:${{ CM_CRITEO_PREPROCESSED_PATH }}"
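Each mounts entry of the form "${{ VAR }}:${{ VAR }}" becomes a bind mount in the generated docker run command once the variables are resolved. Roughly, for the two new entries (a sketch; image and command omitted):

    docker run \
        -v "${OUTPUT_BASE_DIR}:${OUTPUT_BASE_DIR}" \
        -v "${CM_MLPERF_INFERENCE_SUBMISSION_DIR}:${CM_MLPERF_INFERENCE_SUBMISSION_DIR}" \
        ...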
1 change: 0 additions & 1 deletion script/build-docker-image/_cm.yaml
@@ -42,5 +42,4 @@ prehook_deps:
 - enable_if_env:
     CM_BUILD_DOCKERFILE:
     - 'yes'
-    - '1'
   tags: build,dockerfile
58 changes: 29 additions & 29 deletions script/build-docker-image/customize.py
@@ -15,6 +15,7 @@ def preprocess(i):
     else:
         build_dockerfile = True
         env['CM_BUILD_DOCKERFILE'] = "yes"
+        env['CM_DOCKERFILE_BUILD_FROM_IMAGE_SCRIPT'] = "yes"
 
 
     CM_DOCKER_BUILD_ARGS = env.get('+CM_DOCKER_BUILD_ARGS', [])
@@ -34,55 +35,54 @@ def preprocess(i):
 #        else:
 #            env['CM_BUILD_DOCKERFILE'] = "no"
 #
-    if "CM_DOCKER_IMAGE_REPO" not in env:
+    if env.get("CM_DOCKER_IMAGE_REPO", "") == '':
         env['CM_DOCKER_IMAGE_REPO'] = "local"
 
     docker_image_name = env.get('CM_DOCKER_IMAGE_NAME', '')
     if docker_image_name == '':
-        docker_image_name = env.get('CM_DOCKER_RUN_SCRIPT_TAGS','').replace(',', '-').replace('_','')
-        if docker_image_name == '':
-            docker_image_name = 'cm'
-
-        env['CM_DOCKER_IMAGE_NAME'] = docker_image_name
+        docker_image_name = "cm-script-" + env.get('CM_DOCKER_RUN_SCRIPT_TAGS','').replace(',', '-').replace('_','-')
+        env['CM_DOCKER_IMAGE_NAME'] = docker_image_name
 
     if env.get("CM_DOCKER_IMAGE_TAG", "") == '':
         env['CM_DOCKER_IMAGE_TAG'] = "latest"
 
-    if env.get("CM_DOCKER_CACHE", "yes") in ["no", "False", False]:
+    if str(env.get("CM_DOCKER_CACHE", "yes")).lower() in ["no", "false", "0"]:
         env["CM_DOCKER_CACHE_ARG"] = " --no-cache"
 
     CMD = ''
 
     image_name = get_image_name(env)
 
-    if not build_dockerfile:
-        # Write .dockerignore
-        with open('.dockerignore', 'w') as f:
-            f.write('.git\n')
-
-        # Prepare CMD to build image
-        XCMD = [
-            'docker build ' + env.get('CM_DOCKER_CACHE_ARG',''),
-            ' ' + build_args,
-            ' -f "' + dockerfile_path + '"',
-            ' -t "' + image_name,
-            ' .'
-        ]
-
-        with open(dockerfile_path + '.build.sh', 'w') as f:
-            f.write(' \\\n'.join(XCMD) + '\n')
-
-        with open(dockerfile_path + '.build.bat', 'w') as f:
-            f.write(' ^\n'.join(XCMD) + '\n')
-
-        CMD = ''.join(XCMD)
-
-        print ('================================================')
-        print ('CM generated the following Docker build command:')
-        print ('')
-        print (CMD)
-
-        print ('')
+    if build_dockerfile:
+        dockerfile_path = "\${CM_DOCKERFILE_WITH_PATH}"
+
+    # Write .dockerignore
+    with open('.dockerignore', 'w') as f:
+        f.write('.git\n')
+
+    # Prepare CMD to build image
+    XCMD = [
+        'docker build ' + env.get('CM_DOCKER_CACHE_ARG',''),
+        ' ' + build_args,
+        ' -f "' + dockerfile_path + '"',
+        ' -t "' + image_name,
+        ' .'
+    ]
+
+    with open(dockerfile_path + '.build.sh', 'w') as f:
+        f.write(' \\\n'.join(XCMD) + '\n')
+
+    with open(dockerfile_path + '.build.bat', 'w') as f:
+        f.write(' ^\n'.join(XCMD) + '\n')
+
+    CMD = ''.join(XCMD)
+
+    print ('================================================')
+    print ('CM generated the following Docker build command:')
+    print ('')
+    print (CMD)
+
+    print ('')
 
     env['CM_DOCKER_BUILD_CMD'] = CMD
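For illustration, with the defaults set above (repo "local", tag "latest") the generated .build.sh would contain a command along these lines; the script portion of the image name and the omitted build args are illustrative:

    docker build \
        -f "${CM_DOCKERFILE_WITH_PATH}" \
        -t "local/cm-script-app-mlperf-inference:latest" \
        .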
1 change: 0 additions & 1 deletion script/build-dockerfile/_cm.yaml
@@ -56,7 +56,6 @@ post_deps:
 - enable_if_env:
     CM_BUILD_DOCKER_IMAGE:
     - 'yes'
-    - '1'
   names:
   - build-docker-image
   tags: build,docker,image
13 changes: 8 additions & 5 deletions script/build-dockerfile/customize.py
@@ -21,7 +21,7 @@ def preprocess(i):
     input_args = []
     copy_files = []
 
-    if 'CM_DOCKER_RUN_SCRIPT_TAGS' in env:
+    if env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '') != '':
         script_tags=env['CM_DOCKER_RUN_SCRIPT_TAGS']
         found_scripts = cm.access({'action': 'search', 'automation': 'script', 'tags': script_tags})
         scripts_list = found_scripts['list']
@@ -62,7 +62,7 @@ def preprocess(i):
     else:
         cm_mlops_repo_branch_string = ""
 
-    if 'CM_DOCKERFILE_WITH_PATH' not in env:
+    if env.get('CM_DOCKERFILE_WITH_PATH', '') == '':
         env['CM_DOCKERFILE_WITH_PATH'] = os.path.join(os.getcwd(), "Dockerfile")
 
     dockerfile_with_path = env['CM_DOCKERFILE_WITH_PATH']
@@ -180,9 +180,10 @@ def preprocess(i):
 
     f.write(EOL+'# Install python packages' + EOL)
     python = get_value(env, config, 'PYTHON', 'CM_DOCKERFILE_PYTHON')
-    f.write('RUN {} -m venv cm-venv'.format(python) + " " + EOL)
-    f.write('RUN . cm-venv/bin/activate' + EOL)
-    f.write('RUN {} -m pip install --user '.format(python) + " ".join(get_value(env, config, 'python-packages')) + ' ' + pip_extra_flags + ' ' + EOL)
+    f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL)
+    f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL)
+    #f.write('RUN . /opt/venv/cm/bin/activate' + EOL)
+    f.write('RUN {} -m pip install '.format(python) + " ".join(get_value(env, config, 'python-packages')) + ' ' + pip_extra_flags + ' ' + EOL)
 
     f.write(EOL+'# Download CM repo for scripts' + EOL)
 
@@ -260,6 +261,8 @@ def preprocess(i):
         s = r['string']
         f.write(s + EOL)
 
+    print(f"""Dockerfile written at {dockerfile_with_path}""")
+
     f.close()
 
     #f = open(env['CM_DOCKERFILE_WITH_PATH'], "r")
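A sketch of the Dockerfile fragment the rewritten f.write calls emit, assuming python3 and an illustrative package list (the real list comes from the script's python-packages config):

    # Install python packages
    RUN python3 -m venv /home/cmuser/venv/cm
    ENV PATH="/home/cmuser/venv/cm/bin:$PATH"
    RUN python3 -m pip install cmind

Putting the venv's bin directory on PATH makes every later RUN instruction and the container entrypoint pick up the venv interpreter, which is why the separate activate line is now commented out: a RUN-time source would not persist across Dockerfile layers anyway.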
43 changes: 43 additions & 0 deletions script/clean-nvidia-mlperf-inference-scratch-space/_cm.yaml
@@ -0,0 +1,43 @@
alias: clean-nvidia-mlperf-inference-scratch-space
automation_alias: script
automation_uid: 5b4e0237da074764
cache: false
tags:
- clean
- nvidia
- scratch
- space
- mlperf
- inference
uid: bb41f6e3608e4e8a
deps:
  # Get Nvidia scratch space where data and models get downloaded
  - tags: get,mlperf,inference,nvidia,scratch,space
    names:
    - nvidia-scratch-space

variations:
  sdxl:
    group: model
    env:
      CM_MODEL: sdxl
  downloaded-data:
    group: artifact
    env:
      CM_CLEAN_ARTIFACT_NAME: downloaded_data
  preprocessed-data:
    group: artifact
    env:
      CM_CLEAN_ARTIFACT_NAME: preprocessed_data
  downloaded-model:
    group: artifact
    env:
      CM_CLEAN_ARTIFACT_NAME: downloaded_model
  v4.1:
    group: version
    env:
      CM_NVIDIA_MLPERF_INFERENCE_CODE_VERSION: v4.1
  v4.0:
    group: version
    env:
      CM_NVIDIA_MLPERF_INFERENCE_CODE_VERSION: v4.0
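A hypothetical invocation of the new script, combining one variation from each group defined above (model, artifact, version):

    cm run script --tags=clean,nvidia,scratch,space,mlperf,inference,_sdxl,_downloaded-data,_v4.1 --quiet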
22 changes: 22 additions & 0 deletions script/clean-nvidia-mlperf-inference-scratch-space/customize.py
@@ -0,0 +1,22 @@
from cmind import utils
import os

def preprocess(i):

    os_info = i['os_info']

    env = i['env']

    meta = i['meta']

    automation = i['automation']

    quiet = (env.get('CM_QUIET', False) == 'yes')

    return {'return':0}

def postprocess(i):

    env = i['env']

    return {'return':0}
1 change: 1 addition & 0 deletions script/clean-nvidia-mlperf-inference-scratch-space/run.bat
@@ -0,0 +1 @@
rem native script
17 changes: 17 additions & 0 deletions script/clean-nvidia-mlperf-inference-scratch-space/run.sh
@@ -0,0 +1,17 @@
#!/bin/bash

#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}

#To export any variable
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency

echo "Running: "
echo "${CM_RUN_CMD}"
echo ""

if [[ ${CM_FAKE_RUN} != "yes" ]]; then
  eval "${CM_RUN_CMD}"
  test $? -eq 0 || exit 1
fi