From a9da06e98457a01b95a265c3bfa4ad84e18694ee Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Wed, 17 Jul 2024 01:39:32 +0530
Subject: [PATCH 1/2] Add support for vLLM server

---
 script/app-mlperf-inference-mlcommons-python/_cm.yaml  |  8 +++++---
 .../app-mlperf-inference-mlcommons-python/customize.py | 10 +++++-----
 script/app-mlperf-inference/_cm.yaml                   |  6 ++++++
 script/process-mlperf-accuracy/_cm.json                |  7 ++++++-
 script/process-mlperf-accuracy/customize.py            |  6 +++++-
 5 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index 7c14cf349..82804d846 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -82,7 +82,7 @@ new_env_keys:
   - CM_HW_NAME
   - CM_ML_MODEL_*
   - CM_MAX_EXAMPLES
-
+  - CM_VLLM_*
 new_state_keys:
   - mlperf-inference-implementation
   - CM_SUT_*
@@ -403,9 +403,11 @@ deps:
       CM_MODEL:
       - llama2-70b-99
       - llama2-70b-99.9
-    skip_if_env:
+    skip_if_any_env:
       CM_MLPERF_CUSTOM_MODEL_PATH:
-      - "on"
+      - "on"
+      CM_MLPERF_INFERENCE_API_SERVER:
+      - "on"
 
   ## mixtral-8x7b
   - tags: get,ml-model,mixtral
diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py
index 352c0d8d6..8c9f0e7ec 100644
--- a/script/app-mlperf-inference-mlcommons-python/customize.py
+++ b/script/app-mlperf-inference-mlcommons-python/customize.py
@@ -75,7 +75,7 @@ def preprocess(i):
         else:
             env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x
 
-    if env.get('CM_NETWORK_LOADGEN', '') != "lon":
+    if env.get('CM_NETWORK_LOADGEN', '') != "lon" and env.get('CM_MLPERF_INFERENCE_API_SERVER','')=='':
         env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH')
         if not env['MODEL_DIR']:
             env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH')))
@@ -304,14 +304,14 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
             env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
             scenario_extra_options + mode_extra_options + \
             " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \
-            ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
-            " --model-path " + env['MODEL_DIR']
+            ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION']
         if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '':
             env['CM_VLLM_SERVER_MODEL_NAME'] = "NousResearch/Meta-Llama-3-8B-Instruct"
             if env.get('CM_MLPERF_INFERENCE_API_SERVER') == '':
                 env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000"
-            cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm "
-
+            cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm "
+        else:
+            cmd += f" --model-path {env['MODEL_DIR']}"
         cmd = cmd.replace("--count", "--total-sample-count")
 
     elif "mixtral-8x7b" in env['CM_MODEL']:
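A condensed sketch of the branch these customize.py hunks produce (illustrative only, not the CM harness itself; `env` stands in for the CM environment dict, and the model name and flags are the values visible in the diff). Note that the inner "default to localhost" assignment in the patch sits inside a branch that already requires CM_MLPERF_INFERENCE_API_SERVER to be non-empty, so it can never fire as written; the sketch simply omits it:

```python
# Illustrative sketch of the patched get_run_cmd_reference() llama2 branch.

def build_llama2_cmd(env: dict, base_cmd: str) -> str:
    cmd = base_cmd + " --dtype " + env["CM_MLPERF_MODEL_PRECISION"]
    api_server = env.get("CM_MLPERF_INFERENCE_API_SERVER", "")
    if api_server:
        # Remote path: query a running vLLM endpoint. The patch hardcodes the
        # served model name and passes it for both --model-path and
        # --api-model-name.
        model = "NousResearch/Meta-Llama-3-8B-Instruct"
        env["CM_VLLM_SERVER_MODEL_NAME"] = model
        cmd += (f" --api-server {api_server}"
                f" --model-path {model}"
                f" --api-model-name {model} --vllm")
    else:
        # Local path: MODEL_DIR was populated in preprocess() only because the
        # API-server check there came up empty.
        cmd += f" --model-path {env['MODEL_DIR']}"
    return cmd.replace("--count", "--total-sample-count")


print(build_llama2_cmd(
    {"CM_MLPERF_MODEL_PRECISION": "bfloat16",
     "CM_MLPERF_INFERENCE_API_SERVER": "http://localhost:8000"},
    "python3 main.py --count 100"))
```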
diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml
index 0c12f798f..d3041ea3a 100644
--- a/script/app-mlperf-inference/_cm.yaml
+++ b/script/app-mlperf-inference/_cm.yaml
@@ -280,6 +280,9 @@ variations:
       - CM_NVIDIA_TP_SIZE
       env:
         BUILD_TRTLLM: 1
+      skip_if_env:
+        CM_MLPERF_INFERENCE_API_SERVER:
+          - "*"
 
   nvidia-original,r4.1_default,llama2-70b_:
     docker:
@@ -290,6 +293,9 @@ variations:
       - CM_NVIDIA_TP_SIZE
       env:
         BUILD_TRTLLM: 1
+      skip_if_env:
+        CM_MLPERF_INFERENCE_API_SERVER:
+          - "*"
 
   nvidia-original:
     docker:
diff --git a/script/process-mlperf-accuracy/_cm.json b/script/process-mlperf-accuracy/_cm.json
index 17a059483..9af1a4f99 100644
--- a/script/process-mlperf-accuracy/_cm.json
+++ b/script/process-mlperf-accuracy/_cm.json
@@ -323,7 +323,12 @@
                     "names": [
                         "llama2-model"
                     ],
-                    "tags": "get,ml-model,llama2"
+                    "tags": "get,ml-model,llama2",
+                    "skip_if_env": {
+                        "CM_MLPERF_INFERENCE_API_SERVER": [
+                            "on"
+                        ]
+                    }
                 }
             ],
             "env": {
diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py
index beeab9204..d6536e13f 100644
--- a/script/process-mlperf-accuracy/customize.py
+++ b/script/process-mlperf-accuracy/customize.py
@@ -77,7 +77,11 @@ def preprocess(i):
     elif dataset == "openorca":
         accuracy_checker_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b",
             "evaluate-accuracy.py")
-        CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
+        if env.get('CM_VLLM_SERVER_MODEL_NAME','') == '':
+            checkpoint_path = env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH']
+        else:
+            checkpoint_path = env['CM_VLLM_SERVER_MODEL_NAME']
+        CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + checkpoint_path + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
             "' --dataset-file '" + env['CM_DATASET_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "int32") +" > '" + out_file + "'"
 
     elif dataset == "openorca-gsm8k-mbxp-combined":
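On the accuracy side, the last two diffs make the openorca checker reuse the served model name (a Hugging Face id) as the checkpoint when results were produced against a vLLM server; the llama2 checkpoint download dependency is skipped in that case via the new skip_if_env entry. A minimal sketch of the resulting command construction, assuming only the env names visible in the hunks:

```python
# Condensed sketch of the openorca accuracy command after this patch; the
# surrounding CM plumbing is elided.
import os

def openorca_accuracy_cmd(env: dict, result_dir: str, out_file: str) -> str:
    checker = os.path.join(env["CM_MLPERF_INFERENCE_SOURCE"],
                           "language", "llama2-70b", "evaluate-accuracy.py")
    # Prefer the server-side model name when one was recorded by the
    # mlcommons-python harness (exported via the new CM_VLLM_* entry in
    # new_env_keys); otherwise fall back to the local checkpoint.
    checkpoint = (env.get("CM_VLLM_SERVER_MODEL_NAME")
                  or env["CM_ML_MODEL_LLAMA2_FILE_WITH_PATH"])
    accuracy_log = os.path.join(result_dir, "mlperf_log_accuracy.json")
    return (f"{env['CM_PYTHON_BIN_WITH_PATH']} '{checker}'"
            f" --checkpoint-path '{checkpoint}'"
            f" --mlperf-accuracy-file '{accuracy_log}'"
            f" --dataset-file '{env['CM_DATASET_PREPROCESSED_PATH']}'"
            f" --dtype {env.get('CM_ACCURACY_DTYPE', 'int32')} > '{out_file}'")
```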
-P" - run_cmd = f"{auth_s3_bucket} && {download_cmd}" - - env['CM_RUN_CMD'] = run_cmd - return {'return': 0} def postprocess(i): env = i['env'] if env.get('CM_DATASET_CALIBRATION','') == "no": - if env.get("CM_MLPERF_IMPLEMENTATION", "") == "redhat": - env['CM_DATASET_OPENORCA_PATH'] = os.path.join(os.getcwd(), 'open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz') - else: - env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] - env['CM_DATASET_PATH'] = env['CM_DATASET_OPENORCA_PATH'] - env['CM_DATASET_OPENORCA_PARQUET'] = os.path.join(env['CM_DATASET_OPENORCA_PATH'], '1M-GPT4-Augmented.parquet') + env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] + env['CM_DATASET_PATH'] = env['CM_DATASET_OPENORCA_PATH'] + env['CM_DATASET_OPENORCA_PARQUET'] = os.path.join(env['CM_DATASET_OPENORCA_PATH'], '1M-GPT4-Augmented.parquet') else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'calibration', 'data') diff --git a/script/get-dataset-openorca/run.sh b/script/get-dataset-openorca/run.sh deleted file mode 100644 index a797ea9e4..000000000 --- a/script/get-dataset-openorca/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -cmd=${CM_RUN_CMD} -echo "${cmd}" -eval "${cmd}" -test $? -eq 0 || exit $?