diff --git a/autotest/benchmark/test_apiserver_performance.py b/autotest/benchmark/test_apiserver_performance.py
index 69577218d..c41de2a20 100644
--- a/autotest/benchmark/test_apiserver_performance.py
+++ b/autotest/benchmark/test_apiserver_performance.py
@@ -22,12 +22,7 @@ def getModelList(tp_num):
     new_model_list = []
     for model in model_list:
         if model['backend'] == 'pytorch':
-            if 'Llama-3' in model['model'] and '70B' in model['model']:
-                model[
-                    'extra'] = '--max-batch-size 256 --cache-max-entry-count 0.65'  # noqa: E501
-            else:
-                model[
-                    'extra'] = '--max-batch-size 256 --cache-max-entry-count 0.7'  # noqa: E501
+            model['extra'] = '--max-batch-size 256 --cache-max-entry-count 0.7'
         elif 'Llama-2' in model['model']:
             model[
                 'extra'] = '--max-batch-size 256 --cache-max-entry-count 0.95'
diff --git a/autotest/utils/benchmark_utils.py b/autotest/utils/benchmark_utils.py
index 944beeda0..13e666d5f 100644
--- a/autotest/utils/benchmark_utils.py
+++ b/autotest/utils/benchmark_utils.py
@@ -248,10 +248,7 @@ def create_multi_level_directory(path):
 
 def get_max_cache_entry(model, backend):
     if backend == 'pytorch':
-        if 'Llama-3' in model and '70B' in model:
-            return '--cache-max-entry-count 0.65'
-        else:
-            return '--cache-max-entry-count 0.7'
+        return '--cache-max-entry-count 0.7'
     if 'Llama-2' in model:
         return '--cache-max-entry-count 0.95'
     elif 'internlm2' in model:
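
For reference, a minimal sketch of what get_max_cache_entry reduces to after this change. Only the branches visible in the hunk are shown; the remaining turbomind branches (internlm2 and beyond) fall outside this diff and are assumed unchanged, so they are elided here.

def get_max_cache_entry(model, backend):
    # The pytorch backend now uses a single KV-cache ratio for every model;
    # the Llama-3 70B special case (0.65) is removed by this change.
    if backend == 'pytorch':
        return '--cache-max-entry-count 0.7'
    # Turbomind overrides, as seen in the surrounding context lines.
    if 'Llama-2' in model:
        return '--cache-max-entry-count 0.95'
    # ... branches beyond this hunk (e.g. 'internlm2') are unchanged ...

Both hunks make the same simplification: test_apiserver_performance.py inlines the flag into model['extra'], while benchmark_utils.py returns it from the helper, so the two code paths stay consistent.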