From f30ab0041736314e981eee93498a09ce58a88c46 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Thu, 13 Feb 2025 10:54:13 +0800 Subject: [PATCH 1/9] use benchmark configuration --- .github/workflows/_ascend_npu_benchmark.yml | 18 ++++++++---------- .../workflows/_ascend_npu_build_torch_npu.yml | 6 +----- .github/workflows/_ascend_npu_ut.yml | 6 +----- ascend_npu/matadata.yml | 10 ---------- ascend_npu/torchbenchmark-config.yml | 10 ++++++++++ 5 files changed, 20 insertions(+), 30 deletions(-) delete mode 100644 ascend_npu/matadata.yml create mode 100644 ascend_npu/torchbenchmark-config.yml diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml index 3217f1f..eef3116 100644 --- a/.github/workflows/_ascend_npu_benchmark.yml +++ b/.github/workflows/_ascend_npu_benchmark.yml @@ -127,6 +127,11 @@ jobs: run: | pip install ${{ inputs.torch-npu-artifact }} + - name: Install torchvision and torchaudio + run: | + # pip install --pre torchvision torchaudio --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu + pip install --pre torchvision torchaudio --no-deps --index-url https://download.pytorch.org/whl/test/cpu + - name: Install benchmark dependencies run: | pip install -r benchmark/requirements.txt \ @@ -137,20 +142,13 @@ jobs: run: | python benchmark/install.py --userbenchmark test_bench --continue_on_fail - - name: Install nightly torchvision and torchaudio - run: | - pip install --pre torchvision torchaudio --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu - - name: Install project dependencies run: | pip install -r requirements.txt - - name: Show environment info + - name: List installed python packages + if: ${{ always() }} run: | - npu_is_available=$(python -c "import torch; print(torch.npu.is_available())") - npu_count=$(python -c "import torch; print(torch.npu.device_count())") - echo "NPU is available: ${npu_is_available}" - echo "NPU count: ${npu_count}" pip list | grep -E 'torch|numpy' 
- name: Run benchmarks @@ -166,7 +164,7 @@ jobs: name: ascend_npu_benchmark.json path: benchmark/ascend_npu_benchmark.json if-no-files-found: error - retention-days: 1 + retention-days: 3 overwrite: true - name: Write to workflow job summary diff --git a/.github/workflows/_ascend_npu_build_torch_npu.yml b/.github/workflows/_ascend_npu_build_torch_npu.yml index 00b1815..b6ded9c 100644 --- a/.github/workflows/_ascend_npu_build_torch_npu.yml +++ b/.github/workflows/_ascend_npu_build_torch_npu.yml @@ -73,11 +73,7 @@ jobs: - name: Checkout torch_npu uses: actions/checkout@v4 with: - # TODO(shink): Use Ascend/pytorch once this pr merged: - # https://gitee.com/ascend/pytorch/pulls/12854 - # repository: Ascend/pytorch - repository: shink/torchnpu - ref: feat/autoload + repository: Ascend/pytorch submodules: recursive path: torch_npu diff --git a/.github/workflows/_ascend_npu_ut.yml b/.github/workflows/_ascend_npu_ut.yml index f5ccc85..bfb0035 100644 --- a/.github/workflows/_ascend_npu_ut.yml +++ b/.github/workflows/_ascend_npu_ut.yml @@ -75,11 +75,7 @@ jobs: - name: Checkout torch_npu uses: actions/checkout@v4 with: - # TODO(shink): Use Ascend/pytorch once this pr merged: - # https://gitee.com/ascend/pytorch/pulls/12854 - # repository: Ascend/pytorch - repository: shink/torchnpu - ref: feat/autoload + repository: Ascend/pytorch path: torch_npu - name: Install pip dependencies diff --git a/ascend_npu/matadata.yml b/ascend_npu/matadata.yml deleted file mode 100644 index c5e2cca..0000000 --- a/ascend_npu/matadata.yml +++ /dev/null @@ -1,10 +0,0 @@ -device: "npu" -backend_extension: "torch_npu" -link: https://github.com/Ascend/pytorch -torchbenchmark: - test: - - train - - eval - models: - skip: - - llava diff --git a/ascend_npu/torchbenchmark-config.yml b/ascend_npu/torchbenchmark-config.yml new file mode 100644 index 0000000..28aaae6 --- /dev/null +++ b/ascend_npu/torchbenchmark-config.yml @@ -0,0 +1,10 @@ +device: "npu" +backend_extension: "torch_npu" +link: 
https://github.com/Ascend/pytorch +models: + - model: BERT_pytorch + tests: [ "eval" ] + batch_size: 1 + + - model: yolov3 + skip: true From f864601c63b3a9d94de051b9f1e04b969d7f3206 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Thu, 13 Feb 2025 11:21:36 +0800 Subject: [PATCH 2/9] use benchmark configuration --- ascend_npu/torchbenchmark-config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascend_npu/torchbenchmark-config.yml b/ascend_npu/torchbenchmark-config.yml index 28aaae6..b51d5b0 100644 --- a/ascend_npu/torchbenchmark-config.yml +++ b/ascend_npu/torchbenchmark-config.yml @@ -3,8 +3,8 @@ backend_extension: "torch_npu" link: https://github.com/Ascend/pytorch models: - model: BERT_pytorch - tests: [ "eval" ] batch_size: 1 - model: yolov3 skip: true +extra_args: "--accuracy" From 88150df0f2235f23d785f046ec1157151044fbc3 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Thu, 13 Feb 2025 11:23:45 +0800 Subject: [PATCH 3/9] test --- .github/workflows/_ascend_npu_benchmark.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml index eef3116..1d0c928 100644 --- a/.github/workflows/_ascend_npu_benchmark.yml +++ b/.github/workflows/_ascend_npu_benchmark.yml @@ -83,7 +83,10 @@ jobs: - name: Checkout benchmark uses: actions/checkout@v4 with: - repository: pytorch/benchmark + # TODO(shink): https://github.com/pytorch/benchmark/pull/2592 + # repository: pytorch/benchmark + repository: shink/benchmark + ref: feat/test_bench/config path: benchmark # TODO @@ -152,10 +155,8 @@ jobs: pip list | grep -E 'torch|numpy' - name: Run benchmarks - working-directory: benchmark run: | - python run_benchmark.py test_bench --accuracy --device npu --test eval \ - --output ascend_npu_benchmark.json + python benchmark/run_benchmark.py test_bench --config ascend_npu/torchbenchmark-config.yml - name: Upload the benchmark report file id: upload-report @@ 
-163,7 +164,7 @@ jobs: with: name: ascend_npu_benchmark.json path: benchmark/ascend_npu_benchmark.json - if-no-files-found: error + if-no-files-found: warn retention-days: 3 overwrite: true From 7de49293d122865f8942a94120a25d51eb32a9b1 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Thu, 13 Feb 2025 11:28:05 +0800 Subject: [PATCH 4/9] test --- ascend_npu/torchbenchmark-config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ascend_npu/torchbenchmark-config.yml b/ascend_npu/torchbenchmark-config.yml index b51d5b0..e583c9a 100644 --- a/ascend_npu/torchbenchmark-config.yml +++ b/ascend_npu/torchbenchmark-config.yml @@ -7,4 +7,5 @@ models: - model: yolov3 skip: true -extra_args: "--accuracy" +extra_args: + - "--accuracy" From f9b8454cecfc9f8c4666281ad6af8ed9985501ce Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 14 Feb 2025 10:23:20 +0800 Subject: [PATCH 5/9] update --- .github/workflows/dispatch-event.yml | 6 +++--- ascend_npu/torchbenchmark-config.yml | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/dispatch-event.yml b/.github/workflows/dispatch-event.yml index 3722746..4ed367f 100644 --- a/.github/workflows/dispatch-event.yml +++ b/.github/workflows/dispatch-event.yml @@ -31,7 +31,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - # List PRs created in the past 24 hours + # List PRs created in the past 24 hours and labeled 'ciflow/out-of-tree' - name: List PyTorch PRs id: list-pr uses: ./.github/actions/list-pr @@ -39,7 +39,7 @@ jobs: token: ${{ secrets.COSDT_BOT_TOKEN }} owner: pytorch repository: pytorch - labels: ${{ github.event.inputs.labels || '' }} + labels: ${{ github.event.inputs.labels || 'ciflow/out-of-tree' }} hours: ${{ github.event.inputs.hours || '24' }} dispatch-pr: @@ -49,7 +49,7 @@ jobs: - list-pr strategy: fail-fast: false - max-parallel: 1 + max-parallel: 1 # TODO: We now only support running 1 job at the same time on NPU CI runner matrix: data: ${{ 
fromJSON(needs.list-pr.outputs.prs) }} steps: diff --git a/ascend_npu/torchbenchmark-config.yml b/ascend_npu/torchbenchmark-config.yml index e583c9a..0aec87b 100644 --- a/ascend_npu/torchbenchmark-config.yml +++ b/ascend_npu/torchbenchmark-config.yml @@ -1,11 +1,13 @@ -device: "npu" +devices: + - "npu" backend_extension: "torch_npu" link: https://github.com/Ascend/pytorch models: - - model: BERT_pytorch - batch_size: 1 + - model: yolov3 + skip: true - model: yolov3 skip: true +batch_size: 1 extra_args: - "--accuracy" From 3694a27001dc49e8b02adff7fe8158c425c42522 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 14 Feb 2025 11:15:18 +0800 Subject: [PATCH 6/9] proxy --- .github/workflows/_ascend_npu_benchmark.yml | 3 +++ .github/workflows/_ascend_npu_build_torch.yml | 1 + .github/workflows/_ascend_npu_build_torch_npu.yml | 3 +++ .github/workflows/_ascend_npu_ut.yml | 3 +++ 4 files changed, 10 insertions(+) diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml index 1d0c928..ff69d87 100644 --- a/.github/workflows/_ascend_npu_benchmark.yml +++ b/.github/workflows/_ascend_npu_benchmark.yml @@ -79,12 +79,15 @@ jobs: - name: Checkout uses: actions/checkout@v4 + with: + github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' - name: Checkout benchmark uses: actions/checkout@v4 with: # TODO(shink): https://github.com/pytorch/benchmark/pull/2592 # repository: pytorch/benchmark + github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' repository: shink/benchmark ref: feat/test_bench/config path: benchmark diff --git a/.github/workflows/_ascend_npu_build_torch.yml b/.github/workflows/_ascend_npu_build_torch.yml index bd87f94..7de37c0 100644 --- a/.github/workflows/_ascend_npu_build_torch.yml +++ b/.github/workflows/_ascend_npu_build_torch.yml @@ -62,6 +62,7 @@ jobs: - name: Checkout PyTorch uses: actions/checkout@v4 with: + github-server-url: 
'https://gh-proxy.test.osinfra.cn/https://github.com' repository: pytorch/pytorch ref: ${{ inputs.ref }} submodules: recursive diff --git a/.github/workflows/_ascend_npu_build_torch_npu.yml b/.github/workflows/_ascend_npu_build_torch_npu.yml index b6ded9c..a847758 100644 --- a/.github/workflows/_ascend_npu_build_torch_npu.yml +++ b/.github/workflows/_ascend_npu_build_torch_npu.yml @@ -69,10 +69,13 @@ jobs: - name: Checkout uses: actions/checkout@v4 + with: + github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' - name: Checkout torch_npu uses: actions/checkout@v4 with: + github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' repository: Ascend/pytorch submodules: recursive path: torch_npu diff --git a/.github/workflows/_ascend_npu_ut.yml b/.github/workflows/_ascend_npu_ut.yml index bfb0035..ac09927 100644 --- a/.github/workflows/_ascend_npu_ut.yml +++ b/.github/workflows/_ascend_npu_ut.yml @@ -71,10 +71,13 @@ jobs: - name: Checkout uses: actions/checkout@v4 + with: + github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' - name: Checkout torch_npu uses: actions/checkout@v4 with: + github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' repository: Ascend/pytorch path: torch_npu From 138b58b556afab7982e40f7a0f99f3ef6be28845 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 14 Feb 2025 11:23:27 +0800 Subject: [PATCH 7/9] revert --- .github/workflows/_ascend_npu_benchmark.yml | 3 --- .github/workflows/_ascend_npu_build_torch.yml | 1 - .github/workflows/_ascend_npu_build_torch_npu.yml | 3 --- .github/workflows/_ascend_npu_ut.yml | 3 --- 4 files changed, 10 deletions(-) diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml index ff69d87..1d0c928 100644 --- a/.github/workflows/_ascend_npu_benchmark.yml +++ b/.github/workflows/_ascend_npu_benchmark.yml @@ -79,15 +79,12 @@ jobs: - name: Checkout uses: actions/checkout@v4 - with: - github-server-url: 
'https://gh-proxy.test.osinfra.cn/https://github.com' - name: Checkout benchmark uses: actions/checkout@v4 with: # TODO(shink): https://github.com/pytorch/benchmark/pull/2592 # repository: pytorch/benchmark - github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' repository: shink/benchmark ref: feat/test_bench/config path: benchmark diff --git a/.github/workflows/_ascend_npu_build_torch.yml b/.github/workflows/_ascend_npu_build_torch.yml index 7de37c0..bd87f94 100644 --- a/.github/workflows/_ascend_npu_build_torch.yml +++ b/.github/workflows/_ascend_npu_build_torch.yml @@ -62,7 +62,6 @@ jobs: - name: Checkout PyTorch uses: actions/checkout@v4 with: - github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' repository: pytorch/pytorch ref: ${{ inputs.ref }} submodules: recursive diff --git a/.github/workflows/_ascend_npu_build_torch_npu.yml b/.github/workflows/_ascend_npu_build_torch_npu.yml index a847758..b6ded9c 100644 --- a/.github/workflows/_ascend_npu_build_torch_npu.yml +++ b/.github/workflows/_ascend_npu_build_torch_npu.yml @@ -69,13 +69,10 @@ jobs: - name: Checkout uses: actions/checkout@v4 - with: - github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' - name: Checkout torch_npu uses: actions/checkout@v4 with: - github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' repository: Ascend/pytorch submodules: recursive path: torch_npu diff --git a/.github/workflows/_ascend_npu_ut.yml b/.github/workflows/_ascend_npu_ut.yml index ac09927..bfb0035 100644 --- a/.github/workflows/_ascend_npu_ut.yml +++ b/.github/workflows/_ascend_npu_ut.yml @@ -71,13 +71,10 @@ jobs: - name: Checkout uses: actions/checkout@v4 - with: - github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' - name: Checkout torch_npu uses: actions/checkout@v4 with: - github-server-url: 'https://gh-proxy.test.osinfra.cn/https://github.com' repository: Ascend/pytorch path: torch_npu From 
5f0b6ac7f98dc99a5f0a6ac517409200bbcf3443 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 14 Feb 2025 16:41:07 +0800 Subject: [PATCH 8/9] update --- .ci/benchmark.py | 1 - .github/workflows/_ascend_npu_benchmark.yml | 4 +++- ascend_npu/torchbenchmark-config.yml | 7 ++----- src/benchmark/utils.py | 6 ++++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.ci/benchmark.py b/.ci/benchmark.py index f6c9400..1b26b68 100644 --- a/.ci/benchmark.py +++ b/.ci/benchmark.py @@ -1,6 +1,5 @@ import argparse import os -import sys from src.benchmark.utils import read_metrics, to_markdown_table diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml index 1d0c928..7857ef0 100644 --- a/.github/workflows/_ascend_npu_benchmark.yml +++ b/.github/workflows/_ascend_npu_benchmark.yml @@ -156,7 +156,9 @@ jobs: - name: Run benchmarks run: | - python benchmark/run_benchmark.py test_bench --config ascend_npu/torchbenchmark-config.yml + python benchmark/run_benchmark.py test_bench \ + --config ascend_npu/torchbenchmark-config.yml \ + --output ascend_npu_benchmark.json - name: Upload the benchmark report file id: upload-report diff --git a/ascend_npu/torchbenchmark-config.yml b/ascend_npu/torchbenchmark-config.yml index 0aec87b..84cba1e 100644 --- a/ascend_npu/torchbenchmark-config.yml +++ b/ascend_npu/torchbenchmark-config.yml @@ -3,11 +3,8 @@ devices: backend_extension: "torch_npu" link: https://github.com/Ascend/pytorch models: - - model: yolov3 - skip: true - - - model: yolov3 - skip: true + - model: llava + skip: true # Out of memory batch_size: 1 extra_args: - "--accuracy" diff --git a/src/benchmark/utils.py b/src/benchmark/utils.py index 983823f..096c70d 100644 --- a/src/benchmark/utils.py +++ b/src/benchmark/utils.py @@ -16,6 +16,7 @@ class TorchBenchModelConfig: extra_args: List[str] extra_env: Optional[Dict[str, str]] = None output_dir: Optional[pathlib.Path] = None + skip: bool = False @dataclasses.dataclass @@ -68,6 
+69,7 @@ def read_metrics(path: str, *, metric=None) -> List[TorchBenchModelMetric]: extra_args=key_dict.get("extra_args"), extra_env=key_dict.get("extra_env"), output_dir=key_dict.get("output_dir"), + skip=key_dict.get("skip"), ) model_metric = TorchBenchModelMetric(config, metric_value) metrics.append(model_metric) @@ -79,7 +81,7 @@ def generate_table_rows(metrics: List[TorchBenchModelMetric]): models = list({metric.key.name for metric in metrics}) models = sorted(models, key=lambda x: x.lower()) - def filter_metric(metrics: List[TorchBenchModelMetric], *, model, device): + def _filter_metric(metrics: List[TorchBenchModelMetric], *, model, device): for metric in metrics: if metric.key.name == model and metric.key.device == device: return metric @@ -88,7 +90,7 @@ def filter_metric(metrics: List[TorchBenchModelMetric], *, model, device): for model in models: row = [model] for device in devices: - metric = filter_metric(metrics, model=model, device=device) + metric = _filter_metric(metrics, model=model, device=device) if metric is not None: if metric.value == "pass": cell = "✅" From 89fd90350d94f24df35f0bd1ceb239ba450cbbf8 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 14 Feb 2025 16:42:40 +0800 Subject: [PATCH 9/9] update --- .github/workflows/_ascend_npu_benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml index 7857ef0..2f99106 100644 --- a/.github/workflows/_ascend_npu_benchmark.yml +++ b/.github/workflows/_ascend_npu_benchmark.yml @@ -166,7 +166,7 @@ jobs: with: name: ascend_npu_benchmark.json path: benchmark/ascend_npu_benchmark.json - if-no-files-found: warn + if-no-files-found: error retention-days: 3 overwrite: true