From 53a8d960264ba200425f14147ce75e73ef84a526 Mon Sep 17 00:00:00 2001
From: xuedinge233 <damow890@gmail.com>
Date: Fri, 30 May 2025 02:52:19 +0000
Subject: [PATCH 1/6] Add workflow for torchtune

---
 .github/workflows/_ascend_npu_torchtune.yml | 128 ++++++++++++++++++++
 .github/workflows/ascend_npu_test.yml       |  53 ++++++++
 2 files changed, 181 insertions(+)
 create mode 100644 .github/workflows/_ascend_npu_torchtune.yml

diff --git a/.github/workflows/_ascend_npu_torchtune.yml b/.github/workflows/_ascend_npu_torchtune.yml
new file mode 100644
index 0000000..9562cb7
--- /dev/null
+++ b/.github/workflows/_ascend_npu_torchtune.yml
@@ -0,0 +1,128 @@
+name: "_ascend_npu_torchtune"  # reusable workflow: runs torchtune finetune recipes against a torch_npu build
+
+on:
+  workflow_call:  # only started from another workflow through "uses:"
+    inputs:
+      runner:
+        required: true
+        type: string
+        description: "The runner selected to run on"
+      image:
+        required: true
+        type: string
+        description: "The docker image which will be loaded"
+      device:
+        required: true
+        type: string
+        description: "The device selected to run on"
+      torch-artifact:
+        required: false
+        type: string
+        description: "The distribution artifact name of torch"  # optional: when empty, the torch download/install steps are skipped
+      torch-npu-artifact:
+        required: true
+        type: string
+        description: "The distribution artifact name of torch_npu"
+
+defaults:
+  run:
+    shell: bash -el {0}
+
+jobs:
+  torchtune:
+    name: run torchtune for torch_npu
+    runs-on: ${{ inputs.runner }}  # runner label is chosen by the caller
+    container:
+      image: ${{ inputs.image }}  # every step of this job runs inside this container
+      env:
+        HF_ENDPOINT: https://hf-mirror.com  # Hugging Face endpoint override (presumably a mirror; confirm)
+
+    steps:
+      - name: Show NPU info  # prints the npu-smi report to the job log
+        run: |
+          npu-smi info
+
+      - name: Config mirrors  # rewrites the apt sources and the pip index URL to the TUNA mirror
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies  # git, compiler toolchain, curl and a few shared libraries
+        run: |
+          apt-get update
+          apt-get install -y \
+              git gcc g++ make cmake ninja-build curl \
+              libgl1 libglib2.0-0 libsndfile1
+
+      # See: https://github.com/actions/checkout/issues/363#issuecomment-1915075699
+      # See: https://github.com/hunshcn/gh-proxy/issues/28#issuecomment-773769630
+      - name: Config git  # trusts the workspace directory and rewrites github.com URLs to go through gh-proxy
+        run: |
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout  # no repository/path given: checks out the default repository into the workspace root
+        uses: actions/checkout@v4
+
+      - name: Checkout torchtune  # no ref is pinned, so the default branch of pytorch/torchtune is used
+        uses: actions/checkout@v4
+        with:
+          repository: pytorch/torchtune
+          path: torchtune  # cloned into ./torchtune inside the workspace
+
+      - name: Install torchtune
+        working-directory: torchtune
+        run: |
+          pip install -e .
+
+      - name: Download torch artifact
+        if: ${{ inputs.torch-artifact }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.torch-artifact }}
+
+      - name: Install torch  # the artifact name is handed to pip as-is, so it is presumably also the wheel file name (confirm)
+        if: ${{ inputs.torch-artifact }}
+        run: |
+          pip install ${{ inputs.torch-artifact }}
+
+      - name: Install torch_npu dependencies  # fallback used only when no torch artifact was supplied
+        if: ${{ !inputs.torch-artifact }}
+        run: |
+          pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt
+
+      - name: List torch version  # NOTE(review): no other step in this workflow reads the torch-version output
+        id: list-torch-version
+        shell: bash
+        run: |
+          torch_version=$(python -c "import torch; print(torch.__version__)")
+          echo "torch-version=${torch_version}" >> $GITHUB_OUTPUT
+
+      - name: Download torch_npu artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.torch-npu-artifact }}
+          path: ascend_npu  # extracted into ./ascend_npu
+
+      - name: Install torch_npu  # runs inside ./ascend_npu, where the artifact was extracted
+        working-directory: ascend_npu
+        run: |
+          pip install ${{ inputs.torch-npu-artifact }}
+          
+      - name: Show environment info
+        run: |
+          pip list
+          
+      - name: Download Qwen2.5 model
+        run: |
+          export HF_ENDPOINT=https://hf-mirror.com
+          huggingface-cli download --resume-download Qwen/Qwen2.5-0.5B-Instruct \
+            --local-dir /tmp/Qwen2.5-0.5B-Instruct \
+
+      - name: Run torchtune with lora finetune
+        run: |
+          tune run lora_finetune_single_device --config qwen2_5/0.5B_lora_single_device
+
+      - name: Run torchtune with full finetune  # full-finetune recipe for the same Qwen2.5 0.5B config family as the LoRA run
+        run: |
+          tune run full_finetune_single_device --config qwen2_5/0.5B_full_single_device
diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml
index e6eda7c..d03f115 100644
--- a/.github/workflows/ascend_npu_test.yml
+++ b/.github/workflows/ascend_npu_test.yml
@@ -11,6 +11,7 @@ on:
       - ".github/workflows/_ascend_npu_ut.yml"
       - ".github/workflows/_ascend_npu_benchmark.yml"
       - ".github/workflows/_ascend_npu_torchtitan.yml"
+      - ".github/workflows/_ascend_npu_torchtune.yml"
       - ".ci/**"
       - "ascend_npu/**"
       - "src/**"
@@ -25,6 +26,7 @@ on:
       - ".github/workflows/_ascend_npu_ut.yml"
       - ".github/workflows/_ascend_npu_benchmark.yml"
       - ".github/workflows/_ascend_npu_torchtitan.yml"
+      - ".github/workflows/_ascend_npu_torchtune.yml"
       - ".ci/**"
       - "ascend_npu/**"
       - "src/**"
@@ -120,6 +122,41 @@ jobs:
       image: ${{ needs.prepare.outputs.image }}
       torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
 
+  test:  # delegates to the reusable _ascend_npu_ut.yml workflow
+    name: Test torch_npu
+    needs:  # the condition below also accepts a skipped build-torch as long as build succeeded
+      - prepare
+      - build-torch
+      - build
+    if: |
+      !cancelled() && github.event_name != 'repository_dispatch' &&
+      (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
+    uses: ./.github/workflows/_ascend_npu_ut.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      device: ${{ needs.prepare.outputs.device }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
+      torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
+
+  benchmark:  # delegates to the reusable _ascend_npu_benchmark.yml workflow
+    name: Run benchmarks
+    needs:  # the condition below also accepts a skipped build-torch as long as build succeeded
+      - prepare
+      - build-torch
+      - build
+    if: |
+      !cancelled() && github.event_name != 'repository_dispatch' &&
+      (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
+    uses: ./.github/workflows/_ascend_npu_benchmark.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
+      torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
+    secrets:
+      pr-token: ${{ secrets.COSDT_BOT_TOKEN }}  # forwarded to the called workflow under the name pr-token
+
   torchtitan:
     name: Run torchtitan
     needs:
@@ -136,3 +173,19 @@ jobs:
       torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
       torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
 
+  torchtune:  # delegates to the reusable _ascend_npu_torchtune.yml workflow
+    name: Run torchtune for torch_npu
+    needs:  # the condition below also accepts a skipped build-torch as long as build succeeded
+      - prepare
+      - build-torch
+      - build
+    if: |
+      !cancelled() && github.event_name != 'repository_dispatch' &&
+      (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
+    uses: ./.github/workflows/_ascend_npu_torchtune.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
+      torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
+

From 054edb3e71577bdc0a63ed34a04f11df1d70864e Mon Sep 17 00:00:00 2001
From: Jiahao Su <damow890@gmail.com>
Date: Mon, 30 Jun 2025 16:59:04 +0800
Subject: [PATCH 2/6] Stop passing the device input to the test job

---
 .github/workflows/ascend_npu_test.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml
index d03f115..a0b66be 100644
--- a/.github/workflows/ascend_npu_test.yml
+++ b/.github/workflows/ascend_npu_test.yml
@@ -135,7 +135,6 @@ jobs:
     with:
       runner: ${{ needs.prepare.outputs.runner }}
       image: ${{ needs.prepare.outputs.image }}
-      device: ${{ needs.prepare.outputs.device }}
       torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
       torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
 

From b412499ce232de91c6f7df7142115a8aef96244f Mon Sep 17 00:00:00 2001
From: Jiahao Su <damow890@gmail.com>
Date: Thu, 3 Jul 2025 16:14:27 +0800
Subject: [PATCH 3/6] Remove device input; drop default shell from torchtune workflow

---
 .github/workflows/_ascend_npu_torchtitan.yml | 4 ----
 .github/workflows/_ascend_npu_torchtune.yml  | 8 --------
 2 files changed, 12 deletions(-)

diff --git a/.github/workflows/_ascend_npu_torchtitan.yml b/.github/workflows/_ascend_npu_torchtitan.yml
index 07f554a..95d743e 100644
--- a/.github/workflows/_ascend_npu_torchtitan.yml
+++ b/.github/workflows/_ascend_npu_torchtitan.yml
@@ -11,10 +11,6 @@ on:
         required: true
         type: string
         description: "The docker image which will be loaded"
-      device:
-        required: true
-        type: string
-        description: "The device selected to run on"
       torch-artifact:
         required: false
         type: string
diff --git a/.github/workflows/_ascend_npu_torchtune.yml b/.github/workflows/_ascend_npu_torchtune.yml
index 9562cb7..825c7bd 100644
--- a/.github/workflows/_ascend_npu_torchtune.yml
+++ b/.github/workflows/_ascend_npu_torchtune.yml
@@ -11,10 +11,6 @@ on:
         required: true
         type: string
         description: "The docker image which will be loaded"
-      device:
-        required: true
-        type: string
-        description: "The device selected to run on"
       torch-artifact:
         required: false
         type: string
@@ -24,10 +20,6 @@ on:
         type: string
         description: "The distribution artifact name of torch_npu"
 
-defaults:
-  run:
-    shell: bash -el {0}
-
 jobs:
   torchtune:
     name: run torchtune for torch_npu

From c3d7970b6e0e9c7cbfeab6236a9ef1c467654cc8 Mon Sep 17 00:00:00 2001
From: Jiahao Su <damow890@gmail.com>
Date: Thu, 3 Jul 2025 16:24:12 +0800
Subject: [PATCH 4/6] Remove pr-token secret declaration from torchtitan workflow

---
 .github/workflows/_ascend_npu_torchtitan.yml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/_ascend_npu_torchtitan.yml b/.github/workflows/_ascend_npu_torchtitan.yml
index 95d743e..fe27ed9 100644
--- a/.github/workflows/_ascend_npu_torchtitan.yml
+++ b/.github/workflows/_ascend_npu_torchtitan.yml
@@ -19,10 +19,6 @@ on:
         required: true
         type: string
         description: "The distribution artifact name of torch_npu"
-    secrets:
-      pr-token:
-        description: "A token used to create a pull request"
-        required: true
 
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.

From 723c8e3fdd676c274df20ceed31c8c8019a9a034 Mon Sep 17 00:00:00 2001
From: Jiahao Su <damow890@gmail.com>
Date: Mon, 7 Jul 2025 10:38:30 +0800
Subject: [PATCH 5/6] Download model with tune download before installing torch

---
 .github/workflows/_ascend_npu_torchtune.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/_ascend_npu_torchtune.yml b/.github/workflows/_ascend_npu_torchtune.yml
index 825c7bd..7929e00 100644
--- a/.github/workflows/_ascend_npu_torchtune.yml
+++ b/.github/workflows/_ascend_npu_torchtune.yml
@@ -67,6 +67,12 @@ jobs:
         run: |
           pip install -e .
 
+      - name: Download Qwen2.5 model
+        run: |
+          export HF_ENDPOINT=https://hf-mirror.com
+          tune download Qwen/Qwen2.5-0.5B-Instruct \
+            --output-dir /tmp/Qwen2.5-0.5B-Instruct 
+
       - name: Download torch artifact
         if: ${{ inputs.torch-artifact }}
         uses: actions/download-artifact@v4
@@ -103,13 +109,7 @@ jobs:
           
       - name: Show environment info
         run: |
-          pip list
-          
-      - name: Download Qwen2.5 model
-        run: |
-          export HF_ENDPOINT=https://hf-mirror.com
-          huggingface-cli download --resume-download Qwen/Qwen2.5-0.5B-Instruct \
-            --local-dir /tmp/Qwen2.5-0.5B-Instruct \
+          pip list          
 
       - name: Run torchtune with lora finetune
         run: |

From 1fbdc82f1e38b62cff1f2dbc527d49958a3eb26a Mon Sep 17 00:00:00 2001
From: Jiahao Su <damow890@gmail.com>
Date: Mon, 7 Jul 2025 10:45:47 +0800
Subject: [PATCH 6/6] Move model download back after the environment info step

---
 .github/workflows/_ascend_npu_torchtune.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/_ascend_npu_torchtune.yml b/.github/workflows/_ascend_npu_torchtune.yml
index 7929e00..2d78d46 100644
--- a/.github/workflows/_ascend_npu_torchtune.yml
+++ b/.github/workflows/_ascend_npu_torchtune.yml
@@ -67,12 +67,6 @@ jobs:
         run: |
           pip install -e .
 
-      - name: Download Qwen2.5 model
-        run: |
-          export HF_ENDPOINT=https://hf-mirror.com
-          tune download Qwen/Qwen2.5-0.5B-Instruct \
-            --output-dir /tmp/Qwen2.5-0.5B-Instruct 
-
       - name: Download torch artifact
         if: ${{ inputs.torch-artifact }}
         uses: actions/download-artifact@v4
@@ -109,7 +103,13 @@ jobs:
           
       - name: Show environment info
         run: |
-          pip list          
+          pip list
+
+      - name: Download Qwen2.5 model  # NOTE(review): HF_ENDPOINT is already set in the job's container env; the export below appears redundant
+        run: |
+          export HF_ENDPOINT=https://hf-mirror.com
+          tune download Qwen/Qwen2.5-0.5B-Instruct \
+            --output-dir /tmp/Qwen2.5-0.5B-Instruct
 
       - name: Run torchtune with lora finetune
         run: |