Skip to content

Add workflow for torchtune #62

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .github/workflows/_ascend_npu_torchtitan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ on:
required: true
type: string
description: "The docker image which will be loaded"
device:
required: true
type: string
description: "The device selected to run on"
torch-artifact:
required: false
type: string
Expand All @@ -23,10 +19,6 @@ on:
required: true
type: string
description: "The distribution artifact name of torch_npu"
secrets:
pr-token:
description: "A token used to create a pull request"
required: true

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
Expand Down
120 changes: 120 additions & 0 deletions .github/workflows/_ascend_npu_torchtune.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Reusable workflow (triggered only through `workflow_call`) that runs
# torchtune against a torch_npu build.
name: '_ascend_npu_torchtune'

on:
  workflow_call:
    inputs:
      # Used as the job's `runs-on` value.
      runner:
        description: 'The runner selected to run on'
        type: string
        required: true
      # Used as the job's container image.
      image:
        description: 'The docker image which will be loaded'
        type: string
        required: true
      # Optional. When empty, the torch download/install steps are skipped
      # and the torch_npu requirements file is installed instead.
      torch-artifact:
        description: 'The distribution artifact name of torch'
        type: string
        required: false
      # Name of the artifact to download; the same value is also passed to
      # `pip install` from the download directory.
      torch-npu-artifact:
        description: 'The distribution artifact name of torch_npu'
        type: string
        required: true

jobs:
  # Single job: prepare the container, install torchtune, torch and torch_npu,
  # download Qwen2.5-0.5B-Instruct, then run the LoRA and full fine-tune
  # single-device recipes.
  torchtune:
    name: run torchtune for torch_npu
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
      env:
        # Hugging Face mirror endpoint, visible to every step in the container.
        HF_ENDPOINT: https://hf-mirror.com

    steps:
      - name: Show NPU info
        run: |
          npu-smi info

      # Point apt and pip at the TUNA mirrors.
      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build curl \
            libgl1 libglib2.0-0 libsndfile1

      # See: https://github.com/actions/checkout/issues/363#issuecomment-1915075699
      # See: https://github.com/hunshcn/gh-proxy/issues/28#issuecomment-773769630
      - name: Config git
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      - name: Checkout
        uses: actions/checkout@v4

      # Second checkout: torchtune sources go into ./torchtune.
      - name: Checkout torchtune
        uses: actions/checkout@v4
        with:
          repository: pytorch/torchtune
          path: torchtune

      - name: Install torchtune
        working-directory: torchtune
        run: |
          pip install -e .

      # The next two steps only run when a torch artifact name was supplied.
      - name: Download torch artifact
        if: ${{ inputs.torch-artifact }}
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-artifact }}

      - name: Install torch
        if: ${{ inputs.torch-artifact }}
        run: |
          pip install ${{ inputs.torch-artifact }}

      # Without a torch artifact, install the requirements file published in
      # the Ascend/pytorch repository. The raw-content host is required here:
      # a github.com/<owner>/<repo>/refs/heads/... URL does not serve the file.
      - name: Install torch_npu dependencies
        if: ${{ !inputs.torch-artifact }}
        run: |
          pip install -r https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt

      # Exposes the installed torch version as the `torch-version` step output.
      - name: List torch version
        id: list-torch-version
        shell: bash
        run: |
          torch_version=$(python -c "import torch; print(torch.__version__)")
          echo "torch-version=${torch_version}" >> "$GITHUB_OUTPUT"

      - name: Download torch_npu artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.torch-npu-artifact }}
          path: ascend_npu

      - name: Install torch_npu
        working-directory: ascend_npu
        run: |
          pip install ${{ inputs.torch-npu-artifact }}

      - name: Show environment info
        run: |
          pip list

      # HF_ENDPOINT is already provided by the container `env` above, so it is
      # not exported again here.
      # NOTE(review): the recipe configs used below presumably expect the
      # model under this output directory - confirm against the torchtune
      # configs.
      - name: Download Qwen2.5 model
        run: |
          tune download Qwen/Qwen2.5-0.5B-Instruct \
            --output-dir /tmp/Qwen2.5-0.5B-Instruct

      # NOTE(review): no device override is passed to the recipes; confirm the
      # selected configs actually run on the NPU.
      - name: Run torchtune with lora finetune
        run: |
          tune run lora_finetune_single_device --config qwen2_5/0.5B_lora_single_device

      - name: Run torchtune with full finetune
        run: |
          tune run full_finetune_single_device --config qwen2_5/0.5B_full_single_device
52 changes: 52 additions & 0 deletions .github/workflows/ascend_npu_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ on:
- ".github/workflows/_ascend_npu_ut.yml"
- ".github/workflows/_ascend_npu_benchmark.yml"
- ".github/workflows/_ascend_npu_torchtitan.yml"
- ".github/workflows/_ascend_npu_torchtune.yml"
- ".ci/**"
- "ascend_npu/**"
- "src/**"
Expand All @@ -25,6 +26,7 @@ on:
- ".github/workflows/_ascend_npu_ut.yml"
- ".github/workflows/_ascend_npu_benchmark.yml"
- ".github/workflows/_ascend_npu_torchtitan.yml"
- ".github/workflows/_ascend_npu_torchtune.yml"
- ".ci/**"
- "ascend_npu/**"
- "src/**"
Expand Down Expand Up @@ -120,6 +122,40 @@ jobs:
image: ${{ needs.prepare.outputs.image }}
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}

# Unit-test job: calls the reusable `_ascend_npu_ut.yml` workflow with the
# runner/image reported by `prepare` and the artifacts of the build jobs.
test:
  name: Test torch_npu
  uses: ./.github/workflows/_ascend_npu_ut.yml
  needs: [prepare, build-torch, build]
  # Never runs for cancelled runs or `repository_dispatch` events. Otherwise
  # runs when `success()` holds, or when `build-torch` was skipped and `build`
  # succeeded.
  if: |
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  with:
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    image: ${{ needs.prepare.outputs.image }}
    runner: ${{ needs.prepare.outputs.runner }}

# Benchmark job: calls the reusable `_ascend_npu_benchmark.yml` workflow and
# forwards the COSDT_BOT_TOKEN repository secret as its `pr-token` secret.
benchmark:
  name: Run benchmarks
  uses: ./.github/workflows/_ascend_npu_benchmark.yml
  needs: [prepare, build-torch, build]
  # Never runs for cancelled runs or `repository_dispatch` events. Otherwise
  # runs when `success()` holds, or when `build-torch` was skipped and `build`
  # succeeded.
  if: |
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  secrets:
    pr-token: ${{ secrets.COSDT_BOT_TOKEN }}
  with:
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    image: ${{ needs.prepare.outputs.image }}
    runner: ${{ needs.prepare.outputs.runner }}

torchtitan:
name: Run torchtitan
needs:
Expand All @@ -136,3 +172,19 @@ jobs:
torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}

# torchtune job: calls the reusable `_ascend_npu_torchtune.yml` workflow with
# the runner/image reported by `prepare` and the artifacts of the build jobs.
torchtune:
  name: Run torchtune for torch_npu
  uses: ./.github/workflows/_ascend_npu_torchtune.yml
  needs: [prepare, build-torch, build]
  # Never runs for cancelled runs or `repository_dispatch` events. Otherwise
  # runs when `success()` holds, or when `build-torch` was skipped and `build`
  # succeeded.
  if: |
    !cancelled() && github.event_name != 'repository_dispatch' &&
    (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
  with:
    torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
    torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
    image: ${{ needs.prepare.outputs.image }}
    runner: ${{ needs.prepare.outputs.runner }}

Loading