Reviewer notes for this patch:
  * Version specifiers passed to pip are single-quoted. Unquoted, bash parses
    ">=0.8.0" as a redirection to a file named "=0.8.0", so pip would install
    an unconstrained release and the minimum-version pin would be lost.
  * tyro is already installed by the context line just above the added pip
    lines, so it is not repeated in the last added line.
  * NOTE(review): the removed "defaults.run.shell: bash -el {0}" was documented
    as activating ascend-toolkit environment variables; confirm the steps still
    see that environment without a login shell.
  * NOTE(review): the "run: |" removed/added pair differs only in whitespace
    that is not recoverable from this copy; confirm against the original patch.
  * NOTE(review): line breaks follow the hunk line counts; YAML indentation is
    reconstructed by convention. Verify against the target files before applying.

diff --git a/.github/workflows/_ascend_npu_torchtitan.yml b/.github/workflows/_ascend_npu_torchtitan.yml
index 07f554a..914d7ca 100644
--- a/.github/workflows/_ascend_npu_torchtitan.yml
+++ b/.github/workflows/_ascend_npu_torchtitan.yml
@@ -11,10 +11,6 @@ on:
         required: true
         type: string
         description: "The docker image which will be loaded"
-      device:
-        required: true
-        type: string
-        description: "The device selected to run on"
       torch-artifact:
         required: false
         type: string
@@ -28,14 +24,6 @@ on:
         description: "A token used to create a pull request"
         required: true

-# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
-# declared as "shell: bash -el {0}" on steps that need to be properly activated.
-# It's used to activate ascend-toolkit environment variables.
-
-defaults:
-  run:
-    shell: bash -el {0}
-
 jobs:
   setup_environment:
     name: run torchtitan tests
@@ -116,6 +104,10 @@ jobs:
         run: |
           pip install -r requirements.txt
           pip install pytest pytest-cov tyro
+          pip install 'torchdata>=0.8.0'
+          pip install 'datasets>=3.6.0'
+          pip install 'tomli>=1.1.0'
+          pip install tensorboard tiktoken blobfile tabulate wandb fsspec

       - name: Show environment info
         run: |
@@ -127,7 +119,7 @@ jobs:

       - name: Run torchtitan integration_test
         working-directory: torchtitan
-        run: |
+        run: |
           npu_count=$(python -c "import torch; print(torch.npu.device_count())")
           python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu ${npu_count}

diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml
index e6eda7c..d7b1c9a 100644
--- a/.github/workflows/ascend_npu_test.yml
+++ b/.github/workflows/ascend_npu_test.yml
@@ -120,6 +120,40 @@ jobs:
       image: ${{ needs.prepare.outputs.image }}
       torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}

+  test:
+    name: Test torch_npu
+    needs:
+      - prepare
+      - build-torch
+      - build
+    if: |
+      !cancelled() && github.event_name != 'repository_dispatch' &&
+      (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
+    uses: ./.github/workflows/_ascend_npu_ut.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
+      torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
+
+  benchmark:
+    name: Run benchmarks
+    needs:
+      - prepare
+      - build-torch
+      - build
+    if: |
+      !cancelled() && github.event_name != 'repository_dispatch' &&
+      (success() || (needs.build-torch.result == 'skipped' && needs.build.result == 'success'))
+    uses: ./.github/workflows/_ascend_npu_benchmark.yml
+    with:
+      runner: ${{ needs.prepare.outputs.runner }}
+      image: ${{ needs.prepare.outputs.image }}
+      torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
+      torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
+    secrets:
+      pr-token: ${{ secrets.COSDT_BOT_TOKEN }}
+
   torchtitan:
     name: Run torchtitan
     needs:
@@ -135,4 +169,5 @@ jobs:
       image: ${{ needs.prepare.outputs.image }}
       torch-artifact: ${{ needs.build-torch.outputs.torch-artifact }}
       torch-npu-artifact: ${{ needs.build.outputs.torch-npu-artifact }}
-
+    secrets:
+      pr-token: ${{ secrets.COSDT_BOT_TOKEN }}