neuralmagic · bfineran · Jun 19, 2023 · Jul 12, 2023 · Jul 17, 2023 · Jul 27, 2023
diff --git a/.github/workflows/build-nightly.yml b/.github/workflows/build-nightly.yml
@@ -0,0 +1,22 @@
+# Nightly wheel build: runs on pushes to main, daily at 00:00 UTC, or manually.
+name: build-nightly
+run-name: ${{ github.workflow }} is to create nightly wheel file for pypi
+on:
+  push:
+    branches:
+      - 'main'
+  schedule:
+    # GitHub evaluates cron schedules in UTC
+    - cron: '0 0 * * *'
+  workflow_dispatch:
+
+jobs:
+  # Delegates the build, S3 upload and testmo report to the reusable workflow.
+  BUILD-TRANSFORMERS-NIGHTLY:
+    uses: ./.github/workflows/util.yml
+    with:
+      runs_on: ubuntu-22.04
+      run_id: ${{ github.run_id }}
+      build_type: nightly
+      testmo_project_id: '9'
+    secrets: inherit
diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml
@@ -0,0 +1,19 @@
+# Release wheel build: runs on pushes to release/<major>.<minor> branches or manually.
+name: build-release
+run-name: ${{ github.workflow }} is to create release wheel file for pypi
+on:
+  push:
+    branches:
+      - 'release/[0-9]+.[0-9]+'
+  # sibling of `push`, so manual runs are registered as their own trigger
+  workflow_dispatch:
+
+jobs:
+  BUILD-TRANSFORMERS-RELEASE:
+    uses: ./.github/workflows/util.yml
+    with:
+      runs_on: ubuntu-22.04
+      run_id: ${{ github.run_id }}
+      build_type: release
+      testmo_project_id: '9'
+    secrets: inherit
diff --git a/.github/workflows/util.yml b/.github/workflows/util.yml
@@ -0,0 +1,129 @@
+name: report-to-testmo  # reusable workflow: build the wheel, then report to testmo
+on:
+  workflow_call:
+    inputs:
+      runs_on:
+        description: 'runner label specifying instance running the job'
+        type: string
+        required: true
+      run_id:
+        description: 'run id provided by GHA'
+        type: string
+        required: true
+      build_type:
+        description: 'build type: nightly or release'
+        type: string
+        required: true
+      testmo_project_id:
+        description: 'testmo project id'
+        type: string
+        required: true
+
+jobs:
+
+  # Builds the wheel, uploads it to S3 and publishes `status`/`commitid` outputs.
+  BUILD:
+    runs-on: ${{ inputs.runs_on }}
+    outputs:
+      status: ${{ steps.build.outputs.status }}
+      commitid: ${{ steps.build.outputs.commitid }}
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - name: repo checkout
+        uses: actions/checkout@v3
+
+      - name: s3
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_WEBIDENTITY_FOR_GITHUB_ACTIONS }}
+          aws-region: us-east-1
+
+      - name: build
+        id: build
+        run: |
+          pwd
+          sudo apt-get -y install python3-pip
+          pip3 --version
+          sudo pip3 install virtualenv
+          virtualenv venv
+          source venv/bin/activate
+          pip install -e .
+          if [[ "${{ inputs.build_type }}" = release ]]; then
+              sed -i 's/is_release = False/is_release = True/g' src/${{ github.event.repository.name }}/version.py
+          fi
+          # keep make's exit code: its stdout is the build log and never equals a bare marker string
+          if build_log=$(make -B build); then build_rc=0; else build_rc=$?; fi
+          deactivate
+          echo "=========== Build log ==========="
+          echo "${build_log}"
+          echo "commitid=${GITHUB_SHA:0:7}" >> "$GITHUB_OUTPUT"
+          echo "=========== Build status ==========="
+          if [[ "${build_rc}" -ne 0 ]]; then
+              echo "${{ github.event.repository.name }} build failed"
+              echo "status=failed" >> "$GITHUB_OUTPUT"
+              exit 1
+          else
+              echo "${{ github.event.repository.name }} build success"
+          fi
+          echo "=========== Generated build ==========="
+          ls dist/
+          echo "=========== Copy build to S3 ==========="
+          # test the upload inline: under the default `bash -e` a separate `$?` check is never reached on failure
+          if aws s3 cp dist/*.whl s3://nm-github-actions/${{ github.event.repository.name }}/; then
+              echo "ok: copied to s3://nm-github-actions/${{ github.event.repository.name }}/"
+              echo "status=success" >> "$GITHUB_OUTPUT"
+          else
+              echo "failed: copied to s3://nm-github-actions/${{ github.event.repository.name }}/"
+              echo "status=failed" >> "$GITHUB_OUTPUT"
+              exit 1
+          fi
+          oldDate=$(date --date='-2 month' +%Y%m%d)
+          oldWhl=$(aws s3 ls s3://nm-github-actions/${{ github.event.repository.name }}/ | grep nightly | grep "${oldDate}" || echo "notfound")
+          if [[ "${oldWhl}" != 'notfound' ]]; then
+              for oldwhl in $(echo "${oldWhl}" | awk '{print $4}'); do
+                  echo "Remove old build ${oldwhl} in S3"
+                  aws s3 rm "s3://nm-github-actions/${{ github.event.repository.name }}/${oldwhl}"
+              done
+          fi
+
+  # Reports the BUILD result to testmo; runs after BUILD on both success and failure.
+  TESTMO:
+    if: success() || failure()
+    needs: BUILD
+    runs-on: ${{ inputs.runs_on }}
+    steps:
+      - id: report
+        env:
+          # handed over through the environment instead of being pasted into the script text
+          TESTMO_TOKEN: ${{ secrets.TESTMO_TEST_TOKEN }}
+        run: |
+          echo "node: $(node -v)"
+          echo "npm: $(npm -v)"
+          echo "Installing testmo cli..."
+          sudo npm install -g @testmo/testmo-cli
+          TESTMO_URL="https://neuralmagic.testmo.net"
+          todaytime=$(date +%Y%m%d)
+          name="${{ github.event.repository.name }} ${{ inputs.build_type }} ${todaytime} ${{ needs.BUILD.outputs.commitid }} RunID:${{ inputs.run_id }}"
+          echo "========== Build info ==========="
+          echo "name: ${name}"
+          echo "build status: ${{ needs.BUILD.outputs.status }}"
+          echo "<status>${{ needs.BUILD.outputs.status }}</status>" > result.xml
+          # result.sh is the command given to testmo after `--`; it exits non-zero unless the build status is success
+          if [[ "${{ needs.BUILD.outputs.status }}" = "success" ]]; then exit_code=0; else exit_code=1; fi
+          echo "echo \"GHA job ${{ needs.BUILD.outputs.status }}: https://github.com/neuralmagic/${{ github.event.repository.name }}/actions/runs/${{ inputs.run_id }}\"; exit ${exit_code}" > result.sh
+          echo "========== Report to testmo ==========="
+          echo "testmo automation:run:submit \\"
+          echo "  --instance ${TESTMO_URL} \\"
+          echo "  --project-id ${{ inputs.testmo_project_id }} \\"
+          echo "  --name ${name} \\"
+          echo "  --source ${{ github.event.repository.name }} \\"
+          echo "  --results result.xml"
+          testmo automation:run:submit \
+                         --instance "${TESTMO_URL}" \
+                         --project-id ${{ inputs.testmo_project_id }} \
+                         --name "${name}" \
+                         --source "${{ github.event.repository.name }}" \
+                         --results result.xml \
+                         -- bash result.sh
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples
+.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples build
 
 # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
 export PYTHONPATH = src
@@ -119,3 +119,7 @@ build-release:
 	python setup.py bdist_wheel
 	python setup.py sdist
 	python utils/check_build.py
+
+# neuralmagic: creates the source distribution and wheel file via setup.py
+build:
+	python3 setup.py sdist bdist_wheel
diff --git a/setup.py b/setup.py
@@ -423,17 +423,22 @@ def run(self):
     deps["tqdm"],  # progress bars in model download and training scripts
 ]
 
+# defaults, overwritten when src/transformers/version.py is executed below
+version, is_release = "unknown", False
+with open(os.path.join("src", "transformers", "version.py"), encoding="utf-8") as version_file:
+    exec(version_file.read())
+
 setup(
-    name="transformers",
-    version="4.34.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    name="nm-transformers" if is_release else "nm-transformers-nightly",
+    version=version,  # major.minor.patch to match NM repos, fourth entry is either transformers base version or nightly date
     author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
     author_email="transformers@huggingface.co",
     description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
     keywords="NLP vision speech deep learning transformer pytorch tensorflow jax BERT GPT-2 Wav2Vec2 ViT",
     license="Apache 2.0 License",
-    url="https://github.com/huggingface/transformers",
+    url="https://github.com/neuralmagic/transformers",
     package_dir={"": "src"},
     packages=find_packages("src"),
     include_package_data=True,

diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -18,7 +18,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.34.1"
+from .version import *
 
 from typing import TYPE_CHECKING
 

diff --git a/src/transformers/hf_argparser.py b/src/transformers/hf_argparser.py
@@ -23,8 +23,16 @@
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Literal, NewType, Optional, Tuple, Union, get_type_hints
 
+import os
 import yaml
 
+from sparsezoo import Model
+
+from .utils.logging import get_logger
+
+
+logger = get_logger(__name__)
+
 
 DataClass = NewType("DataClass", Any)
 DataClassType = NewType("DataClassType", Any)
@@ -341,12 +349,17 @@ def parse_args_into_dataclasses(
             # additional namespace.
             outputs.append(namespace)
         if return_remaining_strings:
-            return (*outputs, remaining_args)
+            # `tuple()` accepts a single iterable, so build the result with unpacking instead
+            resolved_outputs = [_download_dataclass_zoo_stub_files(output) for output in outputs]
+            # remaining_args stays the last element, as before the zoo-stub resolution was added
+            return (*resolved_outputs, remaining_args)
         else:
             if remaining_args:
                 raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {remaining_args}")
 
-            return (*outputs,)
+            return tuple(
+                [_download_dataclass_zoo_stub_files(output) for output in outputs]
+            )
 
     def parse_dict(self, args: Dict[str, Any], allow_extra_keys: bool = False) -> Tuple[DataClass, ...]:
         """
@@ -374,7 +387,9 @@ def parse_dict(self, args: Dict[str, Any], allow_extra_keys: bool = False) -> Tu
             outputs.append(obj)
         if not allow_extra_keys and unused_keys:
             raise ValueError(f"Some keys are not used by the HfArgumentParser: {sorted(unused_keys)}")
-        return tuple(outputs)
+        return tuple(
+            [_download_dataclass_zoo_stub_files(output) for output in outputs]
+        )
 
     def parse_json_file(self, json_file: str, allow_extra_keys: bool = False) -> Tuple[DataClass, ...]:
         """
@@ -417,3 +432,28 @@ def parse_yaml_file(self, yaml_file: str, allow_extra_keys: bool = False) -> Tup
         """
         outputs = self.parse_dict(yaml.safe_load(Path(yaml_file).read_text()), allow_extra_keys=allow_extra_keys)
         return tuple(outputs)
+
+def _download_dataclass_zoo_stub_files(data_class: DataClass):
+    """
+    Overwrite each non-recipe `zoo:` stub string field of `data_class` with the local directory of its framework
+    files and return `data_class`. Raises AssertionError (no files) or RuntimeError (missing weights/config).
+    """
+    for name, val in data_class.__dict__.items():
+        if not isinstance(val, str) or "recipe" in name or not val.startswith("zoo:"):
+            continue
+        logger.info(f"Downloading framework files for SparseZoo stub: {val}")
+        zoo_model = Model(val)
+        framework_file_paths = [file.path for file in zoo_model.training.default.files]
+        assert framework_file_paths, f"Unable to download any framework files for SparseZoo stub {val}"
+        framework_file_names = [os.path.basename(path) for path in framework_file_paths]
+        if "pytorch_model.bin" not in framework_file_names or ("config.json" not in framework_file_names):
+            raise RuntimeError(
+                "Unable to find 'pytorch_model.bin' and 'config.json' in framework "
+                f"files downloaded from {val}. Found {framework_file_names}. Check "
+                "if the given stub is for a transformers repo model"
+            )
+        framework_dir_path = Path(framework_file_paths[0]).parent.absolute()
+        logger.info(f"Overwriting argument {name} to downloaded {framework_dir_path}")
+        data_class.__dict__[name] = str(framework_dir_path)
+
+    return data_class
diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py
@@ -241,6 +241,22 @@ def forward(
         return embeddings
 
 
+class QATMatMul(nn.Module):
+    """Thin module around `torch.matmul` carrying the attributes SparseML is said to read for QAT wrapping."""
+
+    def __init__(self):
+        super().__init__()
+        # behaves like normal torch.matmul unless a SparseML QuantizationModifier
+        # is initialized
+        self.wrap_qat = True
+        self.qat_wrapper_kwargs = dict(num_inputs=2, input_qconfigs=["asymmetric", "symmetric"])
+
+    def forward(self, a: torch.Tensor, b: torch.Tensor):
+        """Return the matrix product of `a` and `b`."""
+        product = torch.matmul(a, b)
+        return product
+
+
 class BertSelfAttention(nn.Module):
     def __init__(self, config, position_embedding_type=None):
         super().__init__()
@@ -258,6 +274,11 @@ def __init__(self, config, position_embedding_type=None):
         self.key = nn.Linear(config.hidden_size, self.all_head_size)
         self.value = nn.Linear(config.hidden_size, self.all_head_size)
 
+        # non-parameterized matmuls will behave as normal torch.matmul ops unless
+        # Quantization-Aware-Training is invoked
+        self.attention_scores_matmul = QATMatMul()
+        self.context_layer_matmul = QATMatMul()
+
         self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
         self.position_embedding_type = position_embedding_type or getattr(
             config, "position_embedding_type", "absolute"
@@ -322,7 +343,7 @@ def forward(
             past_key_value = (key_layer, value_layer)
 
         # Take the dot product between "query" and "key" to get the raw attention scores.
-        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
+        attention_scores = self.attention_scores_matmul(query_layer, key_layer.transpose(-1, -2))
 
         if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
             query_length, key_length = query_layer.shape[2], key_layer.shape[2]
@@ -362,7 +383,7 @@ def forward(
         if head_mask is not None:
             attention_probs = attention_probs * head_mask
 
-        context_layer = torch.matmul(attention_probs, value_layer)
+        context_layer = self.context_layer_matmul(attention_probs, value_layer)
 
         context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)