server: bench: create a script to wrap all operations

phymbert · Mar 25, 2024 · ea0581b · ea0581b
1 parent cbe0cd2
commit ea0581b
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 26 deletions.
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -36,7 +36,7 @@ jobs:
     runs-on: Standard_NC4as_T4_v3
     env:
       RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: found no way to avoid duplicating the runs-on value here
-    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.event.push.ref == 'refs/heads/master' }}
+    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request != '' || github.event.push.ref == 'refs/heads/master' }}
     steps:
       - name: Clone
         id: checkout
@@ -122,14 +122,21 @@ jobs:
           
            cat results.github.env >> $GITHUB_ENV
 
-      - name: Comment PR
-        uses: mshick/add-pr-comment@v2
-        id: comment_pr
-        if: ${{ github.event.pull_request }}
+#      - name: Comment PR
+#        uses: mshick/add-pr-comment@v2
+#        id: comment_pr
+#        if: ${{ github.event.pull_request != '' }}
+#        with:
+#          message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
+#          message: |
+#            $BENCH_PR_COMMENT
+      - name: Upload results
+        uses: edunad/actions-image@v2.0.0
         with:
-          message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
-          message: |
-            $BENCH_PR_COMMENT
+          path: '*.png'
+          title: |
+            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
+          annotationLevel: 'success'
 
       - name: Commit status
         uses: Sibz/github-status-action@v1

diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
@@ -69,7 +69,7 @@ def main(args_in: list[str] | None = None) -> None:
                 for metric_name in data['metrics']:
                     for metric_metric in data['metrics'][metric_name]:
                         github_env.write(
-                            f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={data['metrics'][metric_name][metric_metric]}\n")
+                            f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={round(data['metrics'][metric_name][metric_metric], 2)}\n")
                 token_seconds = data['metrics']['llamacpp_tokens_second']['avg']
                 bench_results_str = json.dumps(data)
 
@@ -143,25 +143,23 @@ def main(args_in: list[str] | None = None) -> None:
                 with open(f'{metric}.png', "rb") as image_file:
                     encoded_string = base64.b64encode(image_file.read()).decode()
                     image_data.append(f"data:image/png;base64,{encoded_string}")
-        pr_comment = f"""
-        llama.cpp server benchmark results for {args.name} on {args.runner_label}: {round(token_seconds, 2)}tk/s
-        <p align="center">
-            <img src="{image_data[0]}" alt="prompt_tokens_seconds" />
-            <img src="{image_data[1]}" alt="predicted_tokens_seconds"/>
-        </p>
-        <details>
-            <summary>Details</summary>
-            <p align="center">
-                <img src="{image_data[2]}" alt="kv_cache_usage_ratio" />
-                <img src="{image_data[3]}" alt="requests_processing"/>
-                <img src="{image_data[4]}" alt="requests_deferred"/>
-            </p>
-        </detail>
-        """
+        # pr_comment = f"""
+        # llama.cpp server benchmark results for {args.name} on {args.runner_label}: {round(token_seconds, 2)}tk/s
+        # <p align="center">
+        #     <img src="{image_data[0]}" alt="prompt_tokens_seconds" />
+        #     <img src="{image_data[1]}" alt="predicted_tokens_seconds"/>
+        # </p>
+        # <details>
+        #     <summary>Details</summary>
+        #     <p align="center">
+        #         <img src="{image_data[2]}" alt="kv_cache_usage_ratio" />
+        #         <img src="{image_data[3]}" alt="requests_processing"/>
+        #         <img src="{image_data[4]}" alt="requests_deferred"/>
+        #     </p>
+        # </details>
+        # """
 
     with open("results.github.env", 'a') as github_env:
-        pr_comment = pr_comment.replace('\n', '<br/>')
-        github_env.write(f"BENCH_PR_COMMENT='{pr_comment}'")
         github_env.write(f"BENCH_RESULTS='{bench_results_str}'")