Skip to content

Commit

Permalink
server: bench: create a script to wrap all operations
Browse files: browse the repository at this point in the history
  • Loading branch information
phymbert committed Mar 25, 2024
1 parent 4c5d96d commit b3f0178
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 16 deletions.
13 changes: 7 additions & 6 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,13 @@ jobs:
id: server_bench
run: |
set -eux
cd examples/server/bench
source venv/bin/activate
BENCH_K6_BIN_PATH=./k6 python bench.py \
--runner-label $RUNNER_LABEL \
--name ${{ github.job }} \
--branch ${{ github.ref_name }} \
--commit ${{ github.sha }} \
--scenario script.js \
--duration 1m \
--hf-repo ggml-org/models \
Expand All @@ -122,8 +124,7 @@ jobs:
- name: Comment PR
uses: mshick/add-pr-comment@v2
if: ${{ github.event.pull_request }}
with:
message: |
**Hello** ${{ env.HTTP_REQ_DURATION_AVG }}
🌏
!
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
message: ${{ env.BENCH_PR_COMMENT }}
47 changes: 37 additions & 10 deletions examples/server/bench/bench.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import base64
import json
import os
import re
Expand All @@ -18,7 +19,10 @@

def main(args_in: list[str] | None = None) -> None:
parser = argparse.ArgumentParser(description="Start server benchmark scenario")
parser.add_argument("--name", type=str, help="Bench name", required=True)
parser.add_argument("--runner-label", type=str, help="Runner label", required=True)
parser.add_argument("--branch", type=str, help="Branch name", default="detached")
parser.add_argument("--commit", type=str, help="Commit name", default="dirty")
parser.add_argument("--host", type=str, help="Server listen host", default="0.0.0.0")
parser.add_argument("--port", type=int, help="Server listen host", default="8080")
parser.add_argument("--model-path-prefix", type=str, help="Prefix where to store the model files", default="models")
Expand Down Expand Up @@ -92,10 +96,11 @@ def main(args_in: list[str] | None = None) -> None:

# Prometheus
end_time = time.time()
image_data = []
pr_comment = f"tk/s={round(token_seconds, 2)}"
if is_server_listening("0.0.0.0", 9090):
metrics = ['requests_processing', 'requests_deferred',
'kv_cache_usage_ratio', 'prompt_tokens_seconds',
'predicted_tokens_seconds']
metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']

for metric in metrics:
resp = requests.get(f"http://localhost:9090/api/v1/query_range",
Expand All @@ -113,14 +118,15 @@ def main(args_in: list[str] | None = None) -> None:
plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
plt.yticks(fontsize=12, alpha=.7)

plt.title(f"llamacpp:{metric} on {args.runner_label}\n"
f"duration={args.duration} tk/s={round(token_seconds, 2)}\n"
f"hf-repo={args.hf_repo} hf-file={args.hf_file}\n"
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
f" pp={args.max_prompt_tokens} pp+tg={args.max_tokens}",
plt.title(f"{args.name} on {args.runner_label}\n"
f"duration={args.duration} {round(token_seconds, 2)}tk/s\n"
f"branch={args.branch} commit={args.commit}",
fontsize=14, wrap=True)
plt.grid(axis='both', alpha=.3)
plt.ylabel(f"llamacpp:{metric}", fontsize=14)
plt.xlabel(f"hf-repo={args.hf_repo} hf-file={args.hf_file}\n"
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
f" pp={args.max_prompt_tokens} pp+tg={args.max_tokens}", fontsize=14, wrap=True)
plt.gcf().autofmt_xdate()

# Remove borders
Expand All @@ -132,6 +138,28 @@ def main(args_in: list[str] | None = None) -> None:
# Save the plot as a PNG image
plt.savefig(f'{metric}.png')
plt.close()
with open(f'{metric}.png', "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode()
image_data.append(f"data:image/png;base64,{encoded_string}")
pr_comment = f"""
llama.cpp server benchmark results for {args.name} on {args.runner_label}: {round(token_seconds, 2)}tk/s
<p align="center">
<img src="{image_data[0]}" alt="prompt_tokens_seconds" />
<img src="{image_data[1]}" alt="predicted_tokens_seconds"/>
</p>
<details>
<summary>Details</summary>
<p align="center">
<img src="{image_data[2]}" alt="kv_cache_usage_ratio" />
<img src="{image_data[3]}" alt="requests_processing"/>
<img src="{image_data[4]}" alt="requests_deferred"/>
</p>
</detail>
"""

with open("results.github.env", 'a') as github_env:
pr_comment = pr_comment.replace('\n', '<br/>')
github_env.write(f"BENCH_PR_COMMENT={pr_comment}")


def start_benchmark(args):
Expand All @@ -149,8 +177,7 @@ def start_benchmark(args):
args = f"SERVER_BENCH_N_PROMPTS={args.n_prompts} SERVER_BENCH_MAX_PROMPT_TOKENS={args.max_prompt_tokens} SERVER_BENCH_MAX_CONTEXT={args.max_tokens} "
args = args + ' '.join([str(arg) for arg in [k6_path, *k6_args]])
print(f"bench: starting k6 with: {args}")
k6_completed = subprocess.run(args,
shell=True, stdout=sys.stdout, stderr=sys.stderr)
k6_completed = subprocess.run(args, shell=True, stdout=sys.stdout, stderr=sys.stderr)
if k6_completed.returncode != 0:
raise Exception("bench: unable to run k6")

Expand Down

0 comments on commit b3f0178

Please sign in to comment.