WIP server: bench: init #81

Workflow file for this run

# Benchmark
name: Benchmark

on:
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC64as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC48ads_A100_v4
          - Standard_ND96asr_A100_v4
          - Standard_NC40ads_H100_v5
          - Standard_NC80adis_H100_v5
  push:
    branches:
      - master
    paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
  schedule:
    - cron: '04 2 * * *'
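# Only one benchmark run per workflow and ref at a time; a newer run cancels the one in progress.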
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: no way found yet to avoid duplicating the runs-on value
    #if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.event.push.ref == 'refs/heads/master' }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
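      # Create a virtual environment and install the bench harness dependencies (examples/server/bench/requirements.txt).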
      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt
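      # Start a local Prometheus instance to scrape server metrics during the run, waiting until it listens on port 9090.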
      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          while ! nc -z localhost 9090; do
            sleep 0.1
          done
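      # Download the k6 load-testing binary; bench.py invokes it via BENCH_K6_BIN_PATH to drive the scenario.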
      - name: Install k6
        id: k6_installation
        run: |
          cd examples/server/bench
          wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
          tar xzf k6*.tar.gz --strip-components=1
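      # Build the server target with CUDA (cuBLAS) enabled; CMAKE_CUDA_ARCHITECTURES=75 matches the T4 runner.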
      - name: Build
        id: cmake_build
        run: |
          set -eux
          mkdir build
          cd build
          cmake .. \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DLLAMA_CUBLAS=ON \
            -DCUDAToolkit_ROOT=/usr/local/cuda \
            -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
            -DCMAKE_CUDA_ARCHITECTURES=75 \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_ALL_WARNINGS=OFF \
            -DCMAKE_BUILD_TYPE=Release;
          cmake --build . --config Release -j $(nproc) --target server
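      # Fetch the ShareGPT dataset used as the prompt source for the benchmark.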
      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
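      # Run the k6 scenario against the server and export the results (results.github.env) into $GITHUB_ENV for the steps below.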
      - name: Server bench
        id: server_bench
        run: |
          set -eux
          cd examples/server/bench
          source venv/bin/activate
          BENCH_K6_BIN_PATH=./k6 python bench.py \
            --runner-label ${{ env.RUNNER_LABEL }} \
            --name ${{ github.job }} \
            --branch ${{ github.ref_name }} \
            --commit ${{ github.sha }} \
            --scenario script.js \
            --duration 30s \
            --hf-repo ggml-org/models \
            --hf-file phi-2/ggml-model-q4_0.gguf \
            --model-path-prefix /models \
            --parallel 8 \
            -ngl 33 \
            --batch-size 2048 \
            --ubatch-size 256 \
            --ctx-size 16384 \
            --n-prompts 1000 \
            --max-prompt-tokens 1024 \
            --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV
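      # Posting the results as a PR comment is disabled for now.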
      # - name: Comment PR
      #   uses: mshick/add-pr-comment@v2
      #   id: comment_pr
      #   if: ${{ github.event.pull_request != '' }}
      #   with:
      #     message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
      #     message: |
      #       $BENCH_PR_COMMENT
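      # Publish the benchmark summary (BENCH_RESULTS) as a commit status on the tested commit.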
      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{ secrets.GITHUB_TOKEN }}
          context: ${{ github.job }}
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'
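      # On pull requests, attach the generated result plots (*.png) as an image annotation.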
      - name: Upload results
        if: ${{ github.event.pull_request }}
        uses: edunad/actions-image@v2.0.0
        with:
          path: '*.png'
          title: |
            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s
          annotationLevel: 'success'
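# Usage note: assuming this file is saved as .github/workflows/bench.yml and the GitHub CLI is available,
# the workflow can be triggered manually on a chosen GPU series with, for example:
#   gh workflow run bench.yml -f gpu-series=Standard_NC4as_T4_v3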