From 246912c61e947bb48b2a449357c739bbf7d3dfef Mon Sep 17 00:00:00 2001
From: greg pereira
Date: Wed, 5 Jun 2024 07:55:39 -0700
Subject: [PATCH] fix passing of chat_format to llamacpp ms

Signed-off-by: greg pereira
---
 .github/workflows/model_servers.yaml                |  4 ++--
 model_servers/common/Makefile.common                | 13 +++++++++++--
 model_servers/llamacpp_python/Makefile              |  2 +-
 model_servers/llamacpp_python/base/Containerfile    |  2 ++
 model_servers/llamacpp_python/src/run.sh            |  9 ++++++++-
 model_servers/llamacpp_python/tooling_options.ipynb |  2 +-
 6 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index c9d6b58c..9178a45a 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -5,13 +5,13 @@ on:
     branches:
       - main
     paths:
-      - ./model_servers/**
+      - 'model_servers/**'
       - .github/workflows/model_servers.yaml
   push:
     branches:
       - main
     paths:
-      - ./model_servers/
+      - 'model_servers/**'
       - .github/workflows/model_servers.yaml
   workflow_dispatch:
 
diff --git a/model_servers/common/Makefile.common b/model_servers/common/Makefile.common
index b0fff263..b363690d 100644
--- a/model_servers/common/Makefile.common
+++ b/model_servers/common/Makefile.common
@@ -1,6 +1,7 @@
 REGISTRY ?= quay.io
 REGISTRY_ORG ?= ai-lab
 COMPONENT ?= model_servers
+CHAT_FORMAT ?=
 BIND_MOUNT_OPTIONS := ro
 
 OS := $(shell uname -s)
@@ -10,7 +11,7 @@ endif
 
 .PHONY: build
 build:
-	podman build --squash-all --build-arg PORT=$(PORT) -t $(IMAGE) . -f base/Containerfile
+	podman build --squash-all $(CHAT_FORMAT:%=--build-arg CHAT_FORMAT=%) --build-arg PORT=$(PORT) -t $(IMAGE) . -f base/Containerfile
 
 .PHONY: install
 install:
@@ -35,7 +36,15 @@ clean:
 .PHONY: run
 run:
 	cd ../../models && \
-	podman run -it -d -p $(PORT):$(PORT) -v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE)
+	podman run -it \
+		-d \
+		-p $(PORT):$(PORT) \
+		-v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) \
+		-e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) \
+		-e HOST=0.0.0.0 \
+		-e PORT=$(PORT) \
+		$(CHAT_FORMAT:%=-e CHAT_FORMAT=%) \
+		$(IMAGE)
 
 .PHONY: podman-clean
 podman-clean:
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index f2f24268..b232f332 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -1,6 +1,6 @@
 APP := llamacpp_python
 PORT ?= 8001
-CHAT_FORMAT ?= openchat
+CHAT_FORMAT ?= llama-2
 
 include ../common/Makefile.common
 
diff --git a/model_servers/llamacpp_python/base/Containerfile b/model_servers/llamacpp_python/base/Containerfile
index e1a1516a..2b58c966 100644
--- a/model_servers/llamacpp_python/base/Containerfile
+++ b/model_servers/llamacpp_python/base/Containerfile
@@ -1,4 +1,6 @@
 FROM registry.access.redhat.com/ubi9/python-311:1-62.1716478620
+ARG CHAT_FORMAT=llama-2
+ENV CHAT_FORMAT=${CHAT_FORMAT}
 WORKDIR /locallm
 COPY src .
 USER root
diff --git a/model_servers/llamacpp_python/src/run.sh b/model_servers/llamacpp_python/src/run.sh
index c6f26875..fa38d447 100644
--- a/model_servers/llamacpp_python/src/run.sh
+++ b/model_servers/llamacpp_python/src/run.sh
@@ -5,7 +5,14 @@ if [ ${CONFIG_PATH} ] || [[ ${MODEL_PATH} && ${CONFIG_PATH} ]]; then
 fi
 
 if [ ${MODEL_PATH} ]; then
-    python -m llama_cpp.server --model ${MODEL_PATH} --host ${HOST:=0.0.0.0} --port ${PORT:=8001} --n_gpu_layers ${GPU_LAYERS:=0} --clip_model_path ${CLIP_MODEL_PATH:=None} --chat_format ${MODEL_CHAT_FORMAT:="llama-2"} --interrupt_requests ${INTERRUPT_REQUESTS:=False}
+    python -m llama_cpp.server \
+        --model ${MODEL_PATH} \
+        --host ${HOST:=0.0.0.0} \
+        --port ${PORT:=8001} \
+        --n_gpu_layers ${GPU_LAYERS:=0} \
+        --clip_model_path ${CLIP_MODEL_PATH:=None} \
+        --chat_format ${CHAT_FORMAT:="llama-2"} \
+        --interrupt_requests ${INTERRUPT_REQUESTS:=False}
     exit 0
 fi
 
diff --git a/model_servers/llamacpp_python/tooling_options.ipynb b/model_servers/llamacpp_python/tooling_options.ipynb
index 82a7dc38..41141902 100644
--- a/model_servers/llamacpp_python/tooling_options.ipynb
+++ b/model_servers/llamacpp_python/tooling_options.ipynb
@@ -203,7 +203,7 @@
     "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
     "\n",
     "\n",
-    "llm = OpenAI(temperature=0.9,model_name=\"llama2\", base_url=\"http://localhost:8000/v1\", \n",
+    "llm = OpenAI(temperature=0.9, model_name=\"instructlab/granite-7b-lab\", base_url=\"http://localhost:8000/v1\", \n",
     "    openai_api_key=\"sk-no-key-required\", streaming=True,\n",
     "    callbacks=[StreamingStdOutCallbackHandler()])\n",
     "text = \"What would be a good company name for a company that makes colorful socks?\"\n",
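
Usage note (editor's sketch, not part of the commit): a minimal outline of how the CHAT_FORMAT knob introduced above is meant to flow end to end. The build and run targets, the PORT=8001 default, and the run.sh fallback to "llama-2" all come from the diff itself; the example value "chatml", the image reference, and the model path in the last command are illustrative assumptions, not values taken from this patch.

    # Bake a different default chat template into the image; the value is
    # consumed by the ARG/ENV pair in base/Containerfile:
    make -C model_servers/llamacpp_python build CHAT_FORMAT=chatml

    # Or override per container instead; Makefile.common only emits
    # "-e CHAT_FORMAT=..." when the variable is non-empty:
    make -C model_servers/llamacpp_python run CHAT_FORMAT=chatml

    # Or bypass make entirely; run.sh falls back to "llama-2" when
    # CHAT_FORMAT is unset (image reference and model path are hypothetical):
    podman run -d -p 8001:8001 \
        -v ./model.gguf:/locallm/models/model.gguf:ro \
        -e MODEL_PATH=/locallm/models/model.gguf \
        -e CHAT_FORMAT=chatml \
        quay.io/ai-lab/model_servers/llamacpp_python:latest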