Commit c1db5c8: Updated chat bot example to support llama3
mreso committed May 3, 2024 (1 parent: 8dacf69)

Showing 8 changed files with 112 additions and 60 deletions.
9 changes: 6 additions & 3 deletions examples/LLM/llama/chat_app/docker/Dockerfile
@@ -3,20 +3,23 @@ ARG BASE_IMAGE=pytorch/torchserve:latest-gpu
FROM $BASE_IMAGE as server
ARG BASE_IMAGE
ARG EXAMPLE_DIR
-ARG MODEL_NAME
ARG HUGGINGFACE_TOKEN

USER root

-ENV MODEL_NAME=$MODEL_NAME
-
RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
    apt-get update && \
    apt-get install libopenmpi-dev git -y

COPY $EXAMPLE_DIR/requirements.txt /home/model-server/chat_bot/requirements.txt
RUN pip install -r /home/model-server/chat_bot/requirements.txt && huggingface-cli login --token $HUGGINGFACE_TOKEN

+WORKDIR /home/model-server/chat_bot
+RUN git clone https://github.com/ggerganov/llama.cpp.git build && \
+    cd build && \
+    make && \
+    python -m pip install -r requirements.txt
+
COPY $EXAMPLE_DIR /home/model-server/chat_bot
COPY $EXAMPLE_DIR/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
COPY $EXAMPLE_DIR/config.properties /home/model-server/config.properties
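With llama.cpp now cloned and compiled at image-build time, container startup only has to download and quantize the selected model. A minimal build sketch, mirroring the build_image.sh invocation below (the token value is a placeholder; leaving the value off --build-arg makes docker read it from the caller's environment):

    export HUGGINGFACE_TOKEN=hf_xxx   # placeholder token
    DOCKER_BUILDKIT=1 docker buildx build --platform=linux/amd64 \
        --file Dockerfile \
        --build-arg BASE_IMAGE="pytorch/torchserve:latest-cpu" \
        --build-arg EXAMPLE_DIR="." \
        --build-arg HUGGINGFACE_TOKEN \
        -t pytorch/torchserve:chat_bot .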
52 changes: 52 additions & 0 deletions examples/LLM/llama/chat_app/docker/Download_model.py
@@ -0,0 +1,52 @@
+import argparse
+import os
+
+from huggingface_hub import HfApi, snapshot_download
+
+
+def dir_path(path_str):
+    if os.path.isdir(path_str):
+        return path_str
+    elif input(f"{path_str} does not exist, create directory? [y/n]").lower() == "y":
+        os.makedirs(path_str)
+        return path_str
+    else:
+        raise NotADirectoryError(path_str)
+
+
+class HFModelNotFoundError(Exception):
+    def __init__(self, model_str):
+        super().__init__(f"HuggingFace model not found: '{model_str}'")
+
+
+def hf_model(model_str):
+    api = HfApi()
+    models = [m.modelId for m in api.list_models()]
+    if model_str in models:
+        return model_str
+    else:
+        raise HFModelNotFoundError(model_str)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--model_path",
+    "-o",
+    type=dir_path,
+    default="model",
+    help="Output directory for downloaded model files",
+)
+parser.add_argument(
+    "--model_name", "-m", type=hf_model, required=True, help="HuggingFace model name"
+)
+parser.add_argument("--revision", "-r", type=str, default="main", help="Revision")
+args = parser.parse_args()
+
+snapshot_path = snapshot_download(
+    repo_id=args.model_name,
+    revision=args.revision,
+    cache_dir=args.model_path,
+    use_auth_token=True,
+    ignore_patterns=["original/*", "pytorch_model*.bin"],
+)
+print(f"Files for '{args.model_name}' are downloaded to '{snapshot_path}'")
16 changes: 4 additions & 12 deletions examples/LLM/llama/chat_app/docker/build_image.sh
@@ -1,17 +1,8 @@
#!/bin/bash

-# Check if there are enough arguments
-if [ "$#" -eq 0 ] || [ "$#" -gt 1 ]; then
-    echo "Usage: $0 <HF Model>"
-    exit 1
-fi
-
-MODEL_NAME=$(echo "$1" | sed 's/\//---/g')
-echo "Model: " $MODEL_NAME
-
BASE_IMAGE="pytorch/torchserve:latest-cpu"

-DOCKER_TAG="pytorch/torchserve:${MODEL_NAME}"
+DOCKER_TAG="pytorch/torchserve:chat_bot"

# Get relative path of example dir
EXAMPLE_DIR=$(dirname "$(readlink -f "$0")")
@@ -20,9 +11,10 @@ ROOT_DIR=$(realpath "$ROOT_DIR")
EXAMPLE_DIR=$(echo "$EXAMPLE_DIR" | sed "s|$ROOT_DIR|./|")

# Build docker image for the application
-DOCKER_BUILDKIT=1 docker buildx build --platform=linux/amd64 --file ${EXAMPLE_DIR}/Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg EXAMPLE_DIR="${EXAMPLE_DIR}" --build-arg MODEL_NAME="${MODEL_NAME}" --build-arg HUGGINGFACE_TOKEN -t "${DOCKER_TAG}" .
+DOCKER_BUILDKIT=1 docker buildx build --platform=linux/amd64 --file ${EXAMPLE_DIR}/Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg EXAMPLE_DIR="${EXAMPLE_DIR}" --build-arg HUGGINGFACE_TOKEN -t "${DOCKER_TAG}" .

echo "Run the following command to start the chat bot"
echo ""
-echo docker run --rm -it --platform linux/amd64 -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:8084:8084 -p 127.0.0.1:8085:8085 -v $(pwd)/model_store_1:/home/model-server/model-store $DOCKER_TAG
+echo docker run --rm -it --platform linux/amd64 -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:8084:8084 -p 127.0.0.1:8085:8085 -v $(pwd)/model_store_1:/home/model-server/model-store -e MODEL_NAME="meta-llama/Llama-2-7b-chat-hf" $DOCKER_TAG
echo ""
echo "Note: You can replace the model identifier as needed"
1 change: 1 addition & 0 deletions examples/LLM/llama/chat_app/docker/client_app.py
@@ -6,6 +6,7 @@
import streamlit as st

MODEL_NAME = os.environ["MODEL_NAME"]
+MODEL_NAME = MODEL_NAME.replace('/',"---")

# App title
st.set_page_config(page_title="TorchServe Chatbot")
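This mirrors the sed 's/\//---/g' mangling in dockerd-entrypoint.sh: the HuggingFace repo id contains a slash, which cannot appear in the TorchServe model and directory names, so both Streamlit apps now flatten it the same way. A quick check of the mapping:

    python -c 'print("meta-llama/Meta-Llama-3-8B-Instruct".replace("/", "---"))'
    # meta-llama---Meta-Llama-3-8B-Instruct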
66 changes: 33 additions & 33 deletions examples/LLM/llama/chat_app/docker/dockerd-entrypoint.sh
@@ -1,75 +1,75 @@
#!/bin/bash
set -e

-export LLAMA2_Q4_MODEL=/home/model-server/model-store/$MODEL_NAME/model/ggml-model-q4_0.gguf
+MODEL_DIR=$(echo "$MODEL_NAME" | sed 's/\//---/g')
+
+export LLAMA_Q4_MODEL=/home/model-server/model-store/$MODEL_DIR/model/ggml-model-q4_0.gguf


create_model_cfg_yaml() {
    # Define the YAML content with a placeholder for the model name
    yaml_content="# TorchServe frontend parameters\nminWorkers: 1\nmaxWorkers: 1\nresponseTimeout: 1200\n#deviceType: \"gpu\"\n#deviceIds: [0,1]\n#torchrun:\n# nproc-per-node: 1\n\nhandler:\n model_name: \"${2}\"\n manual_seed: 40"

-    # Create the YAML file with the specified model name
-    echo -e "$yaml_content" > "model-config-${1}.yaml"
+    # Create the YAML file
+    echo -e "$yaml_content" > "model-config.yaml"
}

create_model_archive() {
-    MODEL_NAME=$1
-    MODEL_CFG=$2
-    echo "Create model archive for ${MODEL_NAME} if it doesn't already exist"
-    if [ -d "/home/model-server/model-store/$MODEL_NAME" ]; then
-        echo "Model archive for $MODEL_NAME exists."
+    MODEL_DIR=$1
+    echo "Create model archive for ${MODEL_DIR} if it doesn't already exist"
+    if [ -d "/home/model-server/model-store/$MODEL_DIR" ]; then
+        echo "Model archive for $MODEL_DIR exists."
    fi
-    if [ -d "/home/model-server/model-store/$MODEL_NAME/model" ]; then
+    if [ -d "/home/model-server/model-store/$MODEL_DIR/model" ]; then
        echo "Model already downloaded"
-        mv /home/model-server/model-store/$MODEL_NAME/model /home/model-server/model-store/
+        mv /home/model-server/model-store/$MODEL_DIR/model /home/model-server/model-store/
    else
        echo "Model needs to be downloaded"
    fi
-    torch-model-archiver --model-name "$MODEL_NAME" --version 1.0 --handler llama_cpp_handler.py --config-file $MODEL_CFG -r requirements.txt --archive-format no-archive --export-path /home/model-server/model-store -f
+    torch-model-archiver --model-name "$MODEL_DIR" --version 1.0 --handler llama_cpp_handler.py --config-file "model-config.yaml" -r requirements.txt --archive-format no-archive --export-path /home/model-server/model-store -f
    if [ -d "/home/model-server/model-store/model" ]; then
-        mv /home/model-server/model-store/model /home/model-server/model-store/$MODEL_NAME/
+        mv /home/model-server/model-store/model /home/model-server/model-store/$MODEL_DIR/
    fi
}

download_model() {
-    MODEL_NAME=$1
-    HF_MODEL_NAME=$2
-    if [ -d "/home/model-server/model-store/$MODEL_NAME/model" ]; then
-        echo "Model $HF_MODEL_NAME already downloaded"
+    MODEL_DIR=$1
+    MODEL_NAME=$2
+    if [ -d "/home/model-server/model-store/$MODEL_DIR/model" ]; then
+        echo "Model $MODEL_NAME already downloaded"
    else
-        echo "Downloading model $HF_MODEL_NAME"
-        python Download_model.py --model_path /home/model-server/model-store/$MODEL_NAME/model --model_name $HF_MODEL_NAME
+        echo "Downloading model $MODEL_NAME"
+        python Download_model.py --model_path /home/model-server/model-store/$MODEL_DIR/model --model_name $MODEL_NAME
    fi
}

quantize_model() {
-    if [ ! -f "$LLAMA2_Q4_MODEL" ]; then
-        tmp_model_name=$(echo "$MODEL_NAME" | sed 's/---/--/g')
-        directory_path=/home/model-server/model-store/$MODEL_NAME/model/models--$tmp_model_name/snapshots/
+    if [ ! -f "$LLAMA_Q4_MODEL" ]; then
+        tmp_model_name=$(echo "$MODEL_DIR" | sed 's/---/--/g')
+        directory_path=/home/model-server/model-store/$MODEL_DIR/model/models--$tmp_model_name/snapshots/
        HF_MODEL_SNAPSHOT=$(find $directory_path -type d -mindepth 1)
        echo "Cleaning up previous build of llama-cpp"
-        git clone https://github.com/ggerganov/llama.cpp.git build
        cd build
-        make
-        python -m pip install -r requirements.txt

-        echo "Convert the 7B model to ggml FP16 format"
-        python convert.py $HF_MODEL_SNAPSHOT --outfile ggml-model-f16.gguf
+        echo "Convert the model to ggml FP16 format"
+        if [[ $MODEL_NAME == *"Meta-Llama-3"* ]]; then
+            python convert.py $HF_MODEL_SNAPSHOT --vocab-type bpe,hfft --outfile ggml-model-f16.gguf
+        else
+            python convert.py $HF_MODEL_SNAPSHOT --outfile ggml-model-f16.gguf
+        fi

        echo "Quantize the model to 4-bits (using q4_0 method)"
-        ./quantize ggml-model-f16.gguf $LLAMA2_Q4_MODEL q4_0
+        ./quantize ggml-model-f16.gguf $LLAMA_Q4_MODEL q4_0

        cd ..
-        echo "Saved quantized model weights to $LLAMA2_Q4_MODEL"
+        echo "Saved quantized model weights to $LLAMA_Q4_MODEL"
    fi
}

-HF_MODEL_NAME=$(echo "$MODEL_NAME" | sed 's/---/\//g')
if [[ "$1" = "serve" ]]; then
    shift 1
-    create_model_cfg_yaml $MODEL_NAME $HF_MODEL_NAME
-    create_model_archive $MODEL_NAME "model-config-$MODEL_NAME.yaml"
-    download_model $MODEL_NAME $HF_MODEL_NAME
+    create_model_cfg_yaml $MODEL_DIR $MODEL_NAME
+    create_model_archive $MODEL_DIR
+    download_model $MODEL_DIR $MODEL_NAME
    quantize_model
    streamlit run torchserve_server_app.py --server.port 8084 &
    streamlit run client_app.py --server.port 8085
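Net effect: the entrypoint derives everything from the runtime MODEL_NAME instead of a baked-in build argument. A sketch of the serve path for a llama3 id (values illustrative):

    MODEL_NAME="meta-llama/Meta-Llama-3-8B-Instruct"
    MODEL_DIR=$(echo "$MODEL_NAME" | sed 's/\//---/g')   # meta-llama---Meta-Llama-3-8B-Instruct
    create_model_cfg_yaml $MODEL_DIR $MODEL_NAME   # model-config.yaml carries the HF id
    create_model_archive $MODEL_DIR                # archive is named after the mangled id
    download_model $MODEL_DIR $MODEL_NAME          # unmangled id is the HF repo to fetch
    quantize_model                                 # Meta-Llama-3 ids take the bpe,hfft vocab branch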
2 changes: 1 addition & 1 deletion examples/LLM/llama/chat_app/docker/llama_cpp_handler.py
@@ -23,7 +23,7 @@ def initialize(self, ctx):
        ctx (context): It is a JSON Object containing information
        pertaining to the model artifacts parameters.
        """
-        model_path = os.environ["LLAMA2_Q4_MODEL"]
+        model_path = os.environ["LLAMA_Q4_MODEL"]
        model_name = ctx.model_yaml_config["handler"]["model_name"]
        seed = int(ctx.model_yaml_config["handler"]["manual_seed"])
        torch.manual_seed(seed)
1 change: 1 addition & 0 deletions examples/LLM/llama/chat_app/docker/torchserve_server_app.py
@@ -7,6 +7,7 @@
import streamlit as st

MODEL_NAME = os.environ["MODEL_NAME"]
+MODEL_NAME = MODEL_NAME.replace('/',"---")
MODEL = MODEL_NAME.split("---")[1]

# App title
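Here the mangled name is also split back apart: MODEL_NAME.split("---")[1] recovers the bare model name (presumably for display in the server app). For example:

    python -c 'print("meta-llama---Meta-Llama-3-8B-Instruct".split("---")[1])'
    # Meta-Llama-3-8B-Instruct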
25 changes: 14 additions & 11 deletions examples/LLM/llama/chat_app/package_llama.sh
@@ -2,12 +2,12 @@
# Check if the argument is empty or unset
if [ -z "$1" ]; then
    echo "Missing Mandatory argument: Path to llama weights"
-    echo "Usage: ./package_llama.sh ./model/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235"
+    echo "Usage: ./package_llama.sh ./models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e5e23bbe8e749ef0efcf16cad411a7d23bd23298"
    exit 1
fi

MODEL_GENERATION="true"
-LLAMA2_WEIGHTS="$1"
+LLAMA_WEIGHTS="$1"

if [ -n "$2" ]; then
    MODEL_GENERATION="$2"
@@ -20,18 +20,22 @@ if [ "$MODEL_GENERATION" = "true" ]; then
    rm -rf build
    git clone https://github.com/ggerganov/llama.cpp.git build
    cd build
    make
    python -m pip install -r requirements.txt

-    echo "Convert the 7B model to ggml FP16 format"
-    python convert.py $LLAMA2_WEIGHTS --outfile ggml-model-f16.gguf
+    echo "Convert the model to ggml FP16 format"
+    if [[ $LLAMA_WEIGHTS == *"Meta-Llama-3"* ]]; then
+        python convert.py $LLAMA_WEIGHTS --vocab-type bpe,hfft --outfile ggml-model-f16.gguf
+    else
+        python convert.py $LLAMA_WEIGHTS --outfile ggml-model-f16.gguf
+    fi

    echo "Quantize the model to 4-bits (using q4_0 method)"
    ./quantize ggml-model-f16.gguf ../ggml-model-q4_0.gguf q4_0

    cd ..
-    export LLAMA2_Q4_MODEL=$PWD/ggml-model-q4_0.gguf
-    echo "Saved quantized model weights to $LLAMA2_Q4_MODEL"
+    export LLAMA_Q4_MODEL=$PWD/ggml-model-q4_0.gguf
+    echo "Saved quantized model weights to $LLAMA_Q4_MODEL"
fi

echo "Creating torchserve model archive"
@@ -43,4 +47,3 @@ if [ "$MODEL_GENERATION" = "true" ]; then
echo "Cleaning up build of llama-cpp"
rm -rf build
fi
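The standalone packaging script keeps its path-based interface; a usage sketch (the snapshot hash is illustrative, and the optional second argument toggles weight generation as shown above):

    # Convert, quantize, and archive:
    ./package_llama.sh ./models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e5e23bbe8e749ef0efcf16cad411a7d23bd23298
    # Skip weight generation and only rebuild the archive:
    ./package_llama.sh <path_to_weights> false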
