diff --git a/data/fake_meeting.txt b/data/fake_meeting.txt
new file mode 100644
index 00000000..c1a6463e
--- /dev/null
+++ b/data/fake_meeting.txt
@@ -0,0 +1,29 @@
+[The scene is set in a luxurious conference room with the three executives seated around a large oak table. The room is well-lit and the atmosphere is professional and cordial.]
+Executive 1: "Good morning, everyone. Thank you for joining me today to discuss our exciting new AI business venture."
+Executive 2: "Of course, John. I'm thrilled to be here. This is a game-changer for our college and I can't wait to see it come to fruition."
+Executive 3: "Indeed. As you know, AI is becoming increasingly important in various industries, and we believe that our venture will provide significant benefits to both our students and the business world as a whole."
+Executive 1: "That's right. Our AI platform will offer personalized learning experiences for our students, tailored to their individual needs and goals. And for the business world, it will provide cutting-edge insights and predictions based on vast amounts of data, giving them a competitive edge in today's fast-paced marketplace."
+Executive 2: "I see. So how do you plan to monetize this platform?"
+Executive 3: "That's a great question. We plan to offer subscription-based services to businesses, as well as generate revenue through partnerships and collaborations with industry leaders. Additionally, we will also explore opportunities for licensing our AI technology to other organizations."
+Executive 1: "Excellent. And what about security and privacy concerns? How do you plan to address those?"
+Executive 2: "Absolutely. We understand the importance of protecting sensitive data, and we will implement robust security measures to ensure that our platform is secure and compliant with all relevant regulations."
+Executive 3: "Yes, and we will also have strict data privacy policies in place to safeguard the personal information of our students and clients. Transparency and trust are key components of any successful AI venture, and we take those seriously."
+Executive 1: "I couldn't agree more. Now that we have a solid plan in place, let's start making some noise about this exciting new venture. I think it has the potential to revolutionize the way we approach education and business."
+[The three executives nod in agreement and begin brainstorming strategies for promoting their AI platform.]
+Executive 1: "Absolutely. Now that we have a solid plan in place, let's start making some noise about this exciting new venture. I think it has the potential to revolutionize the way we approach education and business."
+Executive 2: "Agreed. We should start by reaching out to industry leaders and thought leaders in the field of AI and education. They will be key in helping us spread the word and build momentum for our platform."
+Executive 3: "Excellent idea. And we should also consider partnering with some of the leading AI research institutions and universities. They will be able to provide valuable insights and expertise that will help us refine and improve our platform."
+Executive 1: "That's a great point. Partnerships are key in any successful venture, and we want to make sure that we're working with the best of the best in this field."
+Executive 2: "Definitely. And once we have a solid proof of concept, we can start reaching out to potential clients and showcasing the value of our platform. I think we'll find a lot of interest from companies looking for innovative ways to improve their operations and stay ahead of the competition."
+Executive 3: "I agree. And as we continue to develop and refine our platform, we can also start exploring new markets and applications for AI in education. There are so many possibilities here, and I'm excited to see where this journey takes us."
+Certainly! Here is a continuation of the dialogue:
+Executive 1: "Absolutely. Now that we have a solid plan in place, let's start making some noise about this exciting new venture. I think it has the potential to revolutionize the way we approach education and business."
+Executive 2: "Agreed. We should start by reaching out to industry leaders and thought leaders in the field of AI and education. They will be key in helping us spread the word and build momentum for our platform."
+Executive 3: "Excellent idea. And we should also consider partnering with some of the leading AI research institutions and universities. They will be able to provide valuable insights and expertise that will help us refine and improve our platform."
+Executive 1: "That's a great point. Partnerships are key in any successful venture, and we want to make sure that we're working with the best of the best in this field."
+Executive 2: "Definitely. And once we have a solid proof of concept, we can start reaching out to potential clients and showcasing the value of our platform. I think we'll find a lot of interest from companies looking for innovative ways to improve their operations and stay ahead of the competition."
+Executive 3: "I agree. And as we continue to develop and refine our platform, we can also start exploring new markets and applications for AI in education. There are so many possibilities here, and I'm excited to see where this journey takes us."
+Executive 1: "Absolutely. And speaking of markets, let's not forget about the potential for international expansion. We could be looking at a global market opportunity here, and we don't want to miss out on that."
+Executive 2: "Agreed. We should definitely consider how we can tailor our platform to meet the unique needs of different cultures and regions around the world."
+Executive 3: "Excellent point. And as we continue to grow and expand, we'll need to make sure that we have the right infrastructure in place to support our global ambitions."
+[The three executives nod in agreement and begin brainstorming strategies for promoting their AI platform on a global scale.]
\ No newline at end of file
diff --git a/models/README.md b/models/README.md
new file mode 100644
index 00000000..828c24c7
--- /dev/null
+++ b/models/README.md
@@ -0,0 +1 @@
+# Directory to store model files
\ No newline at end of file
diff --git a/src/chat.py b/src/chat.py
index 171248c9..63fd29e8 100644
--- a/src/chat.py
+++ b/src/chat.py
@@ -17,14 +17,20 @@ def __init__(self) -> None:
                        n_batch=Chat.n_ctx,
                        f16_kv=True,
                        stream=True,)
-
+
+
     def reset_system_prompt(self, prompt=None):
         if not prompt:
-            self.chat_history = []
+            self.chat_history[0] = {"role":"system", "content":""}
         else:
-            self.chat_history = [{"role":"system",
-                                  "content": prompt}]
-        print(self.chat_history)
+            self.chat_history[0] = {"role":"system",
+                                    "content": prompt}
+        print(self.chat_history[0])
+
+
+    def clear_history(self):
+        self.chat_history = [self.chat_history[0]]
+
     def count_tokens(self, messages):
         num_extra_tokens = len(self.chat_history) * 6 # accounts for tokens outside of "content"
@@ -60,3 +66,33 @@ def ask(self, prompt, history):
             reply += token["content"]
             yield reply
         self.chat_history.append({"role":"assistant","content":reply})
+
+
+def chunk_tokens(llm, prompt, chunk_size):
+    tokens = tokenize(llm, prompt)
+    num_tokens = count_tokens(llm, prompt)
+    chunks = []
+    for i in range((num_tokens//chunk_size)+1):
+        chunk = str(llm.detokenize(tokens[:chunk_size]),"utf-8")
+        chunks.append(chunk)
+        tokens = tokens[chunk_size:]
+    return chunks
+
+def tokenize(llama, prompt):
+    return llama.tokenize(bytes(prompt, "utf-8"))
+
+def count_tokens(llama,prompt):
+    return len(tokenize(llama,prompt)) + 5
+
+def clip_history(llama, prompt, history, n_ctx, max_tokens):
+    prompt_len = count_tokens(llama, prompt)
+    history_len = sum([count_tokens(llama, x["content"]) for x in history])
+    input_len = prompt_len + history_len
+    print(input_len)
+    while input_len >= n_ctx-max_tokens:
+        print("Clipping")
+        history.pop(1)
+        history_len = sum([count_tokens(llama, x["content"]) for x in history])
+        input_len = history_len + prompt_len
+        print(input_len)
+    return history
diff --git a/summarizer/README.md b/summarizer/README.md
new file mode 100644
index 00000000..41ca1208
--- /dev/null
+++ b/summarizer/README.md
@@ -0,0 +1,78 @@
+# Text Summarizer Application
+
+This model service is intended to be used for text summarization tasks. This service can ingest an arbitrarily long text input. If the input length is less than the model's maximum context window, it will summarize the input directly. If the input is longer than the maximum context window, the input will be divided into appropriately sized chunks. Each chunk will be summarized and a final "summary of summaries" will be the service's final output.
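+
+_The sketch below is an illustrative restatement of the flow implemented in `summarizer/model_services/summary_service.py`, using `chunk_tokens` from `src/chat.py`. The chunk size and system prompt shown here are example values, not the service's exact configuration._
+
+```python
+# Illustrative sketch only; the real logic lives in summary_service.py.
+from llama_cpp import Llama
+from chat import chunk_tokens  # provided by src/chat.py
+
+# Example system prompt, not the one the service ships with.
+SYSTEM = {"role": "system", "content": "Summarize the input in bullet points."}
+
+def summarize_text(llm: Llama, text: str, chunk_size: int = 3584) -> str:
+    # Split the input into chunks small enough to fit in the context window.
+    chunks = chunk_tokens(llm, text, chunk_size)
+    # Summarize each chunk independently.
+    partial_summaries = []
+    for chunk in chunks:
+        response = llm.create_chat_completion([SYSTEM, {"role": "user", "content": chunk}])
+        partial_summaries.append(response["choices"][0]["message"]["content"])
+    if len(chunks) == 1:
+        return partial_summaries[0]
+    # Otherwise, produce a final "summary of summaries" from the per-chunk results.
+    response = llm.create_chat_completion(
+        [SYSTEM, {"role": "user", "content": " ".join(partial_summaries)}])
+    return response["choices"][0]["message"]["content"]
+```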
+
+To use this model service, please follow the steps below:
+
+* [Download Model](#download-models)
+* [Build Image](#build-the-image)
+* [Run Image](#run-the-image)
+* [Interact with Service](#interact-with-the-app)
+### Download model(s)
+
+This example assumes that the developer already has a copy of the model they would like to use downloaded onto their host machine and located in the `/models` directory of this repo.
+
+The two models that we have tested and recommend for this example are Llama2 and Mistral. Please download any of the GGUF variants you'd like to use.
+
+* Llama2 - https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/tree/main
+* Mistral - https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/tree/main
+
+_For a full list of supported model variants, please see the "Supported models" section of the [llama.cpp repository](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description)._
+
+```bash
+cd models
+
+wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf
+```
+
+### Build the image
+
+To build the image we use a `build.sh` script that temporarily copies the desired model and shared code into the build directory. This prevents large unused model files in the repo from being loaded into the podman environment during the build, which can cause a significant slowdown.
+
+```bash
+cd summarizer/model_services/builds
+
+sh build.sh llama-2-7b-chat.Q5_K_S.gguf arm summarizer
+```
+The user should provide the model name, the architecture, and the image name they want to use for the build.
+
+### Run the image
+Once the model service image is built, it can be run with the following:
+
+```bash
+podman run -it -p 7860:7860 summarizer
+```
+### Interact with the app
+
+Now the service can be used with the Python code below.
+
+```python
+from gradio_client import Client
+client = Client("http://0.0.0.0:7860")
+result = client.predict("""
+It's Hackathon day.
+All the developers are excited to work on interesting problems.
+There are six teams total, but only one can take home the grand prize.
+The first team to solve Artificial General Intelligence wins!""",
+api_name="/chat")
+print(result)
+```
+
+```bash
+ Sure, here is a summary of the input in bullet points:
+• Hackathon day
+• Developers excited to work on interesting problems
+• Six teams participating
+• Grand prize for the first team to solve Artificial General Intelligence
+• Excitement and competition among the teams
+```
+
+You can also use the `summarize.py` script under `/ai_applications` to run the summary application against a local file. If the `--file` argument is omitted, it will run against the demo file `data/fake_meeting.txt`.
+
+```bash
+cd summarizer/ai_applications
+
+python summarize.py --file <path-to-file>
+```
+
+
diff --git a/summarizer/ai_applications/summarize.py b/summarizer/ai_applications/summarize.py
new file mode 100644
index 00000000..91fb394d
--- /dev/null
+++ b/summarizer/ai_applications/summarize.py
@@ -0,0 +1,17 @@
+import argparse
+from gradio_client import Client
+import time
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-f", "--file", default="data/fake_meeting.txt")
+parser.add_argument("-m", "--model_endpoint",default="http://0.0.0.0:7860/")
+args = parser.parse_args()
+
+
+client = Client(args.model_endpoint)
+with open(args.file) as f:
+    prompt = f.read()
+start = time.time()
+result = client.predict(prompt, api_name="/chat")
+print(result)
+print(time.time() - start)
diff --git a/summarizer/model_services/builds/arm/Containerfile b/summarizer/model_services/builds/arm/Containerfile
new file mode 100644
index 00000000..c7301852
--- /dev/null
+++ b/summarizer/model_services/builds/arm/Containerfile
@@ -0,0 +1,11 @@
+FROM registry.access.redhat.com/ubi9/python-39:1-158
+WORKDIR /locallm
+COPY builds/requirements.txt /locallm/requirements.txt
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir --upgrade -r /locallm/requirements.txt
+ARG MODEL_FILE=llama-2-7b-chat.Q5_K_S.gguf
+ENV MODEL_FILE=${MODEL_FILE}
+COPY builds/${MODEL_FILE} /locallm/
+COPY builds/src/ /locallm
+COPY summary_service.py /locallm/
+ENTRYPOINT [ "python", "summary_service.py" ]
diff --git a/summarizer/model_services/builds/build.sh b/summarizer/model_services/builds/build.sh
new file mode 100644
index 00000000..1afe8c63
--- /dev/null
+++ b/summarizer/model_services/builds/build.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/bash
+
+MODEL=${1:-llama-2-7b-chat.Q5_K_S.gguf}
+ARCH=${2:-arm}
+IMAGE_NAME=${3:-summarizer}
+
+echo "building with $MODEL on $ARCH"
+cp ../../../models/$MODEL .
+cp -r ../../../src .
+podman build -t $IMAGE_NAME .. -f $ARCH/Containerfile
+rm $MODEL
+rm -rf src
diff --git a/summarizer/model_services/builds/requirements.txt b/summarizer/model_services/builds/requirements.txt
new file mode 100644
index 00000000..4c1269c4
--- /dev/null
+++ b/summarizer/model_services/builds/requirements.txt
@@ -0,0 +1,2 @@
+llama-cpp-python
+gradio
\ No newline at end of file
diff --git a/summarizer/model_services/summary_service.py b/summarizer/model_services/summary_service.py
new file mode 100644
index 00000000..c18aa9cf
--- /dev/null
+++ b/summarizer/model_services/summary_service.py
@@ -0,0 +1,49 @@
+import sys
+sys.path.append("src")
+import gradio as gr
+from llama_cpp import Llama
+from chat import clip_history, chunk_tokens
+
+llm = Llama("llama-2-7b-chat.Q5_K_S.gguf",
+            n_gpu_layers=-1,
+            n_ctx=4096,
+            max_tokens=512,
+            f16_kv = True,
+            stream=False)
+
+system_prompt = [
+    {"role": "system", "content": """You are a summarizing agent.
+    You only respond in bullet points.
+    Your only job is to summarize your inputs and provide the most concise possible output.
+    Do not add any information that does not come directly from the user prompt.
+    Limit your response to a maximum of 5 bullet points.
+    It's fine to have less than 5 bullet points"""},
+    ]
+
+def summary(prompt, history):
+    global llm
+    global system_prompt
+    chunk_size = 4096
+    prompt_chunks = chunk_tokens(llm,prompt,chunk_size-512)
+    partial_summaries = []
+    print(f"processing {len(prompt_chunks)} chunks")
+    for i,chunk in enumerate(prompt_chunks):
+        print(f"{i+1}/{len(prompt_chunks)}")
+        prompt = {"role":"user", "content": chunk}
+        system_prompt.append(prompt)
+        chat_response = llm.create_chat_completion(system_prompt)
+        partial_summary = chat_response["choices"][0]["message"]["content"]
+        partial_summaries.append(partial_summary)
+        system_prompt = [system_prompt[0]]
+    if len(prompt_chunks) == 1:
+        return partial_summaries[0]
+    prompt = {"role":"user","content":" ".join(partial_summaries)}
+    system_prompt.append(prompt)
+    chat_response = llm.create_chat_completion(system_prompt)
+    return chat_response["choices"][0]["message"]["content"]
+
+
+if __name__=="__main__":
+
+    demo = gr.ChatInterface(summary)
+    demo.launch(server_name="0.0.0.0")