From ad0243d1e8261dac176736f4c4bb1f1c68edcf6c Mon Sep 17 00:00:00 2001
From: Michael Clifford
Date: Mon, 29 Jan 2024 19:52:52 -0500
Subject: [PATCH] WIP rag + langchain example

---
 rag-langchain/README.md               | 35 +++++++++++
 rag-langchain/ai-studio.yaml          | 22 +++++++
 rag-langchain/builds/Containerfile    | 20 ++++++
 rag-langchain/builds/requirements.txt |  5 ++
 rag-langchain/rag_app.py              | 89 +++++++++++++++++++++++++++
 5 files changed, 171 insertions(+)
 create mode 100644 rag-langchain/README.md
 create mode 100644 rag-langchain/ai-studio.yaml
 create mode 100644 rag-langchain/builds/Containerfile
 create mode 100644 rag-langchain/builds/requirements.txt
 create mode 100644 rag-langchain/rag_app.py

diff --git a/rag-langchain/README.md b/rag-langchain/README.md
new file mode 100644
index 000000000..96b5d5288
--- /dev/null
+++ b/rag-langchain/README.md
@@ -0,0 +1,35 @@
+# RAG + Langchain
+
+This example deploys a local RAG application using a ChromaDB server, a llama.cpp model server, and a Python app built with LangChain.
+
+### Deploy ChromaDB Vector Database
+Use the existing ChromaDB image to deploy a vector store service.
+
+* `podman pull chromadb/chroma`
+* `podman run -it -p 8000:8000 chromadb/chroma`
+
+### Deploy Model Service
+
+Deploy the LLM server and volume mount the model of choice.
+* `podman run -it -p 8001:8001 -v Local/path/to/locallm/models:/locallm/models:Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf playground`
+
+### Build and Deploy RAG app
+Deploy a small application that populates the vector database, retrieves the relevant context from it, and generates a response with the LLM.
+
+You will also need an embedding model to volume mount into the running application container. You can use the code snippet below to pull a copy of the `BAAI/bge-base-en-v1.5` embedding model.
+
+```python
+from huggingface_hub import snapshot_download
+snapshot_download(repo_id="BAAI/bge-base-en-v1.5",
+                  cache_dir="../models/",
+                  local_files_only=False)
+```
+
+Follow the instructions below to build your container image and run it locally.
+
+* `podman build -t ragapp rag-langchain -f rag-langchain/builds/Containerfile`
+* `podman run -it -v Local/path/to/locallm/models/:/rag/models:Z -v Local/path/to/locallm/data:/rag/data:Z ragapp -H 10.88.0.1 -m http://10.88.0.1:8001/v1`
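Before building the app container, it can help to confirm the ChromaDB service from the README is actually reachable. A minimal sketch, assuming the default host/port from the `podman run -p 8000:8000` command above:

```python
# Sanity check for the ChromaDB service started above.
# Host and port are assumptions taken from the README's podman command.
from chromadb import HttpClient

client = HttpClient(host="0.0.0.0", port="8000")
# heartbeat() returns a nanosecond timestamp if the server is reachable
print(client.heartbeat())
```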
diff --git a/rag-langchain/ai-studio.yaml b/rag-langchain/ai-studio.yaml
new file mode 100644
index 000000000..c18afb393
--- /dev/null
+++ b/rag-langchain/ai-studio.yaml
@@ -0,0 +1,22 @@
+application:
+  type: language
+  name: rag-demo
+  description: This is a RAG demo application.
+  containers:
+    - name: llamacpp-server
+      contextdir: playground
+      containerfile: Containerfile
+      model-service: true
+      backend:
+        - llama
+      arch:
+        - arm64
+        - amd64
+    - name: chromadb-server
+      image: docker.io/chromadb/chroma
+    - name: rag-inference-app
+      contextdir: rag-langchain
+      containerfile: builds/Containerfile
+      arch:
+        - arm64
+        - amd64
\ No newline at end of file
diff --git a/rag-langchain/builds/Containerfile b/rag-langchain/builds/Containerfile
new file mode 100644
index 000000000..4730226bf
--- /dev/null
+++ b/rag-langchain/builds/Containerfile
@@ -0,0 +1,20 @@
+FROM registry.access.redhat.com/ubi9/python-39:latest
+### Update sqlite for chroma
+USER root
+RUN dnf remove sqlite3 -y
+RUN wget https://www.sqlite.org/2023/sqlite-autoconf-3410200.tar.gz
+RUN tar -xvzf sqlite-autoconf-3410200.tar.gz
+WORKDIR sqlite-autoconf-3410200
+RUN ./configure
+RUN make
+RUN make install
+RUN mv /usr/local/bin/sqlite3 /usr/bin/sqlite3
+ENV LD_LIBRARY_PATH="/usr/local/lib"
+####
+WORKDIR /rag
+COPY builds/requirements.txt .
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir --upgrade -r /rag/requirements.txt
+COPY rag_app.py .
+ENV HF_HUB_CACHE=/rag/models/
+ENTRYPOINT [ "python", "rag_app.py" ]
\ No newline at end of file
diff --git a/rag-langchain/builds/requirements.txt b/rag-langchain/builds/requirements.txt
new file mode 100644
index 000000000..5b107e144
--- /dev/null
+++ b/rag-langchain/builds/requirements.txt
@@ -0,0 +1,5 @@
+langchain_openai
+langchain
+chromadb
+sentence-transformers
+
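The SQLite rebuild at the top of the Containerfile exists because chromadb requires SQLite >= 3.35.0, which is newer than what the ubi9 base image ships. A quick sketch to verify, inside the running container, that the interpreter picked up the rebuilt library via `LD_LIBRARY_PATH`:

```python
# Check which SQLite library the Python interpreter is linked against.
# chromadb refuses to start on SQLite older than 3.35.0.
import sqlite3

print(sqlite3.sqlite_version)  # expect 3.41.2 after the Containerfile's source build
```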
diff --git a/rag-langchain/rag_app.py b/rag-langchain/rag_app.py
new file mode 100644
index 000000000..38b708236
--- /dev/null
+++ b/rag-langchain/rag_app.py
@@ -0,0 +1,89 @@
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain.callbacks import StreamingStdOutCallbackHandler
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_loaders import TextLoader
+from langchain_community.vectorstores import Chroma
+
+from chromadb import HttpClient
+from chromadb.config import Settings
+import chromadb.utils.embedding_functions as embedding_functions
+
+import uuid
+import os
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-d", "--docs", default="data/fake_meeting.txt")
+parser.add_argument("-c", "--chunk_size", default=150)
+parser.add_argument("-e", "--embedding_model", default="BAAI/bge-base-en-v1.5")
+parser.add_argument("-H", "--vdb_host", default="0.0.0.0")
+parser.add_argument("-p", "--vdb_port", default="8000")
+parser.add_argument("-n", "--name", default="test_collection")
+parser.add_argument("-m", "--model_url", default="http://0.0.0.0:8001/v1")
+args = parser.parse_args()
+
+# LLM served over an OpenAI-compatible API by the llama.cpp model service
+llm = ChatOpenAI(base_url=args.model_url,
+                 api_key="EMPTY",
+                 streaming=True,
+                 callbacks=[StreamingStdOutCallbackHandler()])
+
+prompt = ChatPromptTemplate.from_template("""Answer the question based only on the following context:
+{context}
+
+Question: {input}
+""")
+
+### populate the DB ####
+
+# HF_HUB_CACHE is set in the Containerfile; uncomment to override locally
+#os.environ["HF_HUB_CACHE"] = "./models/"
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=args.embedding_model)
+e = SentenceTransformerEmbeddings(model_name=args.embedding_model)
+client = HttpClient(host=args.vdb_host,
+                    port=args.vdb_port,
+                    settings=Settings(allow_reset=True))
+collection = client.get_or_create_collection(args.name,
+                                             embedding_function=embedding_func)
+
+if collection.count() < 1:
+    print("populating db")
+    raw_documents = TextLoader(args.docs).load()
+    text_splitter = CharacterTextSplitter(separator=".",
+                                          chunk_size=int(args.chunk_size),
+                                          chunk_overlap=0)
+    docs = text_splitter.split_documents(raw_documents)
+    for doc in docs:
+        collection.add(
+            ids=[str(uuid.uuid1())],
+            metadatas=[doc.metadata],
+            documents=[doc.page_content]
+        )
+else:
+    print("DB already populated")
+########################
+
+db = Chroma(client=client,
+            collection_name=args.name,
+            embedding_function=e)
+retriever = db.as_retriever(search_type="similarity_score_threshold",
+                            search_kwargs={"score_threshold": 0.75})
+chain = (
+    {"context": retriever, "input": RunnablePassthrough()}
+    | prompt
+    | llm
+)
+
+print("Ask LLM a question:")
+while True:
+    print("\nUser:")
+    user_query = input()
+    print("ChatBot:")
+    chain.invoke(user_query)
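To exercise the retrieval half of `rag_app.py` in isolation, without the LLM in the loop, you can query the populated collection directly. A minimal sketch; the question string and the `10.88.0.1` host are assumptions carried over from the README's podman example:

```python
# Query the populated ChromaDB collection directly, bypassing the LLM.
from chromadb import HttpClient
import chromadb.utils.embedding_functions as embedding_functions

ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="BAAI/bge-base-en-v1.5")
client = HttpClient(host="10.88.0.1", port="8000")  # host assumed from the README example
collection = client.get_or_create_collection("test_collection", embedding_function=ef)

# Hypothetical query against the default data/fake_meeting.txt document
results = collection.query(query_texts=["What was discussed in the meeting?"], n_results=3)
print(results["documents"])
```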