From ad0243d1e8261dac176736f4c4bb1f1c68edcf6c Mon Sep 17 00:00:00 2001
From: Michael Clifford
Date: Mon, 29 Jan 2024 19:52:52 -0500
Subject: [PATCH] WIP rag + langchain example

---
 rag-langchain/README.md               | 35 +++++++++++
 rag-langchain/ai-studio.yaml          | 22 +++++++
 rag-langchain/builds/Containerfile    | 20 ++++++
 rag-langchain/builds/requirements.txt |  5 ++
 rag-langchain/rag_app.py              | 89 +++++++++++++++++++++++++++
 5 files changed, 171 insertions(+)
 create mode 100644 rag-langchain/README.md
 create mode 100644 rag-langchain/ai-studio.yaml
 create mode 100644 rag-langchain/builds/Containerfile
 create mode 100644 rag-langchain/builds/requirements.txt
 create mode 100644 rag-langchain/rag_app.py

diff --git a/rag-langchain/README.md b/rag-langchain/README.md
new file mode 100644
index 000000000..96b5d5288
--- /dev/null
+++ b/rag-langchain/README.md
@@ -0,0 +1,35 @@
+# RAG + Langchain
+
+This example deploys a local RAG application using a ChromaDB server, a llama.cpp model server, and a Python app built with LangChain.
+
+### Deploy ChromaDB Vector Database
+Use the existing ChromaDB image to deploy a vector store service.
+
+* `podman pull chromadb/chroma`
+* `podman run -it -p 8000:8000 chromadb/chroma`
+
+### Deploy Model Service
+
+Deploy the LLM server and volume mount the model of choice.
+* `podman run -it -p 8001:8001 -v Local/path/to/locallm/models:/locallm/models:Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf playground`
+
+### Build and Deploy RAG app
+Deploy a small application that populates the vector database, retrieves the relevant context from it, and generates a response with the LLM.
+
+You will also need an embedding model to volume mount into the running application container. You can use the code snippet below to pull a copy of the `BAAI/bge-base-en-v1.5` embedding model.
+
+```python
+from huggingface_hub import snapshot_download
+snapshot_download(repo_id="BAAI/bge-base-en-v1.5",
+                  cache_dir="../models/",
+                  local_files_only=False)
+```
+
+Follow the instructions below to build your container image and run it locally.
+
+* `podman build -t ragapp rag-langchain -f rag-langchain/builds/Containerfile`
+* `podman run -it -v Local/path/to/locallm/models/:/rag/models:Z -v Local/path/to/locallm/data:/rag/data:Z ragapp -H 10.88.0.1 -m http://10.88.0.1:8001/v1`
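Before building the app container, it can help to confirm the ChromaDB service from the README is actually reachable. A minimal sketch, assuming the default host/port from the `podman run -p 8000:8000` command above:

```python
# Sanity check for the ChromaDB service started above.
# Host and port are assumptions taken from the README's podman command.
from chromadb import HttpClient

client = HttpClient(host="0.0.0.0", port="8000")
# heartbeat() returns a nanosecond timestamp if the server is reachable
print(client.heartbeat())
```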
diff --git a/rag-langchain/ai-studio.yaml b/rag-langchain/ai-studio.yaml
new file mode 100644
index 000000000..c18afb393
--- /dev/null
+++ b/rag-langchain/ai-studio.yaml
@@ -0,0 +1,22 @@
+application:
+  type: language
+  name: rag-demo
+  description: This is a RAG demo application.
+  containers:
+    - name: llamacpp-server
+      contextdir: playground
+      containerfile: Containerfile
+      model-service: true
+      backend:
+        - llama
+      arch:
+        - arm64
+        - amd64
+    - name: chromadb-server
+      image: docker.io/chromadb/chroma
+    - name: rag-inference-app
+      contextdir: rag-langchain
+      containerfile: builds/Containerfile
+      arch:
+        - arm64
+        - amd64
\ No newline at end of file
diff --git a/rag-langchain/builds/Containerfile b/rag-langchain/builds/Containerfile
new file mode 100644
index 000000000..4730226bf
--- /dev/null
+++ b/rag-langchain/builds/Containerfile
@@ -0,0 +1,20 @@
+FROM registry.access.redhat.com/ubi9/python-39:latest
+### Update sqlite for chroma
+USER root
+RUN dnf remove sqlite3 -y
+RUN wget https://www.sqlite.org/2023/sqlite-autoconf-3410200.tar.gz
+RUN tar -xvzf sqlite-autoconf-3410200.tar.gz
+WORKDIR sqlite-autoconf-3410200
+RUN ./configure
+RUN make
+RUN make install
+RUN mv /usr/local/bin/sqlite3 /usr/bin/sqlite3
+ENV LD_LIBRARY_PATH="/usr/local/lib"
+####
+WORKDIR /rag
+COPY builds/requirements.txt .
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir --upgrade -r /rag/requirements.txt
+COPY rag_app.py .
+ENV HF_HUB_CACHE=/rag/models/
+ENTRYPOINT [ "python", "rag_app.py" ]
\ No newline at end of file
diff --git a/rag-langchain/builds/requirements.txt b/rag-langchain/builds/requirements.txt
new file mode 100644
index 000000000..5b107e144
--- /dev/null
+++ b/rag-langchain/builds/requirements.txt
@@ -0,0 +1,5 @@
+langchain_openai
+langchain
+chromadb
+sentence-transformers
+
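The SQLite rebuild at the top of the Containerfile exists because chromadb requires SQLite >= 3.35.0, which is newer than what the ubi9 base image ships. A quick sketch to verify, inside the running container, that the interpreter picked up the rebuilt library via `LD_LIBRARY_PATH`:

```python
# Check which SQLite library the Python interpreter is linked against.
# chromadb refuses to start on SQLite older than 3.35.0.
import sqlite3

print(sqlite3.sqlite_version)  # expect 3.41.2 after the Containerfile's source build
```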
diff --git a/rag-langchain/rag_app.py b/rag-langchain/rag_app.py
new file mode 100644
index 000000000..38b708236
--- /dev/null
+++ b/rag-langchain/rag_app.py
@@ -0,0 +1,89 @@
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain.callbacks import StreamingStdOutCallbackHandler
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_loaders import TextLoader
+from langchain_community.vectorstores import Chroma
+
+from chromadb import HttpClient
+from chromadb.config import Settings
+import chromadb.utils.embedding_functions as embedding_functions
+
+import uuid
+import os
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-d", "--docs", default="data/fake_meeting.txt")
+parser.add_argument("-c", "--chunk_size", default=150)
+parser.add_argument("-e", "--embedding_model", default="BAAI/bge-base-en-v1.5")
+parser.add_argument("-H", "--vdb_host", default="0.0.0.0")
+parser.add_argument("-p", "--vdb_port", default="8000")
+parser.add_argument("-n", "--name", default="test_collection")
+parser.add_argument("-m", "--model_url", default="http://0.0.0.0:8001/v1")
+args = parser.parse_args()
+
+# LLM served over an OpenAI-compatible API by the llama.cpp model service
+llm = ChatOpenAI(base_url=args.model_url,
+                 api_key="EMPTY",
+                 streaming=True,
+                 callbacks=[StreamingStdOutCallbackHandler()])
+
+prompt = ChatPromptTemplate.from_template("""Answer the question based only on the following context:
+{context}
+
+Question: {input}
+""")
+
+### populate the DB ####
+
+# HF_HUB_CACHE is set in the Containerfile; uncomment to override locally
+#os.environ["HF_HUB_CACHE"] = "./models/"
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=args.embedding_model)
+e = SentenceTransformerEmbeddings(model_name=args.embedding_model)
+client = HttpClient(host=args.vdb_host,
+                    port=args.vdb_port,
+                    settings=Settings(allow_reset=True))
+collection = client.get_or_create_collection(args.name,
+                                             embedding_function=embedding_func)
+
+if collection.count() < 1:
+    print("populating db")
+    raw_documents = TextLoader(args.docs).load()
+    text_splitter = CharacterTextSplitter(separator=".",
+                                          chunk_size=int(args.chunk_size),
+                                          chunk_overlap=0)
+    docs = text_splitter.split_documents(raw_documents)
+    for doc in docs:
+        collection.add(
+            ids=[str(uuid.uuid1())],
+            metadatas=[doc.metadata],
+            documents=[doc.page_content]
+        )
+else:
+    print("DB already populated")
+########################
+
+db = Chroma(client=client,
+            collection_name=args.name,
+            embedding_function=e)
+retriever = db.as_retriever(search_type="similarity_score_threshold",
+                            search_kwargs={"score_threshold": 0.75})
+chain = (
+    {"context": retriever, "input": RunnablePassthrough()}
+    | prompt
+    | llm
+)
+
+print("Ask LLM a question:")
+while True:
+    print("\nUser:")
+    user_query = input()
+    print("ChatBot:")
+    chain.invoke(user_query)
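To exercise the retrieval half of `rag_app.py` in isolation, without the LLM in the loop, you can query the populated collection directly. A minimal sketch; the question string and the `10.88.0.1` host are assumptions carried over from the README's podman example:

```python
# Query the populated ChromaDB collection directly, bypassing the LLM.
from chromadb import HttpClient
import chromadb.utils.embedding_functions as embedding_functions

ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="BAAI/bge-base-en-v1.5")
client = HttpClient(host="10.88.0.1", port="8000")  # host assumed from the README example
collection = client.get_or_create_collection("test_collection", embedding_function=ef)

# Hypothetical query against the default data/fake_meeting.txt document
results = collection.query(query_texts=["What was discussed in the meeting?"], n_results=3)
print(results["documents"])
```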