Commit ad0243d (1 parent: bccd1c1)
Showing 5 changed files with 171 additions and 0 deletions.
@@ -0,0 +1,35 @@
# RAG + LangChain

This example deploys a local RAG application using a ChromaDB server, a llama.cpp model server, and a Python app built with LangChain.
### Deploy ChromaDB Vector Database
Use the existing ChromaDB image to deploy a vector store service; a quick connectivity check is shown after the commands below.

* `podman pull chromadb/chroma`
* `podman run -it -p 8000:8000 chromadb/chroma`
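Once the container is running, you can optionally confirm the service is reachable. A minimal sketch using the same `chromadb` Python client that the RAG app below relies on (assumes `chromadb` is installed locally):

```python
# Quick connectivity check against the ChromaDB server started above.
from chromadb import HttpClient

client = HttpClient(host="0.0.0.0", port=8000)
print(client.heartbeat())  # returns a nanosecond timestamp while the server is up
```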
### Deploy Model Service

Deploy the LLM server and volume mount the model of choice; a quick sanity check of the endpoint follows the command below.
* `podman run -it -p 8001:8001 -v Local/path/to/locallm/models:/locallm/models:Z -e MODEL_PATH=models/llama-2-7b-chat.Q5_K_S.gguf playground`
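To confirm that the model service exposes the OpenAI-compatible API the RAG app expects, you can send a test chat completion. A minimal sketch, assuming the `requests` package is available and the server is reachable on port 8001:

```python
# Test request against the OpenAI-compatible chat completions endpoint.
import requests

resp = requests.post(
    "http://0.0.0.0:8001/v1/chat/completions",
    json={"messages": [{"role": "user", "content": "Hello!"}]},
    timeout=120,
)
print(resp.json()["choices"][0]["message"]["content"])
```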
### Build and Deploy RAG app
Deploy a small application that can populate the vector database and generate a response with the LLM.

We will want an embedding model that we can volume mount into our running application container. You can use the code snippet below to pull a copy of the `BAAI/bge-base-en-v1.5` embedding model.
```python
from huggingface_hub import snapshot_download

snapshot_download(repo_id="BAAI/bge-base-en-v1.5",
                  cache_dir="../models/",
                  local_files_only=False)
```
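Note that `snapshot_download` with `cache_dir="../models/"` stores the model in the Hugging Face hub cache layout under that directory. The application container sets `HF_HUB_CACHE=/rag/models/` (see the Containerfile below), so volume mounting your local models directory at `/rag/models` lets the app load the embedding model without re-downloading it at runtime.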
Follow the instructions below to build your container image and run it locally.

* `podman build -t ragapp rag-langchain -f rag-langchain/builds/Containerfile`
* `podman run -it -v Local/path/to/locallm/models/:/rag/models:Z -v Local/path/to/locallm/data:/rag/data:Z ragapp -H 10.88.0.1 -m http://10.88.0.1:8001/v1`
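The `-H` and `-m` arguments are passed through to `rag_app.py` and map to its `--vdb_host` and `--model_url` options. `10.88.0.1` is the default gateway address of podman's bridge network, which lets the app container reach the ports published on the host by the ChromaDB and model service containers.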
@@ -0,0 +1,22 @@
application:
  type: language
  name: rag-demo
  description: This is a RAG demo application.
  containers:
    - name: llamacpp-server
      contextdir: playground
      containerfile: Containerfile
      model-service: true
      backend:
        - llama
      arch:
        - arm64
        - amd64
    - name: chromadb-server
      image: docker.io/chromadb/chroma
    - name: rag-inference-app
      contextdir: rag-langchain
      containerfile: builds/Containerfile
      arch:
        - arm64
        - amd64
@@ -0,0 +1,20 @@
FROM registry.access.redhat.com/ubi9/python-39:latest

### Update sqlite for chroma (chromadb requires sqlite3 >= 3.35.0, newer than the UBI 9 package)
USER root
RUN dnf remove sqlite3 -y
RUN wget https://www.sqlite.org/2023/sqlite-autoconf-3410200.tar.gz
RUN tar -xvzf sqlite-autoconf-3410200.tar.gz
WORKDIR sqlite-autoconf-3410200
RUN ./configure
RUN make
RUN make install
RUN mv /usr/local/bin/sqlite3 /usr/bin/sqlite3
ENV LD_LIBRARY_PATH="/usr/local/lib"
####

WORKDIR /rag
COPY builds/requirements.txt .
RUN pip install --upgrade pip
RUN pip install --no-cache-dir --upgrade -r /rag/requirements.txt
COPY rag_app.py .
ENV HF_HUB_CACHE=/rag/models/
ENTRYPOINT [ "python", "rag_app.py" ]
@@ -0,0 +1,5 @@
langchain_openai
langchain
chromadb
sentence-transformers
@@ -0,0 +1,89 @@
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma

from chromadb import HttpClient
from chromadb.config import Settings
import chromadb.utils.embedding_functions as embedding_functions

import uuid
import os
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--docs", default="data/fake_meeting.txt")
parser.add_argument("-c", "--chunk_size", default=150)
parser.add_argument("-e", "--embedding_model", default="BAAI/bge-base-en-v1.5")
parser.add_argument("-H", "--vdb_host", default="0.0.0.0")
parser.add_argument("-p", "--vdb_port", default="8000")
parser.add_argument("-n", "--name", default="test_collection")
parser.add_argument("-m", "--model_url", default="http://0.0.0.0:8001/v1")

args = parser.parse_args()

# Stream tokens from the OpenAI-compatible model service straight to stdout
llm = ChatOpenAI(base_url=args.model_url,
                 api_key="EMPTY",
                 streaming=True,
                 callbacks=[StreamingStdOutCallbackHandler()])

prompt = ChatPromptTemplate.from_template("""Answer the question based only on the following context:
{context}
Question: {input}
"""
)

### populate the DB ####

# os.environ["HF_HUB_CACHE"] = "./models/"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=args.embedding_model)
e = SentenceTransformerEmbeddings(model_name=args.embedding_model)

client = HttpClient(host=args.vdb_host,
                    port=args.vdb_port,
                    settings=Settings(allow_reset=True))
collection = client.get_or_create_collection(args.name,
                                             embedding_function=embedding_func)

# Only ingest documents if the collection is empty
if collection.count() < 1:
    print("populating db")
    raw_documents = TextLoader(args.docs).load()
    text_splitter = CharacterTextSplitter(separator=".",
                                          chunk_size=int(args.chunk_size),
                                          chunk_overlap=0)
    docs = text_splitter.split_documents(raw_documents)
    for doc in docs:
        collection.add(
            ids=[str(uuid.uuid1())],
            metadatas=doc.metadata,
            documents=doc.page_content
        )
else:
    print("DB already populated")
########################

db = Chroma(client=client,
            collection_name=args.name,
            embedding_function=e
            )
retriever = db.as_retriever(threshold=0.75)

# Retrieve relevant chunks as context, then answer with the LLM
chain = (
    {"context": retriever, "input": RunnablePassthrough()}
    | prompt
    | llm
)

print("Ask LLM a question:")
while True:
    print("\nUser:")
    user_input = input()  # renamed to avoid shadowing the prompt template above
    print("ChatBot:")
    chain.invoke(user_input)