From 5cf5b4c71cdde8083e4aace13994b940d1397d78 Mon Sep 17 00:00:00 2001 From: Yuce Tekol Date: Wed, 2 Oct 2024 19:51:43 +0300 Subject: [PATCH 1/4] LangChain integration docs --- .../pages/langchain-integration.adoc | 257 ++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 docs/modules/integrate/pages/langchain-integration.adoc diff --git a/docs/modules/integrate/pages/langchain-integration.adoc b/docs/modules/integrate/pages/langchain-integration.adoc new file mode 100644 index 000000000..3ec9dee21 --- /dev/null +++ b/docs/modules/integrate/pages/langchain-integration.adoc @@ -0,0 +1,257 @@ += LangChain Integration +:description: The Hazelcast integration for LangChain provides a Vector Store implementation that enables using Hazecast Vector Search with LangChain. + +{description} + +== Introduction + +LangChain is a Python framework that makes it easier to create large language model (LLM) based solutions, such as chat bots by linking various components. + +LangChain `VectorStore` interface makes it easier to incorporate RAGs (Retrieval Augmented Generation) in LLM solutions. + +`langchain-hazelcast` package provides the Hazelcast `VectorStore` implementation for LangChain. + +== Installing LangChain/Hazelcast Vector Store + +[source,bash] +---- +pip install langchain-hazelcast +---- + +== Creating a Vector Store + +`Hazelcast` class is the Hazelcast vector store implementation that lives in the `langchain_hazelcast.vectorstore` package. + +The constructor for the `Hazelcast` vector store class takes the following arguments: + +* `embedding: Embeddings`: The embedding producer. This is a required argument. +* `collection_name: str`: Hazelcast `VectorCollection` to use. By default `"langchain"`. +* `client: Optional[HazelcastClient]`: A Hazelcast client object. +* `client_config: Optional[Config]`: A Hazelcast client configuration object. + +`client` and `client_config` arguments are mutually exclusive, they must not be set together. 
+ +If you already have a Hazelcast client object, it is recommended to reuse it using the `client` argument. +Otherwise, you may prefer to create a Hazelcast configuration object first and pass it to the `Hazelcast` vector store constructor. + +The embedding producer must be an instance of LangChain `langchain_core.embeddings.Embeddings` class, such as `HuggingFaceEmbeddings`. +Here is an example: + +[source,python] +---- +from langchain_huggingface import HuggingFaceEmbeddings + +embeddings = HuggingFaceEmbeddings( + model_name="sentence-transformers/all-mpnet-base-v2", + model_kwargs={ + "device": "cpu", + "tokenizer_kwargs": { + "clean_up_tokenization_spaces": True, + }, + }, + encode_kwargs={"normalize_embeddings": False}, +) +---- + +Once you have the embedding producer, you can create the `Hazelcast` vector store instance. +Here's how to create a vector store which uses the default Hazelcast client that connects to the Hazelcast cluster `dev` at `localhost:5701`: + +[source,python] +---- +vector_store = Hazelcast(embeddings) +---- + +The same but with an explicitly created Hazelcast client: + +[source,python] +---- +from hazelcast import HazelcastClient +from hazelcast.config import Config + +config = Config() +config.cluster_members = ["localhost:5701"] +config.cluster_name = "dev" +client = HazelcastClient(config) +vector_store = Hazelcast(embeddings, client=client) +---- + +In case you would like to pass the client configuration without creating the client itself: +[source,python] +---- +from hazelcast import HazelcastClient +from hazelcast.config import Config + +config = Config() +config.cluster_members = ["localhost:5701"] +config.cluster_name = "dev" +vector_store = Hazelcast(embeddings, client_config=config) +---- + +You can find more about the various Hazelcast client configuration options in link:https://hazelcast.readthedocs.io/en/stable/client.html#hazelcast.client.HazelcastClient[Hazelcast Client documentation]. 
+ +Although there is a default name for the underlying Hazelcast VectorCollection, you may want to use a different name. +You can do that by passing the name in the `collection_name` argument to the vector store constructor: +[source,python] +---- +name = "customer-docs" +vector_store = Hazelcast(embeddings, collection_name=name, client=client) +---- + +== Updating the Vector Store + +Once the vector store is created, you can start adding LangChain documents or string data into it. +While adding the data, you have the option to associate identifiers and metadata with them. + +Hazelcast vector store has two methods to add data, `add_documents` and `add_texts`. +Using the former, you can add `langchain_core.documents.Document` objects, and using the latter, you can add strings. + +In the simplest case, you would add one or more strings to the vector store: + +[source,python] +---- +texts = [ + "Hazelcast Platform uniquely combines a distributed compute engine and a fast data store in one runtime.", + "It offers unmatched performance, resilience and scale for real-time and AI-driven applications.", + "It allows you to quickly build resource-efficient, real-time applications.", + "You can deploy it at any scale from small edge devices to a large cluster of cloud instances.", +] +ids = vector_store.add_texts(texts) +for id in ids: + print(id) +---- + +Outputs: +[source,output] +---- +8c28f820-d4ed-4cfa-bac4-89b2d110b380 +b235643b-62c0-4039-9856-1493f921e1a4 +083cc0a4-9221-48bd-b734-0de2b4754bb3 +94b524bd-cdcb-4327-92e9-488ea5d915fd +---- + +`Hazelcast.add_texts` method returns the IDs of the added texts. +If the IDs were not provided to the `add_texts` method, then they are automatically genereated, like in the example above. + +You can provide the IDs manually by passing them in the `ids` parameter. +That may be useful in case you would like to update data instead of extending the vector store. 
+ +[source,python] +---- +ids = vector_store.add_texts( + texts, + ids=["item1", "item2", "item3", "item4"] +) +for id in ids: + print(id) +---- + +If provided, the number of IDs must be equal to the number of texts. + +You can also pass metadata with the text or documents using the `metadatas` parameter. +Each item of the `metadatas` list must be a Python dictionary. +Like IDs, the number of metadata items must be equal to the number of texts. + +[source,python] +---- +ids = vector_store.add_texts( + texts, + metadatas=[ + {"page": 1}, + {"page": 1}, + {"page": 1}, + {"page": 2}, + ] +) +---- + +In case you have `langchain_core.documents.Document` objects, you can use the `add_documents` methods to add them to the vector store: + +[source,python] +---- +from langchain_core.documents import Document + +docs = [ + Document( + id="item1", + metadata={"page": 1}, + page_content="Hazelcast Platform uniquely combines a distributed compute engine and a fast data store in one runtime."), + Document( + id="item2", + metadata={"page": 1}, + page_content="It offers unmatched performance, resilience and scale for real-time and AI-driven applications."), + Document( + id="item3", + metadata={"page": 1}, + page_content="It allows you to quickly build resource-efficient, real-time applications."), + Document( + id="item4", + metadata={"page": 2}, + page_content="You can deploy it at any scale from small edge devices to a large cluster of cloud instances."), +] +ids = vector_store.add_documents(docs) +---- + +`Hazelcast` vector store has two class methods that combine creating the vector store and adding texts or documents to it. +Those are `Hazelcast.from_texts` and `Hazelcast.from_documents` methods respectively. +Calling these methods return the `Hazelcast` vector store instance. 
+ +Here is an example that uses the `Hazelcast.from_texts` method: +[source,python] +---- +vector_store = Hazelcast.from_texts(texts, embedding=embeddings, client_config=config) +---- + +== Searching the Vector Store + +Once the vector store is populated, you can run vector similarity searches on it. +The `similarity_search` method of the `Hazelcast` vector store takes a string to be used for the search and returns a list of Documents. + +[source,python] +---- +query = "Does Hazelcast enable real-time applications?" +docs = vector_store.similarity_search(query) +for doc in docs: + print(f"{doc.id}: {doc.page_content}") +---- + +You can optionally specify the maximum number of Documents to be returned using the `k` parameter: + +[source,python] +---- +docs = vector_store.similarity_search(query, k=10) +---- + +== Other Vector Store Operations + +You can retrieve Documents in the vector store using the `get_by_ids` method. +This method takes a sequence of IDs and returns the corresponding Documents if they exist. +Note that the order of the returned Documents may differ from the order of the given IDs: + +[source,python] +---- +docs = vector_store.get_by_ids([ + "b235643b-62c0-4039-9856-1493f921e1a4", + "24d72bd3-e981-4701-a983-0a7800383fd1", +]) +---- + +To delete some or all Documents, you can use the `delete` method. +It deletes the Documents with the given IDs if one or more IDs provided, or deletes all Documents if no IDs are provided. +This method always returns `True`. 
+The example below deletes only two Documents: + +[source,python] +---- +vector_store.delete([ + "b235643b-62c0-4039-9856-1493f921e1a4", + "24d72bd3-e981-4701-a983-0a7800383fd1", +]) +---- + +And the following example deletes all Documents: + +[source,python] +---- +vector_store.delete() +---- + From 31d64e3604e6ba0f7bee3f14d9af7a6410b60173 Mon Sep 17 00:00:00 2001 From: Yuce Tekol Date: Wed, 2 Oct 2024 20:09:53 +0300 Subject: [PATCH 2/4] Updated nav --- docs/modules/ROOT/nav.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 758fba3e1..f9535f988 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -179,6 +179,7 @@ include::wan:partial$nav.adoc[] ** xref:spring:hibernate.adoc[] ** xref:spring:transaction-manager.adoc[] ** xref:spring:best-practices.adoc[] +* xref:integrate:langchain-integration.adoc[] * xref:integrate:integrate-with-feast.adoc[] ** xref:integrate:install-connect.adoc[Install and connect Feast] ** xref:integrate:feast-config.adoc[] From 86f7c7877cc8e82655a287828cbc3a39a6708dd0 Mon Sep 17 00:00:00 2001 From: Yuce Tekol Date: Thu, 10 Oct 2024 15:57:57 +0300 Subject: [PATCH 3/4] Renamed the page to conform to other page names --- docs/modules/ROOT/nav.adoc | 2 +- ...langchain-integration.adoc => integrate-with-langchain.adoc} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename docs/modules/integrate/pages/{langchain-integration.adoc => integrate-with-langchain.adoc} (99%) diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index be665e4a7..085fc9770 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -179,7 +179,7 @@ include::wan:partial$nav.adoc[] ** xref:spring:hibernate.adoc[] ** xref:spring:transaction-manager.adoc[] ** xref:spring:best-practices.adoc[] -* xref:integrate:langchain-integration.adoc[] +* xref:integrate:integrate-with-langchain.adoc[] * xref:integrate:integrate-with-feast.adoc[] ** 
xref:integrate:install-connect.adoc[Install and connect Feast] ** xref:integrate:feast-config.adoc[] diff --git a/docs/modules/integrate/pages/langchain-integration.adoc b/docs/modules/integrate/pages/integrate-with-langchain.adoc similarity index 99% rename from docs/modules/integrate/pages/langchain-integration.adoc rename to docs/modules/integrate/pages/integrate-with-langchain.adoc index 3ec9dee21..d6235e989 100644 --- a/docs/modules/integrate/pages/langchain-integration.adoc +++ b/docs/modules/integrate/pages/integrate-with-langchain.adoc @@ -1,4 +1,4 @@ -= LangChain Integration += Integrate with LangChain :description: The Hazelcast integration for LangChain provides a Vector Store implementation that enables using Hazecast Vector Search with LangChain. {description} From cfce797f919233fd938130cf7394b6f76d8bc9c0 Mon Sep 17 00:00:00 2001 From: Yuce Tekol Date: Wed, 16 Oct 2024 17:16:49 +0300 Subject: [PATCH 4/4] Review comments --- .../integrate/pages/integrate-with-langchain.adoc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/modules/integrate/pages/integrate-with-langchain.adoc b/docs/modules/integrate/pages/integrate-with-langchain.adoc index d6235e989..41c82c42d 100644 --- a/docs/modules/integrate/pages/integrate-with-langchain.adoc +++ b/docs/modules/integrate/pages/integrate-with-langchain.adoc @@ -100,7 +100,7 @@ vector_store = Hazelcast(embeddings, collection_name=name, client=client) == Updating the Vector Store Once the vector store is created, you can start adding LangChain documents or string data into it. -While adding the data, you have the option to associate identifiers and metadata with them. +While adding the data, you have the option to associate identifiers and metadata with it. Hazelcast vector store has two methods to add data, `add_documents` and `add_texts`. Using the former, you can add `langchain_core.documents.Document` objects, and using the latter, you can add strings. 
@@ -130,10 +130,10 @@ b235643b-62c0-4039-9856-1493f921e1a4 ---- `Hazelcast.add_texts` method returns the IDs of the added texts. -If the IDs were not provided to the `add_texts` method, then they are automatically genereated, like in the example above. +If the IDs were not provided to the `add_texts` method, then they are automatically generated, like in the example above. You can provide the IDs manually by passing them in the `ids` parameter. -That may be useful in case you would like to update data instead of extending the vector store. +This is useful when you want to update data instead of extending the vector store. [source,python] ---- @@ -164,7 +164,7 @@ ids = vector_store.add_texts( ) ---- -In case you have `langchain_core.documents.Document` objects, you can use the `add_documents` methods to add them to the vector store: +If you have `langchain_core.documents.Document` objects, you can use the `add_documents` method to add them to the vector store: [source,python] ---- @@ -192,8 +192,8 @@ ids = vector_store.add_documents(docs) ---- `Hazelcast` vector store has two class methods that combine creating the vector store and adding texts or documents to it. -Those are `Hazelcast.from_texts` and `Hazelcast.from_documents` methods respectively. -Calling these methods return the `Hazelcast` vector store instance. +These are the `Hazelcast.from_texts` and `Hazelcast.from_documents` methods respectively. +Calling these methods returns the `Hazelcast` vector store instance. Here is an example that uses the `Hazelcast.from_texts` method: [source,python] @@ -236,7 +236,7 @@ docs = vector_store.get_by_ids([ ---- To delete some or all Documents, you can use the `delete` method. -It deletes the Documents with the given IDs if one or more IDs provided, or deletes all Documents if no IDs are provided. +It deletes the Documents with the given IDs if one or more IDs are provided, or deletes all Documents if no IDs are provided. This method always returns `True`. 
The example below deletes only two Documents: