diff --git a/docs/chroma_db.zip b/docs/chroma_db.zip index 4176e4703..2726a919a 100644 Binary files a/docs/chroma_db.zip and b/docs/chroma_db.zip differ diff --git a/pr_agent/settings/pr_help_prompts.toml b/pr_agent/settings/pr_help_prompts.toml index cc8170f63..84ecb3efc 100644 --- a/pr_agent/settings/pr_help_prompts.toml +++ b/pr_agent/settings/pr_help_prompts.toml @@ -2,16 +2,21 @@ system="""You are Doc-helper, a language models designed to answer questions about a documentation website for an open-soure project called "PR-Agent". You will recieve a question, and a list of snippets that were collected for a documentation site using RAG as the retrieval method. Your goal is to provide the best answer to the question using the snippets provided. -Note that it is possible some of the snippets may not be relevant to the question. In that case, you should ignore them and focus on the ones that are relevant. -Try to be short and concise in your answers. +Additional instructions: +- Try to be short and concise in your answers. Give examples if needed. +- It is possible some of the snippets may not be relevant to the question. In that case, you should ignore them and focus on the ones that are relevant. +- The main tools of pr-agent are 'describe', 'review', and 'improve'. If it is ambiguous which tool the user is referring to, prioritize snippets of these tools over others. -The output must be a YAML object equivalent to type $doc_help, according to the following Pydantic definitions: -class doc_help(BaseModel): +The output must be a YAML object equivalent to type $DocHelper, according to the following Pydantic definitions: +===== +class DocHelper(BaseModel): user_question: str = Field(description="The user's question") response: str = Field(description="The response to the user's question") relevant_snippets: List[int] = Field(description="One-based index of the relevant snippets in the list of snippets provided. Order the by relevance, with the most relevant first. 
If a snippet was not relevant, do not include it in the list.") +===== + Example output: ```yaml diff --git a/pr_agent/tools/pr_help_message.py b/pr_agent/tools/pr_help_message.py index 72266e3e0..d28e847b8 100644 --- a/pr_agent/tools/pr_help_message.py +++ b/pr_agent/tools/pr_help_message.py @@ -158,7 +158,7 @@ async def run(self): # Initialize embeddings from langchain_openai import OpenAIEmbeddings - embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", + embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=get_settings().openai.key) # Get similar snippets via similarity search @@ -207,7 +207,7 @@ async def run(self): if get_settings().config.publish_output: self.git_provider.publish_comment(answer_str) else: - get_logger().info(f"Answer: {response}") + get_logger().info(f"Answer:\n{answer_str}") else: if not isinstance(self.git_provider, BitbucketServerProvider) and not self.git_provider.is_supported("gfm_markdown"): self.git_provider.publish_comment( @@ -325,7 +325,7 @@ async def prepare_relevant_snippets(self, sim_results): # build the snippets string relevant_snippets_str = "" for i, s in enumerate(relevant_snippets_full): - relevant_snippets_str += f"Snippet {i}:\n\n{s}\n\n" + relevant_snippets_str += f"Snippet {i+1}:\n\n{s}\n\n" relevant_snippets_str += "-------------------\n\n" return relevant_pages_full, relevant_snippets_full_header, relevant_snippets_str