add self ask #9

Merged (1 commit, Oct 18, 2022)
4 changes: 4 additions & 0 deletions langchain/chains/self_ask_with_search/__init__.py
@@ -0,0 +1,4 @@
"""Chain that does self ask with search.

Heavily borrowed from https://github.com/ofirpress/self-ask
"""
142 changes: 142 additions & 0 deletions langchain/chains/self_ask_with_search/base.py
@@ -0,0 +1,142 @@
"""Chain that does self ask with search."""
from typing import Any, Dict, List

from pydantic import BaseModel, Extra

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chains.self_ask_with_search.prompt import PROMPT
from langchain.chains.serpapi import SerpAPIChain
from langchain.llms.base import LLM


def extract_answer(generated: str) -> str:
    """Extract the answer after the final colon on the last line of text."""
    last_line = get_last_line(generated)

    if ":" not in last_line:
        after_colon = last_line
    else:
        after_colon = last_line.split(":")[-1]

    # Trim a single leading space and a trailing period, if present.
    if after_colon.startswith(" "):
        after_colon = after_colon[1:]
    if after_colon.endswith("."):
        after_colon = after_colon[:-1]

    return after_colon
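
For example, on the kind of closing line the few-shot prompt in prompt.py elicits, this drops everything up to the final colon along with the leading space and trailing period:

extract_answer("So the final answer is: Muhammad Ali.")  # -> "Muhammad Ali"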


def extract_question(generated: str, followup: str) -> str:
    """Extract the follow-up question from the last line of text."""
    last_line = get_last_line(generated)

    if followup not in last_line:
        print("we probably should never get here..." + generated)

    if ":" not in last_line:
        after_colon = last_line
    else:
        after_colon = last_line.split(":")[-1]

    if after_colon.startswith(" "):
        after_colon = after_colon[1:]
    if not after_colon.endswith("?"):
        print("we probably should never get here..." + generated)

    return after_colon
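
Similarly, this recovers the question text from a follow-up line; the trailing question mark is kept since the whole question is passed to the search chain:

extract_question("Follow up: How old was Alan Turing when he died?", "Follow up:")
# -> "How old was Alan Turing when he died?"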


def get_last_line(generated: str) -> str:
    """Get the last line in text."""
    # str.split returns the whole string when the delimiter is absent,
    # so no special case is needed for single-line input.
    return generated.split("\n")[-1]


def greenify(_input: str) -> str:
    """Add green highlighting to text."""
    return "\x1b[102m" + _input + "\x1b[0m"


def yellowfy(_input: str) -> str:
    """Add yellow highlighting to text."""
    # \x1b[103m is the ANSI bright-yellow background (106 is bright cyan).
    return "\x1b[103m" + _input + "\x1b[0m"
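
These helpers wrap text in ANSI background-color escapes so the trace printed by the chain distinguishes model completions (green) from search results (yellow) in a terminal:

print(greenify("model completion") + " | " + yellowfy("search result"))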


class SelfAskWithSearchChain(Chain, BaseModel):
    """Chain that does self ask with search."""

    llm: LLM
    search_chain: SerpAPIChain
    input_key: str = "question"
    output_key: str = "answer"

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Expect input key."""
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Expect output key."""
        return [self.output_key]

    def _run(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        question = inputs[self.input_key]
        llm_chain = LLMChain(llm=self.llm, prompt=PROMPT)
        intermediate = "\nIntermediate answer:"
        followup = "Follow up:"
        finalans = "\nSo the final answer is:"
        cur_prompt = f"{question}\nAre follow up questions needed here:"
        print(cur_prompt, end="")
        ret_text = llm_chain.predict(input=cur_prompt, stop=[intermediate])
        print(greenify(ret_text), end="")
        # Keep going as long as the model's last line asks a follow-up question.
        while followup in get_last_line(ret_text):
            cur_prompt += ret_text
            question = extract_question(ret_text, followup)
            external_answer = self.search_chain.search(question)
            if external_answer is not None:
                # Feed the search result back in as the intermediate answer.
                cur_prompt += intermediate + " " + external_answer + "."
                print(
                    intermediate + " " + yellowfy(external_answer) + ".",
                    end="",
                )
                ret_text = llm_chain.predict(input=cur_prompt, stop=[intermediate])
                print(greenify(ret_text), end="")
            else:
                # We only get here in the very rare case that Google returns no answer.
                # Let the model answer its own follow-up; capture the text so the
                # loop condition and the final-answer check below can see it.
                cur_prompt += intermediate
                print(intermediate + " ")
                ret_text = llm_chain.predict(
                    input=cur_prompt, stop=["\n" + followup, finalans]
                )
                cur_prompt += ret_text

        if finalans not in ret_text:
            # Force the model to commit to a final answer on a single line.
            cur_prompt += finalans
            print(finalans, end="")
            ret_text = llm_chain.predict(input=cur_prompt, stop=["\n"])
            print(greenify(ret_text), end="")

        return {self.output_key: cur_prompt + ret_text}

    def run(self, question: str) -> str:
        """More user-friendly interface for running self ask with search on a question."""
        return self({self.input_key: question})[self.output_key]
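
To make the loop concrete, an illustrative transcript of cur_prompt after one follow-up round (the model text mirrors the few-shot examples in prompt.py; the intermediate answer is whatever search_chain.search returns):

Who lived longer, Muhammad Ali or Alan Turing?
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.

Once the model's last line no longer contains "Follow up:", the loop exits; if the text does not already contain "So the final answer is:", that suffix is appended and the model is prompted once more, stopping at the next newline.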
44 changes: 44 additions & 0 deletions langchain/chains/self_ask_with_search/prompt.py
@@ -0,0 +1,44 @@
# flake8: noqa
from langchain.prompt import Prompt

_DEFAULT_TEMPLATE = """Question: Who lived longer, Muhammad Ali or Alan Turing?
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.
Follow up: How old was Alan Turing when he died?
Intermediate answer: Alan Turing was 41 years old when he died.
So the final answer is: Muhammad Ali

Question: When was the founder of craigslist born?
Are follow up questions needed here: Yes.
Follow up: Who was the founder of craigslist?
Intermediate answer: Craigslist was founded by Craig Newmark.
Follow up: When was Craig Newmark born?
Intermediate answer: Craig Newmark was born on December 6, 1952.
So the final answer is: December 6, 1952

Question: Who was the maternal grandfather of George Washington?
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washington?
Intermediate answer: The father of Mary Ball Washington was Joseph Ball.
So the final answer is: Joseph Ball

Question: Are both the directors of Jaws and Casino Royale from the same country?
Are follow up questions needed here: Yes.
Follow up: Who is the director of Jaws?
Intermediate answer: The director of Jaws is Steven Spielberg.
Follow up: Where is Steven Spielberg from?
Intermediate answer: The United States.
Follow up: Who is the director of Casino Royale?
Intermediate answer: The director of Casino Royale is Martin Campbell.
Follow up: Where is Martin Campbell from?
Intermediate answer: New Zealand.
So the final answer is: No

Question: {input}"""
PROMPT = Prompt(
    input_variables=["input"],
    template=_DEFAULT_TEMPLATE,
)
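
Assuming Prompt exposes the usual format method, filling the template substitutes the question after the four worked examples:

PROMPT.format(input="Who lived longer, Muhammad Ali or Alan Turing?")
# -> the examples above, ending with "Question: Who lived longer, Muhammad Ali or Alan Turing?"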
18 changes: 18 additions & 0 deletions tests/integration_tests/chains/test_self_ask_with_search.py
@@ -0,0 +1,18 @@
"""Integration test for self ask with search."""
from langchain.chains.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.chains.serpapi import SerpAPIChain
from langchain.llms.openai import OpenAI


def test_self_ask_with_search() -> None:
    """Test functionality on a prompt."""
    question = "What is the hometown of the reigning men's U.S. Open champion?"
    chain = SelfAskWithSearchChain(
        llm=OpenAI(temperature=0),
        search_chain=SerpAPIChain(),
        input_key="q",
        output_key="a",
    )
    answer = chain.run(question)
    final_answer = answer.split("\n")[-1]
    assert final_answer == "So the final answer is: El Palmar, Murcia, Spain"