Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for structured outputs #938

Merged
merged 20 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions instructor/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ def from_openai(
instructor.Mode.FUNCTIONS,
instructor.Mode.PARALLEL_TOOLS,
instructor.Mode.MD_JSON,
instructor.Mode.STRUCTURED_OUTPUTS,
}

if isinstance(client, openai.OpenAI):
Expand Down
2 changes: 1 addition & 1 deletion instructor/function_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def from_response(
Mode.warn_mode_functions_deprecation()
return cls.parse_functions(completion, validation_context, strict)

- if mode in {Mode.TOOLS, Mode.MISTRAL_TOOLS}:
+ if mode in {Mode.TOOLS, Mode.MISTRAL_TOOLS, Mode.STRUCTURED_OUTPUTS}:
ivanleomk marked this conversation as resolved.
Show resolved Hide resolved
return cls.parse_tools(completion, validation_context, strict)

if mode in {Mode.JSON, Mode.JSON_SCHEMA, Mode.MD_JSON}:
Expand Down
1 change: 1 addition & 0 deletions instructor/mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Mode(enum.Enum):
VERTEXAI_JSON = "vertexai_json"
GEMINI_JSON = "gemini_json"
COHERE_JSON_SCHEMA = "json_object"
STRUCTURED_OUTPUTS = "structured_output"
ivanleomk marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def warn_mode_functions_deprecation(cls):
Expand Down
11 changes: 11 additions & 0 deletions instructor/process_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from instructor.utils import merge_consecutive_messages
from instructor.validators import AsyncValidationError
from openai.types.chat import ChatCompletion
from openai import pydantic_function_tool
from pydantic import BaseModel, create_model
import json
import inspect
Expand Down Expand Up @@ -249,6 +250,15 @@ def handle_response_model(
Mode.warn_mode_functions_deprecation()
new_kwargs["functions"] = [response_model.openai_schema]
new_kwargs["function_call"] = {"name": response_model.openai_schema["name"]}
elif mode in {Mode.STRUCTURED_OUTPUTS}:
ivanleomk marked this conversation as resolved.
Show resolved Hide resolved
response_model_schema = pydantic_function_tool(response_model)
ivanleomk marked this conversation as resolved.
Show resolved Hide resolved
response_model_schema["function"]["strict"] = True
new_kwargs["tools"] = [response_model_schema]

new_kwargs["tool_choice"] = {
"type": "function",
"function": {"name": response_model_schema["function"]["name"]},
}
elif mode in {Mode.TOOLS, Mode.MISTRAL_TOOLS}:
new_kwargs["tools"] = [
{
Expand All @@ -263,6 +273,7 @@ def handle_response_model(
"type": "function",
"function": {"name": response_model.openai_schema["name"]},
}

elif mode in {Mode.JSON, Mode.MD_JSON, Mode.JSON_SCHEMA}:
# If it's a JSON Mode we need to massage the prompt a bit
# in order to get the response we want in a json format
Expand Down
2 changes: 1 addition & 1 deletion instructor/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reask_messages(response: ChatCompletion, mode: Mode, exception: Exception):

yield dump_message(response.choices[0].message)
# TODO: Give users more control on configuration
- if mode == Mode.TOOLS:
+ if mode == Mode.TOOLS or mode == Mode.STRUCTURED_OUTPUTS:
for tool_call in response.choices[0].message.tool_calls:
yield {
"role": "tool",
Expand Down
8 changes: 8 additions & 0 deletions instructor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,14 @@ def dump_message(message: ChatCompletionMessage) -> ChatCompletionMessageParam:
and message.function_call is not None
and ret["content"]
):
if not isinstance(ret["content"], str):
response_message: str = ""
for content_message in ret["content"]:
if "text" in content_message:
response_message += content_message["text"]
elif "refusal" in content_message:
response_message += content_message["refusal"]
ret["content"] = response_message
ret["content"] += json.dumps(message.model_dump()["function_call"])
return ret

Expand Down
2,145 changes: 1,123 additions & 1,022 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "instructor"
- version = "1.3.7"
+ version = "1.4.0"
description = "structured outputs for llm"
authors = ["Jason Liu <jason@jxnl.co>"]
license = "MIT"
Expand All @@ -10,7 +10,7 @@ repository = "https://github.com/jxnl/instructor"

[tool.poetry.dependencies]
python = "^3.9"
- openai = "^1.1.0"
+ openai = "^1.40.0"
pydantic = "^2.8.0"
docstring-parser = "^0.16"
typer = ">=0.9.0,<1.0.0"
Expand Down
1 change: 1 addition & 0 deletions tests/llm/test_openai/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
models = ["gpt-4o-mini"]
modes = [
instructor.Mode.TOOLS,
instructor.Mode.STRUCTURED_OUTPUTS,
]
Loading