Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VLM support to Claude 3 #2440

Merged
merged 4 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions src/helm/clients/anthropic_client.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import Any, Dict, List, Optional, TypedDict, cast
from typing import Any, Dict, List, Optional, TypedDict, Union, cast
import json
import requests
import time
import urllib.parse

from helm.common.cache import CacheConfig
from helm.common.hierarchical_logger import htrack_block, hlog
from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
from helm.common.optional_dependencies import handle_module_not_found_error
from helm.common.request import (
wrap_request_time,
Expand All @@ -27,6 +28,8 @@
try:
from anthropic import Anthropic
from anthropic.types import MessageParam
from anthropic.types.image_block_param import ImageBlockParam
from anthropic.types.text_block_param import TextBlockParam
import websocket
except ModuleNotFoundError as e:
handle_module_not_found_error(e, ["anthropic"])
Expand Down Expand Up @@ -242,13 +245,54 @@ def make_request(self, request: Request) -> RequestResult:

messages: List[MessageParam] = []
system_message: Optional[MessageParam] = None
if request.messages and request.prompt:
raise AnthropicMessagesRequestError("Exactly one of Request.messages and Request.prompt should be set")
if request.messages:

if request.messages is not None:
# TODO(#2439): Refactor out Request validation
if request.multimodal_prompt is not None or request.prompt:
raise AnthropicMessagesRequestError(
"Exactly one of Request.messages, Request.prompt or Request.multimodal_prompt should be set"
)
messages = cast(List[MessageParam], request.messages)
if messages[0]["role"] == "system":
system_message = messages[0]
messages = messages[1:]

elif request.multimodal_prompt is not None:
# TODO(#2439): Refactor out Request validation
if request.messages is not None or request.prompt:
raise AnthropicMessagesRequestError(
"Exactly one of Request.messages, Request.prompt or Request.multimodal_prompt should be set"
)
blocks: List[Union[TextBlockParam, ImageBlockParam]] = []
for media_object in request.multimodal_prompt.media_objects:
if media_object.is_type(IMAGE_TYPE):
# TODO(#2439): Refactor out Request validation
if not media_object.location:
raise Exception("MediaObject of image type has missing location field value")

from helm.common.images_utils import encode_base64

base64_image: str = encode_base64(media_object.location, format="JPEG")
image_block: ImageBlockParam = {
"type": "image",
"source": {
"type": "base64",
"media_type": "image/jpeg",
"data": base64_image,
},
}
blocks.append(image_block)
if media_object.is_type(TEXT_TYPE):
# TODO(#2439): Refactor out Request validation
if media_object.text is None:
raise ValueError("MediaObject of text type has missing text field value")
text_block: TextBlockParam = {
"type": "text",
"text": media_object.text,
}
blocks.append(text_block)
messages = [{"role": "user", "content": blocks}]

else:
messages = [{"role": "user", "content": request.prompt}]

Expand Down
1 change: 1 addition & 0 deletions src/helm/common/media_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import List, Optional


IMAGE_TYPE = "image"
TEXT_TYPE = "text"


Expand Down
4 changes: 2 additions & 2 deletions src/helm/config/model_metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -235,15 +235,15 @@ models:
creator_organization_name: Anthropic
access: limited
release_date: 2024-03-04
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

- name: anthropic/claude-3-opus-20240229
display_name: Claude 3 Opus (20240229)
description: TBD
creator_organization_name: Anthropic
access: limited
release_date: 2024-03-04
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# DEPRECATED: Please do not use.
- name: anthropic/stanford-online-all-v4-s3
Expand Down