Skip to content

Commit

Permalink
Patch wrong Modality Error (#973)
Browse files Browse the repository at this point in the history
Catches the case where a LanguageBind model is given a modality it does not support, and reports it with the error "Model {model} does not support {modality}".
  • Loading branch information
RaynorChavez committed Sep 18, 2024
1 parent a0084a8 commit 5c87d70
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 11 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/unit_test_200gb_CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,6 @@ jobs:
repository: marqo-ai/marqo-base
path: marqo-base

- name: Install FFmpeg and libmagic
run: |
sudo apt-get update
sudo apt-get install -y ffmpeg libmagic1
ffmpeg -version # Verify installation
file --version # Verify libmagic installation and version
- name: Install dependencies
run: |
pip install -r marqo-base/requirements.txt
Expand Down
10 changes: 6 additions & 4 deletions src/marqo/tensor_search/add_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,12 @@ def threaded_download_and_preprocess_content(allocated_docs: List[dict],

elif (inferred_modality in [Modality.VIDEO, Modality.AUDIO] and is_unstructured_index) or (
is_structured_index and media_field_types_mapping[field] in [FieldType.AudioPointer, FieldType.VideoPointer] and inferred_modality in [Modality.AUDIO, Modality.VIDEO]):

if marqo_index_model.properties.get('type') not in [
ModelType.LanguageBind] and inferred_modality not in marqo_index_model.properties.get(
'supported_modalities'):
if marqo_index_model.properties.get('type') not in [ModelType.LanguageBind]:
media_repo[doc[field]] = UnsupportedModalityError(
f"Model {marqo_index_model.name} does not support {inferred_modality}")
continue

if inferred_modality not in marqo_index_model.properties.get('supported_modalities'):
media_repo[doc[field]] = UnsupportedModalityError(
f"Model {marqo_index_model.name} does not support {inferred_modality}")
continue
Expand Down
34 changes: 33 additions & 1 deletion tests/tensor_search/test_modalities_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,4 +372,36 @@ def test_media_download_error(self, mock_infer_modality):

self.assertIn(self.mock_video_url, media_repo)
self.assertIsInstance(media_repo[self.mock_video_url], MediaDownloadError)
self.assertIn("Network error while inferring modality", str(media_repo[self.mock_video_url]))
self.assertIn("Network error while inferring modality", str(media_repo[self.mock_video_url]))

@patch("marqo.tensor_search.add_docs.download_and_chunk_media")
@patch("marqo.tensor_search.add_docs.infer_modality")
def test_audio_with_video_only_model(self, mock_infer_modality, mock_download_and_chunk):
    """Audio content sent to a LanguageBind model without audio support is rejected.

    The mock model is configured as a LanguageBind model whose supported
    modalities are VIDEO and TEXT. With modality inference forced to AUDIO,
    the media repo entry for the audio URL must be an
    UnsupportedModalityError carrying the "does not support" message, and
    download_and_chunk_media must never be invoked.
    """
    # Modality inference is patched so the document is always seen as audio.
    mock_infer_modality.return_value = Modality.AUDIO

    # Configure the model: LanguageBind type, video + text only (no audio).
    model_properties = self.mock_model.properties
    model_properties["type"] = ModelType.LanguageBind
    model_properties["supported_modalities"] = [Modality.VIDEO, Modality.TEXT]
    self.mock_model.name = "LanguageBind/Video_V1.5_FT"

    # A single document whose only tensor field points at the audio URL.
    audio_url = self.mock_audio_url
    documents = [{"field1": audio_url}]
    repo = {}
    fields = ["field1"]

    threaded_download_and_preprocess_content(
        documents,
        repo,
        fields,
        {},
        device="cpu",
        marqo_index_type=self.mock_marqo_index.type,
        marqo_index_model=self.mock_marqo_index.model,
    )

    # The URL must be present in the repo, mapped to the expected error.
    self.assertIn(audio_url, repo)
    recorded = repo[audio_url]
    self.assertIsInstance(recorded, UnsupportedModalityError)
    self.assertIn(
        f"Model LanguageBind/Video_V1.5_FT does not support {Modality.AUDIO}",
        str(recorded),
    )

    # The rejection must happen before any download/chunking work is attempted.
    mock_download_and_chunk.assert_not_called()

0 comments on commit 5c87d70

Please sign in to comment.