From d4cfcd9137ed32a852cbf51bd4fbcd86e37295d1 Mon Sep 17 00:00:00 2001
From: Yifan Mai
Date: Mon, 26 Feb 2024 14:07:14 -0800
Subject: [PATCH] Add Mistral Small and Mistral Large (#2399)

---
Review notes (this section is ignored by `git am`):
- release_date of both new "-2402" models corrected from 2023-02-26 to
  2024-02-26: the commit itself is dated 26 Feb 2024 and the "2402" suffix
  follows the same YYMM scheme as mistral-medium-2312 (2023-12-11).
- display_name of mistral-large-2402 now carries the "(2402)" suffix, like
  the Small (2402) and Medium (2312) entries added alongside it.
- Line breaks, YAML indentation and the placement of blank context lines
  were reconstructed from a whitespace-collapsed copy of this patch.
  TODO(review): confirm against the target files before applying.
- Hunk line counts and the diffstat are unchanged by the fixes above.

 src/helm/config/model_deployments.yaml | 18 ++++++++++++++++--
 src/helm/config/model_metadata.yaml    | 25 ++++++++++++++++++++++---
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml
index 4e5b71c789..a2583ab126 100644
--- a/src/helm/config/model_deployments.yaml
+++ b/src/helm/config/model_deployments.yaml
@@ -938,8 +938,22 @@ model_deployments:
       args:
         mistral_model: "mistral-tiny"
 
-  - name: mistralai/mistral-medium
-    model_name: mistralai/mistral-medium
+  - name: mistralai/mistral-small-2402
+    model_name: mistralai/mistral-small-2402
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.proxy.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-medium-2312
+    model_name: mistralai/mistral-medium-2312
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.proxy.clients.mistral_client.MistralAIClient"
+
+  - name: mistralai/mistral-large-2402
+    model_name: mistralai/mistral-large-2402
     tokenizer_name: mistralai/Mistral-7B-v0.1
     max_sequence_length: 32000
     client_spec:
diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml
index 6e1504b41d..221f89c91a 100644
--- a/src/helm/config/model_metadata.yaml
+++ b/src/helm/config/model_metadata.yaml
@@ -1205,14 +1205,33 @@ models:
     release_date: 2023-10-16
     tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
 
-  - name: mistralai/mistral-medium
-    display_name: Mistral medium
+  - name: mistralai/mistral-small-2402
+    display_name: Mistral Small (2402)
+    # TODO: Fill in description
+    description: TBD
+    creator_organization_name: Mistral AI
+    access: closed
+    # Blog post: https://mistral.ai/news/mistral-large/
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mistral-medium-2312
+    display_name: Mistral Medium (2312)
     description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
     creator_organization_name: Mistral AI
     access: closed
     release_date: 2023-12-11
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: mistralai/mistral-large-2402
+    display_name: Mistral Large (2402)
+    # TODO: Fill in description
+    description: TBD
+    creator_organization_name: Mistral AI
+    access: closed
+    # Blog post: https://mistral.ai/news/mistral-large/
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 
   # MosaicML