From c18d427f5249ec371e647d3bcfacc6988028b876 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com>
Date: Sat, 7 Sep 2024 12:41:36 -0700
Subject: [PATCH] feat: add phi3.5 model (#374)

Signed-off-by: Sertac Ozercan
---
 .github/workflows/patch-models.yaml           |  2 +-
 .github/workflows/update-models-self.yaml     |  5 ++--
 .github/workflows/update-models.yaml          |  9 ++++---
 README.md                                     |  4 +--
 models/codestral-22b.yaml                     |  1 -
 models/mixtral-8x7b-instruct.yaml             |  1 -
 ...3-3.8b.yaml => phi-3.5-3.8b-instruct.yaml} | 12 ++++-----
 scripts/parse-models.sh                       | 10 +++----
 website/docs/premade-models.md                | 26 ++++++++++---------
 9 files changed, 34 insertions(+), 36 deletions(-)
 rename models/{phi-3-3.8b.yaml => phi-3.5-3.8b-instruct.yaml} (65%)

diff --git a/.github/workflows/patch-models.yaml b/.github/workflows/patch-models.yaml
index a989a5ca..a5e1d728 100644
--- a/.github/workflows/patch-models.yaml
+++ b/.github/workflows/patch-models.yaml
@@ -21,7 +21,7 @@ jobs:
         - ghcr.io/sozercan/llama3.1:8b
         - ghcr.io/sozercan/llama3.1:70b
         - ghcr.io/sozercan/mixtral:8x7b
-        - ghcr.io/sozercan/phi3:3.8b
+        - ghcr.io/sozercan/phi3.5:3.8b
        - ghcr.io/sozercan/gemma2:2b
         - ghcr.io/sozercan/codestral:22b
         - ghcr.io/sozercan/flux1:dev
diff --git a/.github/workflows/update-models-self.yaml b/.github/workflows/update-models-self.yaml
index 7ea874b9..b718484e 100644
--- a/.github/workflows/update-models-self.yaml
+++ b/.github/workflows/update-models-self.yaml
@@ -23,7 +23,6 @@ jobs:
         - llama-3.1-70b-instruct
         - mixtral-8x7b-instruct
         - codestral-22b
-        - flux-1-dev
     runs-on: self-hosted
     timeout-minutes: 360
     steps:
@@ -50,8 +49,8 @@ jobs:
 
     - name: parse matrix
       run: |
-        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
-        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
         echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
 
     - name: Build and push
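For reviewers, a quick local approximation (not part of the patch) of what the updated "parse matrix" step derives for the new matrix entry. The two sed expressions are copied verbatim from the step above; only the `model` variable is a stand-in for `${{ matrix.model }}`. The result lines up with the `ghcr.io/sozercan/phi3.5:3.8b` image referenced in patch-models.yaml.

```bash
#!/usr/bin/env bash
# Simulate the "parse matrix" step locally for the new model id.
model="phi-3.5-3.8b-instruct"

# The new phi-(3\.5) branch substitutes and then short-circuits (";t;") before the generic rules run.
MODEL_NAME=$(echo "$model" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')

# The size rule now tolerates a dotted version ("-3.5-") before the size token.
MODEL_SIZE=$(echo "$model" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')

echo "${MODEL_NAME}:${MODEL_SIZE}"   # prints: phi3.5:3.8b
```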
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index a85d8f62..376402a8 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -21,8 +21,9 @@ jobs:
       matrix:
         model:
         - llama-3.1-8b-instruct
-        - phi-3-3.8b
+        - phi-3.5-3.8b-instruct
         - gemma-2-2b-instruct
+        - flux-1-dev
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
@@ -76,9 +77,9 @@ jobs:
 
     - name: parse matrix
       run: |
-        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
-        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
-        echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
+        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
 
     - name: Build and push
       run: |
diff --git a/README.md b/README.md
index 0dc2cd8b..4aa7e1ab 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
@@ -101,7 +101,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 | ⌨️ Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
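A quick smoke test of the renamed image and model id from the updated README tables. This is a sketch rather than a documented procedure: it assumes the container serves the OpenAI-compatible chat completions API on port 8080, which is what the `-p 8080:8080` mapping in the table implies.

```bash
# Start the new CPU image exactly as listed in the table above.
docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b

# Query it with the new model name from the "Model name" column.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "phi-3.5-3.8b-instruct", "messages": [{"role": "user", "content": "say hello"}]}'
```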
diff --git a/models/codestral-22b.yaml b/models/codestral-22b.yaml
index 55d779ea..e68ddb64 100644
--- a/models/codestral-22b.yaml
+++ b/models/codestral-22b.yaml
@@ -25,6 +25,5 @@ config: |
     - \"[PREFIX]\"
     - \"[MIDDLE]\"
     - \"[SUFFIX]\"
-    gpu_layers: 50
     f16: true
     mmap: true
diff --git a/models/mixtral-8x7b-instruct.yaml b/models/mixtral-8x7b-instruct.yaml
index 7c45b4e4..6c100382 100644
--- a/models/mixtral-8x7b-instruct.yaml
+++ b/models/mixtral-8x7b-instruct.yaml
@@ -12,6 +12,5 @@ config: |
     parameters:
       model: mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
     context_size: 4096
-    gpu_layers: 15
     f16: true
     mmap: true
diff --git a/models/phi-3-3.8b.yaml b/models/phi-3.5-3.8b-instruct.yaml
similarity index 65%
rename from models/phi-3-3.8b.yaml
rename to models/phi-3.5-3.8b-instruct.yaml
index 5a0cda94..4ffe48d9 100644
--- a/models/phi-3-3.8b.yaml
+++ b/models/phi-3.5-3.8b-instruct.yaml
@@ -3,9 +3,9 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: phi-3-3.8b
-    source: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
-    sha256: 8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef
+  - name: phi-3.5-3.8b-instruct
+    source: https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct.Q4_K_M.gguf
+    sha256: 3f68916e850b107d8641d18bcd5548f0d66beef9e0a9077fe84ef28943eb7e88
     promptTemplates:
       - name: chatMsg
         template: |
@@ -19,10 +19,10 @@ models:
         template: |
           {{.Input}}
 config: |
-  - name: phi-3-3.8b
+  - name: phi-3.5-3.8b-instruct
     backend: llama
     parameters:
-      model: Phi-3-mini-4k-instruct-q4.gguf
+      model: Phi-3.5-mini-instruct.Q4_K_M.gguf
     context_size: 4096
     template:
       chat_message: chatMsg
@@ -32,7 +32,5 @@ config: |
     - <|user|>
     - <|assistant|>
     - <|end|>
-    - <|endoftext|>
-    gpu_layers: 33
     f16: true
     mmap: true
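To double-check the new pinned digest in models/phi-3.5-3.8b-instruct.yaml against the upstream GGUF, a standard checksum verification (an assumed local workflow, not part of the patch; uses GNU coreutils) looks like this:

```bash
# Download the quantized weights referenced in the model file, keeping the remote filename.
curl -sLO https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct.Q4_K_M.gguf

# Verify the file against the sha256 pinned in the model yaml.
echo "3f68916e850b107d8641d18bcd5548f0d66beef9e0a9077fe84ef28943eb7e88  Phi-3.5-mini-instruct.Q4_K_M.gguf" | sha256sum -c -
# Expected: Phi-3.5-mini-instruct.Q4_K_M.gguf: OK
```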
diff --git a/scripts/parse-models.sh b/scripts/parse-models.sh
index cdcdcba9..55363e81 100755
--- a/scripts/parse-models.sh
+++ b/scripts/parse-models.sh
@@ -2,13 +2,13 @@
 
 # Define the functions to extract each part
 extract_model_name() {
-    # Capture the base name, handling the special cases for llama-3.1 and flux-1-dev
-    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/'
+    # Capture the base name, handling the special cases for llama-3.1, flux-1-dev, and phi-3.5
+    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/'
 }
 
 extract_model_size() {
     # Capture the size part, handling the special cases for llama-3.1 and flux-1-dev
-    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/'
+    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/'
 }
 
 extract_model_type() {
@@ -16,8 +16,8 @@ extract_model_type() {
     echo "$1" | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p'
 }
 
-# Run and display results for each example
-for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
+# Run and display results for each example, including phi-3.5-3.8b-instruct
+for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
     echo "Model: $MODEL"
     echo "  Name: $(extract_model_name "$MODEL")"
     echo "  Size: $(extract_model_size "$MODEL")"
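With both phi entries in the sample list, the helper should resolve the legacy id and the new id as shown below. The output is abridged and inferred from the regexes above rather than captured from a run; only the Name and Size lines for the two phi entries are shown.

```bash
./scripts/parse-models.sh
# Model: phi-3-3.8b
#   Name: phi3
#   Size: 3.8b
# ...
# Model: phi-3.5-3.8b-instruct
#   Name: phi3.5
#   Size: 3.8b
# ...
```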
diff --git a/website/docs/premade-models.md b/website/docs/premade-models.md
index 3a135d4a..491ac791 100644
--- a/website/docs/premade-models.md
+++ b/website/docs/premade-models.md
@@ -13,7 +13,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
@@ -24,7 +24,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 | ⌨️ Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
@@ -49,11 +49,12 @@ If you need to use these specific models, you can always [create your own images
 | ----------- | ------------ | ---------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
 | 🐬 Orca 2 | | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/orca2:13b` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
 | 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi2:2.7b` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 
 ### NVIDIA CUDA
 
@@ -62,8 +63,9 @@ If you need to use these specific models, you can always [create your own images
 | ----------- | ------------ | ---------- | ---------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
 | 🐬 Orca 2 | | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/orca2:13b-cuda` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
 | 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi2:2.7b-cuda` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |