From c18d427f5249ec371e647d3bcfacc6988028b876 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com>
Date: Sat, 7 Sep 2024 12:41:36 -0700
Subject: [PATCH] feat: add phi3.5 model (#374)

Signed-off-by: Sertac Ozercan
---
 .github/workflows/patch-models.yaml           |  2 +-
 .github/workflows/update-models-self.yaml     |  5 ++--
 .github/workflows/update-models.yaml          |  9 ++++---
 README.md                                     |  4 +--
 models/codestral-22b.yaml                     |  1 -
 models/mixtral-8x7b-instruct.yaml             |  1 -
 ...3-3.8b.yaml => phi-3.5-3.8b-instruct.yaml} | 12 ++++-----
 scripts/parse-models.sh                       | 10 +++----
 website/docs/premade-models.md                | 26 ++++++++++---------
 9 files changed, 34 insertions(+), 36 deletions(-)
 rename models/{phi-3-3.8b.yaml => phi-3.5-3.8b-instruct.yaml} (65%)

diff --git a/.github/workflows/patch-models.yaml b/.github/workflows/patch-models.yaml
index a989a5ca..a5e1d728 100644
--- a/.github/workflows/patch-models.yaml
+++ b/.github/workflows/patch-models.yaml
@@ -21,7 +21,7 @@ jobs:
         - ghcr.io/sozercan/llama3.1:8b
         - ghcr.io/sozercan/llama3.1:70b
         - ghcr.io/sozercan/mixtral:8x7b
-        - ghcr.io/sozercan/phi3:3.8b
+        - ghcr.io/sozercan/phi3.5:3.8b
        - ghcr.io/sozercan/gemma2:2b
         - ghcr.io/sozercan/codestral:22b
         - ghcr.io/sozercan/flux1:dev
diff --git a/.github/workflows/update-models-self.yaml b/.github/workflows/update-models-self.yaml
index 7ea874b9..b718484e 100644
--- a/.github/workflows/update-models-self.yaml
+++ b/.github/workflows/update-models-self.yaml
@@ -23,7 +23,6 @@ jobs:
         - llama-3.1-70b-instruct
         - mixtral-8x7b-instruct
         - codestral-22b
-        - flux-1-dev
     runs-on: self-hosted
     timeout-minutes: 360
     steps:
@@ -50,8 +49,8 @@ jobs:
 
     - name: parse matrix
       run: |
-        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
-        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
         echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
 
     - name: Build and push
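For reviewers, a quick local approximation (not part of the patch) of what the updated "parse matrix" step derives for the new matrix entry. The two sed expressions are copied verbatim from the step above; only the `model` variable is a stand-in for `${{ matrix.model }}`. The result lines up with the `ghcr.io/sozercan/phi3.5:3.8b` image referenced in patch-models.yaml.

```bash
#!/usr/bin/env bash
# Simulate the "parse matrix" step locally for the new model id.
model="phi-3.5-3.8b-instruct"

# The new phi-(3\.5) branch substitutes and then short-circuits (";t;") before the generic rules run.
MODEL_NAME=$(echo "$model" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')

# The size rule now tolerates a dotted version ("-3.5-") before the size token.
MODEL_SIZE=$(echo "$model" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')

echo "${MODEL_NAME}:${MODEL_SIZE}"   # prints: phi3.5:3.8b
```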
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index a85d8f62..376402a8 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -21,8 +21,9 @@ jobs:
       matrix:
         model:
         - llama-3.1-8b-instruct
-        - phi-3-3.8b
+        - phi-3.5-3.8b-instruct
         - gemma-2-2b-instruct
+        - flux-1-dev
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
@@ -76,9 +77,9 @@ jobs:
 
     - name: parse matrix
       run: |
-        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
-        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
-        echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
+        echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
+        echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
 
     - name: Build and push
       run: |
diff --git a/README.md b/README.md
index 0dc2cd8b..4aa7e1ab 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
@@ -101,7 +101,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 | ⌨️ Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
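A quick smoke test of the renamed image and model id from the updated README tables. This is a sketch rather than a documented procedure: it assumes the container serves the OpenAI-compatible chat completions API on port 8080, which is what the `-p 8080:8080` mapping in the table implies.

```bash
# Start the new CPU image exactly as listed in the table above.
docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b

# Query it with the new model name from the "Model name" column.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "phi-3.5-3.8b-instruct", "messages": [{"role": "user", "content": "say hello"}]}'
```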
diff --git a/models/codestral-22b.yaml b/models/codestral-22b.yaml
index 55d779ea..e68ddb64 100644
--- a/models/codestral-22b.yaml
+++ b/models/codestral-22b.yaml
@@ -25,6 +25,5 @@ config: |
     - \"[PREFIX]\"
     - \"[MIDDLE]\"
     - \"[SUFFIX]\"
-    gpu_layers: 50
     f16: true
     mmap: true
diff --git a/models/mixtral-8x7b-instruct.yaml b/models/mixtral-8x7b-instruct.yaml
index 7c45b4e4..6c100382 100644
--- a/models/mixtral-8x7b-instruct.yaml
+++ b/models/mixtral-8x7b-instruct.yaml
@@ -12,6 +12,5 @@ config: |
     parameters:
       model: mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
     context_size: 4096
-    gpu_layers: 15
     f16: true
     mmap: true
diff --git a/models/phi-3-3.8b.yaml b/models/phi-3.5-3.8b-instruct.yaml
similarity index 65%
rename from models/phi-3-3.8b.yaml
rename to models/phi-3.5-3.8b-instruct.yaml
index 5a0cda94..4ffe48d9 100644
--- a/models/phi-3-3.8b.yaml
+++ b/models/phi-3.5-3.8b-instruct.yaml
@@ -3,9 +3,9 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: phi-3-3.8b
-    source: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
-    sha256: 8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef
+  - name: phi-3.5-3.8b-instruct
+    source: https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct.Q4_K_M.gguf
+    sha256: 3f68916e850b107d8641d18bcd5548f0d66beef9e0a9077fe84ef28943eb7e88
     promptTemplates:
       - name: chatMsg
         template: |
@@ -19,10 +19,10 @@ models:
         template: |
           {{.Input}}
 config: |
-  - name: phi-3-3.8b
+  - name: phi-3.5-3.8b-instruct
     backend: llama
     parameters:
-      model: Phi-3-mini-4k-instruct-q4.gguf
+      model: Phi-3.5-mini-instruct.Q4_K_M.gguf
     context_size: 4096
     template:
       chat_message: chatMsg
@@ -32,7 +32,5 @@ config: |
     - <|user|>
     - <|assistant|>
     - <|end|>
-    - <|endoftext|>
-    gpu_layers: 33
     f16: true
     mmap: true
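To double-check the new pinned digest in models/phi-3.5-3.8b-instruct.yaml against the upstream GGUF, a standard checksum verification (an assumed local workflow, not part of the patch; uses GNU coreutils) looks like this:

```bash
# Download the quantized weights referenced in the model file, keeping the remote filename.
curl -sLO https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct.Q4_K_M.gguf

# Verify the file against the sha256 pinned in the model yaml.
echo "3f68916e850b107d8641d18bcd5548f0d66beef9e0a9077fe84ef28943eb7e88  Phi-3.5-mini-instruct.Q4_K_M.gguf" | sha256sum -c -
# Expected: Phi-3.5-mini-instruct.Q4_K_M.gguf: OK
```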
diff --git a/scripts/parse-models.sh b/scripts/parse-models.sh
index cdcdcba9..55363e81 100755
--- a/scripts/parse-models.sh
+++ b/scripts/parse-models.sh
@@ -2,13 +2,13 @@
 
 # Define the functions to extract each part
 extract_model_name() {
-    # Capture the base name, handling the special cases for llama-3.1 and flux-1-dev
-    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/'
+    # Capture the base name, handling the special cases for llama-3.1, flux-1-dev, and phi-3.5
+    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/'
 }
 
 extract_model_size() {
     # Capture the size part, handling the special cases for llama-3.1 and flux-1-dev
-    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/'
+    echo "$1" | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/'
 }
 
 extract_model_type() {
@@ -16,8 +16,8 @@ extract_model_type() {
     echo "$1" | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p'
 }
 
-# Run and display results for each example
-for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
+# Run and display results for each example, including phi-3.5-3.8b-instruct
+for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
     echo "Model: $MODEL"
     echo "  Name: $(extract_model_name "$MODEL")"
     echo "  Size: $(extract_model_size "$MODEL")"
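With both phi entries in the sample list, the helper should resolve the legacy id and the new id as shown below. The output is abridged and inferred from the regexes above rather than captured from a run; only the Name and Size lines for the two phi entries are shown.

```bash
./scripts/parse-models.sh
# Model: phi-3-3.8b
#   Name: phi3
#   Size: 3.8b
# ...
# Model: phi-3.5-3.8b-instruct
#   Name: phi3.5
#   Size: 3.8b
# ...
```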
diff --git a/website/docs/premade-models.md b/website/docs/premade-models.md
index 3a135d4a..491ac791 100644
--- a/website/docs/premade-models.md
+++ b/website/docs/premade-models.md
@@ -13,7 +13,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
@@ -24,7 +24,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 | ⌨️ Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
@@ -49,11 +49,12 @@ If you need to use these specific models, you can always [create your own images
 | ----------- | ------------ | ---------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
 | 🐬 Orca 2 | | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/orca2:13b` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
 | 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi2:2.7b` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 
 ### NVIDIA CUDA
 
@@ -62,8 +63,9 @@ If you need to use these specific models, you can always [create your own images
 | ----------- | ------------ | ---------- | ---------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
 | 🐬 Orca 2 | | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/orca2:13b-cuda` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
 | 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi2:2.7b-cuda` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |