Skip to content

Commit

Permalink
feat: add phi3.5 model (#374)
Browse files Browse the repository at this point in the history
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
  • Loading branch information
sozercan committed Sep 7, 2024
1 parent b2d6722 commit c18d427
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/patch-models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- ghcr.io/sozercan/llama3.1:8b
- ghcr.io/sozercan/llama3.1:70b
- ghcr.io/sozercan/mixtral:8x7b
- ghcr.io/sozercan/phi3:3.8b
- ghcr.io/sozercan/phi3.5:3.8b
- ghcr.io/sozercan/gemma2:2b
- ghcr.io/sozercan/codestral:22b
- ghcr.io/sozercan/flux1:dev
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/update-models-self.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ jobs:
- llama-3.1-70b-instruct
- mixtral-8x7b-instruct
- codestral-22b
- flux-1-dev
runs-on: self-hosted
timeout-minutes: 360
steps:
Expand All @@ -50,8 +49,8 @@ jobs:

- name: parse matrix
run: |
echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
- name: Build and push
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/update-models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ jobs:
matrix:
model:
- llama-3.1-8b-instruct
- phi-3-3.8b
- phi-3.5-3.8b-instruct
- gemma-2-2b-instruct
- flux-1-dev
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
Expand Down Expand Up @@ -76,9 +77,9 @@ jobs:

- name: parse matrix
run: |
echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+-([0-9]+\.?[0-9]*b).*/\1/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
echo "MODEL_NAME=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_SIZE=$(echo ${{ matrix.model }} | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/')" >> $GITHUB_ENV
echo "MODEL_TYPE=-$(echo ${{ matrix.model }} | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p')" >> $GITHUB_ENV
- name: Build and push
run: |
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ If it doesn't include a specific model, you can always [create your own images](
| 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |

Expand All @@ -101,7 +101,7 @@ If it doesn't include a specific model, you can always [create your own images](
| 🦙 Llama 3.1 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
| 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
| ⌨️ Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
Expand Down
1 change: 0 additions & 1 deletion models/codestral-22b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,5 @@ config: |
- \"[PREFIX]\"
- \"[MIDDLE]\"
- \"[SUFFIX]\"
gpu_layers: 50
f16: true
mmap: true
1 change: 0 additions & 1 deletion models/mixtral-8x7b-instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,5 @@ config: |
parameters:
model: mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
context_size: 4096
gpu_layers: 15
f16: true
mmap: true
12 changes: 5 additions & 7 deletions models/phi-3-3.8b.yaml → models/phi-3.5-3.8b-instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ apiVersion: v1alpha1
debug: true
runtime: cuda
models:
- name: phi-3-3.8b
source: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
sha256: 8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef
- name: phi-3.5-3.8b-instruct
source: https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct.Q4_K_M.gguf
sha256: 3f68916e850b107d8641d18bcd5548f0d66beef9e0a9077fe84ef28943eb7e88
promptTemplates:
- name: chatMsg
template: |
Expand All @@ -19,10 +19,10 @@ models:
template: |
{{.Input}}
config: |
- name: phi-3-3.8b
- name: phi-3.5-3.8b-instruct
backend: llama
parameters:
model: Phi-3-mini-4k-instruct-q4.gguf
model: Phi-3.5-mini-instruct.Q4_K_M.gguf
context_size: 4096
template:
chat_message: chatMsg
Expand All @@ -32,7 +32,5 @@ config: |
- <|user|>
- <|assistant|>
- <|end|>
- <|endoftext|>
gpu_layers: 33
f16: true
mmap: true
10 changes: 5 additions & 5 deletions scripts/parse-models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,22 @@

# Define the functions to extract each part
extract_model_name() {
  # Print the base name for a model identifier on a single line.
  #
  # Arguments: $1 - model identifier, e.g. "phi-3.5-3.8b-instruct"
  # Outputs:   the name on stdout, e.g.
  #              llama-3.1-8b-instruct -> llama3.1
  #              flux-1-dev            -> flux1
  #              phi-3.5-3.8b-instruct -> phi3.5
  #              gemma-2-2b-instruct   -> gemma2
  #              mixtral-8x7b-instruct -> mixtral
  #              codestral-22b         -> codestral
  #
  # The llama-3.1, flux-N-dev and phi-3.5 rules come first and are each
  # followed by `t`, which jumps to the end of the script once a substitution
  # has succeeded so the generic rules cannot rewrite the result again.
  # Exactly one sed pipeline runs, so exactly one line is printed per call.
  local model=$1
  printf '%s\n' "$model" \
    | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; s/^flux-([0-9]+)-dev$/flux\1/;t; s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^([a-z]+)-([0-9]+)-.*/\1\2/; s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/'
}

extract_model_size() {
  # Print the size component of a model identifier on a single line.
  #
  # Arguments: $1 - model identifier, e.g. "phi-3.5-3.8b-instruct"
  # Outputs:   the size on stdout, e.g.
  #              llama-3.1-70b-instruct -> 70b
  #              flux-1-dev             -> dev
  #              phi-3.5-3.8b-instruct  -> 3.8b
  #              phi-3-3.8b             -> 3.8b
  #              mixtral-8x7b-instruct  -> 8x7b
  #              codestral-22b          -> 22b
  #
  # The llama-3.1 and flux-N-dev rules come first and are each followed by
  # `t`, which jumps to the end of the script once a substitution succeeded.
  # The fourth rule accepts an optional ".minor" after the version number so
  # dotted versions such as phi-3.5 resolve to the size that follows them.
  # Exactly one sed pipeline runs, so exactly one line is printed per call.
  local model=$1
  printf '%s\n' "$model" \
    | sed -E 's/^llama-(3\.1)-([0-9]+\.?[0-9]*b)-.*/\2/;t; s/^flux-[0-9]+-dev$/dev/;t; s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/'
}

extract_model_type() {
  # Print the type component of a model identifier, or nothing when the
  # identifier carries no recognised type.
  #
  # Arguments: $1 - model identifier, e.g. "llama-2-7b-chat"
  # Outputs:   "dev" for flux-N-dev, "chat" or "instruct" when that word
  #            appears in the identifier, otherwise no output at all.
  #
  # sed runs here without -E, so the expressions are basic regular
  # expressions, where a bare `+` is a literal plus sign rather than a
  # quantifier. "One or more digits" is therefore written `[0-9][0-9]*`,
  # which is what lets the flux rule actually match "flux-1-dev".
  local model=$1
  printf '%s\n' "$model" \
    | sed -n \
      -e 's/^flux-[0-9][0-9]*-\(dev\)$/\1/p' \
      -e 's/.*\(chat\).*/\1/p' \
      -e 's/.*\(instruct\).*/\1/p'
}

# Run and display results for each example
for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
# Run and display results for each example, including phi-3.5-3.8b-instruct
for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
echo "Model: $MODEL"
echo " Name: $(extract_model_name "$MODEL")"
echo " Size: $(extract_model_size "$MODEL")"
Expand Down
Loading

0 comments on commit c18d427

Please sign in to comment.