
chore(deps): update starlette-context requirement from <0.4,>=0.3.6 to >=0.3.6,<0.5 #3


Open · wants to merge 32 commits into base: main

Commits (32)
a08a754
feat: add streaming tool use
lsorber Dec 25, 2024
f1da6e9
fix: remove strict=True to support Python 3.9
lsorber Dec 25, 2024
e9fa51e
feat: improve tool use robustness
lsorber Jan 5, 2025
a41d866
test: skip if insufficient resources on macOS
lsorber Jan 12, 2025
72b0b51
fix: apply missing _convert_text_completion_logprobs_to_chat
lsorber Mar 14, 2025
69dd574
feat: Add Gemma3 chat handler (#1976)
kossum Mar 30, 2025
81bdaf1
resolve the image embedding issue in gemma3
kossum Apr 2, 2025
a107bd5
fix: added n_ctx check for prompt requirements when embedding images …
kossum Apr 3, 2025
0ea0d58
fix: modify the gemma3 chat template to be compatible with openai api
kossum Apr 4, 2025
77ea816
feat: Update llama.cpp
abetlen Apr 11, 2025
597b162
feat: Update llama.cpp
abetlen May 8, 2025
5f7cd9c
chore: Bump version
abetlen May 8, 2025
73ac3be
hotfix: Disable curl support
abetlen May 8, 2025
9921396
fix: add compatibility with v0.3.9 for Gemma3ChatHandler
kossum Jun 4, 2025
63fc309
feat: abstract context creation and expose for recreation
okaris Jun 20, 2025
5fcd220
feat: add usage to streaming response
okaris Jun 23, 2025
d214754
switch to llama.cpp fork and llama : expose C API to get layer device…
okaris Jun 24, 2025
aaf1777
chore: empty commit to trigger rebuild downstream
okaris Jun 24, 2025
00c1bae
c definitions
okaris Jun 24, 2025
3000f7d
chore: bump empty commit
okaris Jun 24, 2025
54691ca
change to inferencesh/llama.cpp
okaris Jun 24, 2025
1d23ae0
migrate llava to mtmd
okaris Jun 24, 2025
525255a
port kv_cache to new memory
okaris Jun 24, 2025
f65fbe7
cleanup
okaris Jun 24, 2025
8524429
fixes
okaris Jun 24, 2025
c66b6e9
migrate clip to mtmd
okaris Jun 25, 2025
646beb7
migrate clip to mtmd
okaris Jun 25, 2025
f9471b6
add general purpose function calling handler
okaris Jun 26, 2025
6f59c3e
add general purpose function calling handler
okaris Jun 26, 2025
bc3c56d
add general purpose function calling handler
okaris Jun 26, 2025
ec8e022
update llama.cpp
okaris Jun 27, 2025
7c7248a
chore(deps): update starlette-context requirement
dependabot[bot] Jun 27, 2025
.gitmodules: 2 changes (1 addition, 1 deletion)
@@ -1,3 +1,3 @@
[submodule "vendor/llama.cpp"]
path = vendor/llama.cpp
-url = https://github.com/ggerganov/llama.cpp.git
+url = http://github.com/inference-sh/llama.cpp
CHANGELOG.md: 4 changes (4 additions, 0 deletions)
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

+## [0.3.9]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@8733e0cf6eefc7c7752297cc22d0836706f4222c
+
## [0.3.8]

- feat: Update llama.cpp to ggerganov/llama.cpp@7841fc723e059d1fd9640e5c0ef19050fcc7c698
CMakeLists.txt: 35 changes (14 additions, 21 deletions)
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.21)
project(llama_cpp)

option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON)
-option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)
+option(MTMD_BUILD "Build multimodal (mtmd) shared library and install alongside python package" ON)

function(llama_cpp_python_install_target target)
if(NOT TARGET ${target})
@@ -62,6 +62,9 @@ if (LLAMA_BUILD)
# Enable building of the common library
set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build llama.cpp common library" FORCE)

+# Disable building curl support
+set(LLAMA_CURL OFF CACHE BOOL "llama.cpp: enable curl" FORCE)

# Architecture detection and settings for Apple platforms
if (APPLE)
# Get the target architecture
@@ -132,7 +135,7 @@ if (LLAMA_BUILD)
)
endif()

-if (MTMD_BUILD)
+if (MTMD_BUILD)
if (LLAMA_CUBLAS OR LLAMA_CUDA)
add_compile_definitions(GGML_USE_CUBLAS)
add_compile_definitions(GGML_USE_CUDA)
@@ -142,36 +145,26 @@ if (LLAMA_BUILD)
add_compile_definitions(GGML_USE_METAL)
endif()

-# Building llava
-add_subdirectory(vendor/llama.cpp/examples/llava)
-set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
+# Building multimodal support using mtmd
+add_subdirectory(vendor/llama.cpp/tools/mtmd)

if (WIN32)
-set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
+set_target_properties(mtmd PROPERTIES CUDA_ARCHITECTURES OFF)
endif()
-llama_cpp_python_install_target(llava_shared)
+llama_cpp_python_install_target(mtmd)
if (WIN32)
install(
-FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+FILES $<TARGET_RUNTIME_DLLS:mtmd>
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
)
install(
-FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+FILES $<TARGET_RUNTIME_DLLS:mtmd>
DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
)
endif()

-# Fix for llava build: Add include directory for llama.h
-# Move these commands after the add_subdirectory call
-target_include_directories(llava PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
-target_include_directories(llava PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
-
-if (BUILD_SHARED_LIBS)
-target_include_directories(llava_shared PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
-target_include_directories(llava_shared PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
-endif()
-
-target_include_directories(llama-llava-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
-target_include_directories(llama-minicpmv-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+# Add include directories for mtmd
+target_include_directories(mtmd PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+target_include_directories(mtmd PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
endif()
endif()
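Since the build option is renamed from LLAVA_BUILD to MTMD_BUILD and the llava targets are replaced by a single mtmd target, a quick sanity check after a local build is to look for the mtmd runtime library inside the installed package. This is a minimal sketch, assuming the install targets above place shared libraries under the package directory (as the llama_cpp/lib destinations suggest); the exact file name varies by platform.

```python
import pathlib

import llama_cpp

# Search the installed llama_cpp package for the multimodal (mtmd) shared
# library that the MTMD_BUILD option is expected to produce. The file name
# differs by platform (e.g. libmtmd.so, libmtmd.dylib, mtmd.dll).
pkg_dir = pathlib.Path(llama_cpp.__file__).parent
hits = sorted(str(p.relative_to(pkg_dir)) for p in pkg_dir.rglob("*mtmd*"))
print(hits or "no mtmd library found - was the package built with MTMD_BUILD=ON?")
```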
examples/notebooks/Batching.ipynb: 2 changes (1 addition, 1 deletion)
@@ -230,7 +230,7 @@
"outputs": [],
"source": [
"for i in range(n_parallel):\n",
" llama_cpp.llama_kv_cache_seq_cp(ctx, 0, i, 0, batch.n_tokens)"
" llama_cpp.llama_kv_self_seq_cp(ctx, 0, i, 0, batch.n_tokens)"
]
},
{
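The notebook tweak above tracks the upstream rename of the low-level KV-cache sequence functions from llama_kv_cache_* to llama_kv_self_*. Below is a minimal compatibility sketch (the helper name is hypothetical) that works against either an older or a newer build by resolving whichever symbol is present:

```python
import llama_cpp

# Resolve whichever low-level symbol this build of llama_cpp exposes:
# newer builds ship llama_kv_self_seq_cp, older ones llama_kv_cache_seq_cp.
_kv_seq_cp = getattr(llama_cpp, "llama_kv_self_seq_cp", None) or getattr(
    llama_cpp, "llama_kv_cache_seq_cp"
)


def share_prompt_across_sequences(ctx, n_parallel: int, n_prompt_tokens: int) -> None:
    """Copy the prompt's KV entries from sequence 0 into sequences 1..n_parallel-1,
    mirroring the loop in the Batching notebook."""
    for i in range(1, n_parallel):
        _kv_seq_cp(ctx, 0, i, 0, n_prompt_tokens)
```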
llama_cpp/__init__.py: 2 changes (1 addition, 1 deletion)
@@ -1,4 +1,4 @@
from .llama_cpp import *
from .llama import *

-__version__ = "0.3.8"
+__version__ = "0.3.9"
llama_cpp/_ctypes_extensions.py: 2 changes (1 addition, 1 deletion)
@@ -128,4 +128,4 @@ def _byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCD
...


-byref = _byref if TYPE_CHECKING else ctypes.byref
+byref = _byref if TYPE_CHECKING else ctypes.byref
llama_cpp/_internals.py: 35 changes (24 additions, 11 deletions)
@@ -2,6 +2,7 @@

import os
import ctypes
+from enum import Enum

from typing import (
Dict,
@@ -24,7 +25,13 @@


# Python wrappers over llama.h structs

+class LlamaBackendDev(Enum):
+    # CPU device using system memory
+    CPU = 0
+    # GPU device using dedicated memory
+    GPU = 1
+    # accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
+    ACCEL = 2

class LlamaModel:
"""Intermediate Python wrapper for a llama.cpp llama_model.
@@ -88,6 +95,12 @@ def n_ctx_train(self) -> int:

def n_embd(self) -> int:
return llama_cpp.llama_n_embd(self.model)

+def n_layer(self) -> int:
+    return llama_cpp.llama_n_layer(self.model)
+
+def dev_layer(self, il: int) -> LlamaBackendDev:
+    return LlamaBackendDev(llama_cpp.llama_model_dev_layer(self.model, il))

def rope_freq_scale_train(self) -> float:
return llama_cpp.llama_model_rope_freq_scale_train(self.model)
@@ -276,20 +289,20 @@ def n_ctx(self) -> int:
def pooling_type(self) -> int:
return llama_cpp.llama_pooling_type(self.ctx)

-def kv_cache_clear(self):
-    llama_cpp.llama_kv_cache_clear(self.ctx)
+def kv_self_clear(self):
+    llama_cpp.llama_kv_self_clear(self.ctx)

-def kv_cache_seq_rm(self, seq_id: int, p0: int, p1: int):
-    llama_cpp.llama_kv_cache_seq_rm(self.ctx, seq_id, p0, p1)
+def kv_self_seq_rm(self, seq_id: int, p0: int, p1: int):
+    llama_cpp.llama_kv_self_seq_rm(self.ctx, seq_id, p0, p1)

-def kv_cache_seq_cp(self, seq_id_src: int, seq_id_dst: int, p0: int, p1: int):
-    llama_cpp.llama_kv_cache_seq_cp(self.ctx, seq_id_src, seq_id_dst, p0, p1)
+def kv_self_seq_cp(self, seq_id_src: int, seq_id_dst: int, p0: int, p1: int):
+    llama_cpp.llama_kv_self_seq_cp(self.ctx, seq_id_src, seq_id_dst, p0, p1)

-def kv_cache_seq_keep(self, seq_id: int):
-    llama_cpp.llama_kv_cache_seq_keep(self.ctx, seq_id)
+def kv_self_seq_keep(self, seq_id: int):
+    llama_cpp.llama_kv_self_seq_keep(self.ctx, seq_id)

-def kv_cache_seq_shift(self, seq_id: int, p0: int, p1: int, shift: int):
-    llama_cpp.llama_kv_cache_seq_add(self.ctx, seq_id, p0, p1, shift)
+def kv_self_seq_shift(self, seq_id: int, p0: int, p1: int, shift: int):
+    llama_cpp.llama_kv_self_seq_add(self.ctx, seq_id, p0, p1, shift)

def get_state_size(self) -> int:
return llama_cpp.llama_get_state_size(self.ctx)
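To put the _internals.py changes in context: LlamaModel gains n_layer()/dev_layer() accessors (backed by the fork's llama_model_dev_layer C API, so this only applies to builds from this branch), and the LlamaContext KV-cache wrappers are renamed to kv_self_*. The sketch below shows one way these might be exercised from a high-level Llama instance; it assumes Llama still keeps its wrappers in the private _model and _ctx attributes, and the model path is a placeholder.

```python
from llama_cpp import Llama
from llama_cpp._internals import LlamaBackendDev

# Placeholder GGUF path; substitute a real local model file.
llm = Llama(model_path="./models/example.gguf", n_gpu_layers=-1, verbose=False)

# Inspect where each transformer layer was placed using the new accessors.
model = llm._model
gpu_layers = sum(
    1 for il in range(model.n_layer()) if model.dev_layer(il) == LlamaBackendDev.GPU
)
print(f"{gpu_layers}/{model.n_layer()} layers offloaded to GPU")

# Drop all cached KV entries between unrelated prompts via the renamed wrapper.
llm._ctx.kv_self_clear()
```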