From f99320933b160eecff1cc2a8a12718be7a31a3c4 Mon Sep 17 00:00:00 2001
From: Xu Song
Date: Sun, 25 Aug 2024 09:24:38 +0800
Subject: [PATCH] Fix memory allocation of ndarray

load_state() used to assign 0.0 to every element of the tail of
`self.scores` (the rows at and after `state.n_tokens`). Replace that
blanket assignment with a masked assignment so that only entries that
are not already zero are written.

The mask is `rest != 0` rather than `rest > 0`: a `> 0` mask would
leave negative values untouched, so the tail would no longer be fully
zeroed as the "zero-ing out the rest" comment (and the previous code)
requires.
---
 llama_cpp/llama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 9b1a3d263..65f39831b 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -2049,7 +2049,8 @@ def load_state(self, state: LlamaState) -> None:
         assert self._ctx.ctx is not None
         # Only filling in up to `n_tokens` and then zero-ing out the rest
         self.scores[: state.n_tokens, :] = state.scores.copy()
-        self.scores[state.n_tokens :, :] = 0.0
+        rest = self.scores[state.n_tokens :, :]
+        rest[rest != 0] = 0.0
         self.input_ids = state.input_ids.copy()
         self.n_tokens = state.n_tokens
         state_size = state.llama_state_size