From f99320933b160eecff1cc2a8a12718be7a31a3c4 Mon Sep 17 00:00:00 2001
From: Xu Song <xusong.vip@gmail.com>
Date: Sun, 25 Aug 2024 09:24:38 +0800
Subject: [PATCH] Fix memory allocation of scores ndarray in load_state

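---
Review note: the blanket assignment of 0.0 to self.scores[state.n_tokens :, :]
is replaced by a masked write that only resets entries greater than zero.
Negative entries in that tail are therefore left unchanged, while the
surrounding comment still says the rest is zeroed out. Please confirm this
is intended.
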
 llama_cpp/llama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 9b1a3d263..65f39831b 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -2049,7 +2049,8 @@ def load_state(self, state: LlamaState) -> None:
         assert self._ctx.ctx is not None
         # Only filling in up to `n_tokens` and then zero-ing out the rest
         self.scores[: state.n_tokens, :] = state.scores.copy()
-        self.scores[state.n_tokens :, :] = 0.0
+        rest = self.scores[state.n_tokens :, :]
+        rest[rest > 0] = 0.0
         self.input_ids = state.input_ids.copy()
         self.n_tokens = state.n_tokens
         state_size = state.llama_state_size