fixed bad memory access exception on iOS 17 #3527

Closed
wants to merge 9 commits
llama.cpp (52 changes: 44 additions & 8 deletions)
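Why this change is needed: `ggml_cgraph gf{};` creates the entire graph struct as a stack local. With the graph-size constants of this era of ggml, `sizeof(ggml_cgraph)` runs to several hundred kilobytes, which plausibly exceeds the 512 KB default stack that iOS gives secondary threads, so entering this code off the main thread faults with the bad-access exception in the title. The fix below moves the graph to the heap. A minimal stand-alone sketch of the failure mode and the fix; the struct and constants here are illustrative assumptions, not the real `ggml_cgraph`:

// stand-alone sketch (not llama.cpp code): a graph-sized struct on a small
// thread stack vs. on the heap; sizes are illustrative assumptions
#include <cstdio>
#include <cstdlib>
#include <cstring>

constexpr int MAX_NODES = 16384; // stand-in for GGML_MAX_NODES of this era
constexpr int HASH_SIZE = 32771; // stand-in for the visited hash table size

struct fake_cgraph {              // rough stand-in for ggml_cgraph
    int   n_nodes;
    int   n_leafs;
    void *nodes[MAX_NODES];
    void *grads[MAX_NODES];
    void *leafs[MAX_NODES];
    void *visited_hash_table[HASH_SIZE];
};

int main() {
    // roughly 640 KiB with the constants above: fine on a desktop main
    // thread, but larger than the 512 KB default stack of secondary
    // threads on iOS, where `fake_cgraph gf{};` would overflow the stack
    printf("sizeof(fake_cgraph) = %zu KiB\n", sizeof(fake_cgraph) / 1024);

    // the fix: allocate on the heap, zero-initialise, free when done
    fake_cgraph *gf = (fake_cgraph *) malloc(sizeof(fake_cgraph));
    memset(gf, 0, sizeof(*gf));
    free(gf);
    return 0;
}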
@@ -9001,7 +9001,21 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
     const size_t elt_size = ggml_element_size(kv_self.k);
 
     ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-    ggml_cgraph gf{};
+
+    // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
+    struct ggml_cgraph * gf = (struct ggml_cgraph *) malloc(sizeof(ggml_cgraph));
+
+    gf->n_nodes = 0;
+    gf->n_leafs = 0;
+    gf->order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
+    gf->perf_runs = 0;
+    gf->perf_cycles = 0;
+    gf->perf_time_us = 0;
+
+    memset(gf->nodes, 0, sizeof(gf->nodes));
+    memset(gf->grads, 0, sizeof(gf->grads));
+    memset(gf->leafs, 0, sizeof(gf->leafs));
+    memset(gf->visited_hash_table, 0, sizeof(gf->visited_hash_table));
 
     ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
     std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
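The field-by-field setup in this hunk mirrors what `ggml.c:ggml_new_graph` does, but writes into a plain `malloc` block, presumably because the 4096-byte `no_alloc` `cpy_ctx` has no room to carry the graph object itself. As a sketch only (assuming the `ggml_cgraph` layout this PR targets, with `<cstdlib>` and `ggml.h` available as they are in llama.cpp), the same initialisation can be collapsed into a `calloc` helper:

// sketch, not part of the PR: calloc zeroes n_nodes, n_leafs, the perf
// counters, and every pointer array in one step, replacing the memsets
static struct ggml_cgraph * new_heap_cgraph(void) {
    struct ggml_cgraph * gf = (struct ggml_cgraph *) calloc(1, sizeof(*gf));
    if (gf == NULL) {
        return NULL; // unlike the PR's unchecked malloc, report failure
    }
    // set the eval order explicitly rather than relying on its enum value
    gf->order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
    return gf;
}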
@@ -9019,16 +9033,20 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
         kv_head, n_embd, n_layer,
         elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);
 
-    ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
-    ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
-    ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
+    ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d));
+    ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d));
+    ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
 
     ggml_free(cpy_ctx);
 
     // our data is now in the kout3d_data and vout3d_data buffers
     // write them to file
     data_ctx->write(kout3d_data.data(), kout3d_data.size());
     data_ctx->write(vout3d_data.data(), vout3d_data.size());
+
+    // free our allocated graph
+    free(gf);
+    gf = NULL;
 }
 
 for (uint32_t i = 0; i < kv_size; ++i) {
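With `gf` now a raw heap pointer, every path out of the block needs the explicit `free(gf)` added above (the `gf = NULL;` just before the scope ends is harmless but redundant). A design-note sketch, not what the PR does: tying the lifetime to the scope with `std::unique_ptr` and a `free`-calling deleter would drop the manual cleanup while keeping behaviour identical:

// sketch only: RAII variant of the allocation used in this PR
#include <cstdlib>
#include <memory>

struct free_deleter { void operator()(void * p) const { std::free(p); } };

std::unique_ptr<ggml_cgraph, free_deleter> gf(
        (ggml_cgraph *) std::calloc(1, sizeof(ggml_cgraph)));

// ggml_* calls then take the raw pointer:
//   ggml_build_forward_expand(gf.get(), ggml_cpy(cpy_ctx, k3d, kout3d));
//   ggml_graph_compute_helper(ctx->work_buffer, gf.get(), /*n_threads*/ 1);
// and no free(gf) is needed: the deleter runs when gf leaves scope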
@@ -9129,7 +9147,21 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
     const size_t elt_size = ggml_element_size(kv_self.k);
 
     ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-    ggml_cgraph gf{};
+
+    // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
+    struct ggml_cgraph * gf = (struct ggml_cgraph *) malloc(sizeof(ggml_cgraph));
+
+    gf->n_nodes = 0;
+    gf->n_leafs = 0;
+    gf->order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
+    gf->perf_runs = 0;
+    gf->perf_cycles = 0;
+    gf->perf_time_us = 0;
+
+    memset(gf->nodes, 0, sizeof(gf->nodes));
+    memset(gf->grads, 0, sizeof(gf->grads));
+    memset(gf->leafs, 0, sizeof(gf->leafs));
+    memset(gf->visited_hash_table, 0, sizeof(gf->visited_hash_table));
 
     ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
     kin3d->data = (void *) inp;
@@ -9147,11 +9179,15 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         kv_head, n_embd, n_layer,
         elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);
 
-    ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
-    ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
-    ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
+    ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d));
+    ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d));
+    ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
 
     ggml_free(cpy_ctx);
+
+    // free our allocated graph
+    free(gf);
+    gf = NULL;
 }
 
 ctx->kv_self.head = kv_head;
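For context, both patched functions back the public state save/restore API, which is what an iOS app hits when persisting a conversation across launches. A hedged usage sketch against the llama.h of this era (error handling omitted; `save_and_restore` is an illustrative name, not a llama.cpp function):

#include <vector>
#include "llama.h"

// round-trips the full context state (KV cache included) through the
// code paths this PR fixes
static void save_and_restore(llama_context * ctx) {
    std::vector<uint8_t> state(llama_get_state_size(ctx));
    llama_copy_state_data(ctx, state.data()); // patched copy path above

    // ... later, on a context built with the same model and params:
    llama_set_state_data(ctx, state.data()); // patched set path above
}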