Commit 12d7a79

ggerganov authored and arthw committed
context : fix init of n_outputs (ggml-org#12397)
ggml-ci
1 parent d63d47e commit 12d7a79

File tree

1 file changed, +8 −2 lines changed

src/llama-context.cpp

+8 −2
@@ -285,11 +285,15 @@ llama_context::llama_context(
 
     // reserve worst-case graph
     if (!hparams.vocab_only) {
-        uint32_t n_seqs = 1; // TODO: worst-case number of sequences
-        uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);
+        const uint32_t n_seqs = 1; // TODO: worst-case number of sequences
+        const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);
 
         llama_token token = model.vocab.token_bos(); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph
 
+        // restore later
+        // TODO: something cleaner
+        const auto n_outputs_save = n_outputs;
+
         // max number of outputs
         n_outputs = n_tokens;
 
@@ -341,6 +345,8 @@ llama_context::llama_context(
         }
     }
 
+    n_outputs = n_outputs_save;
+
     for (size_t i = 0; i < backend_ptrs.size(); ++i) {
         ggml_backend_t backend = backend_ptrs[i];
         ggml_backend_buffer_type_t buft = backend_buft[i];
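What the change does: while reserving the worst-case compute graph, the constructor temporarily sets the n_outputs member to the worst-case token count; before this commit that temporary value was never restored, leaving n_outputs mis-initialized after construction. Below is a minimal, self-contained sketch of the save/restore pattern the diff applies; the struct, field values, and printf are illustrative stand-ins, not the real llama_context code.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for llama_context: only the fields needed to show
// the pattern; names follow the diff, values are arbitrary assumptions.
struct context_sketch {
    uint32_t n_ctx     = 4096;
    uint32_t n_ubatch  = 512;
    int32_t  n_outputs = 0;   // must keep its real value after construction

    void reserve_worst_case_graph() {
        const uint32_t n_tokens = std::min(n_ctx, n_ubatch);

        // save the current value before clobbering it (the commit's fix)
        const auto n_outputs_save = n_outputs;

        // pretend every token produces an output while sizing the graph
        n_outputs = (int32_t) n_tokens;
        // ... worst-case graph reservation would happen here ...

        // restore, so code running after the reservation sees the real value
        n_outputs = n_outputs_save;
    }
};

int main() {
    context_sketch ctx;
    ctx.reserve_worst_case_graph();
    std::printf("n_outputs after reservation: %d\n", (int) ctx.n_outputs); // 0, not 512
    return 0;
}

The "TODO: something cleaner" in the diff acknowledges that a scoped RAII guard would be a tidier way to express the same save/restore.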
