@@ -11912,12 +11912,13 @@ static void llama_grammar_advance_stack(
11912
11912
// be positioned at a character range (see `llama_grammar_advance_stack`), and
11913
11913
// produces the N possible stacks if the given char is accepted at those
11914
11914
// positions
11915
- std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
11915
+ void llama_grammar_accept(
11916
11916
const std::vector<std::vector<llama_grammar_element>> & rules,
11917
11917
const std::vector<std::vector<const llama_grammar_element *>> & stacks,
11918
- const uint32_t chr) {
11918
+ const uint32_t chr,
11919
+ std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
11919
11920
11920
- std::vector<std::vector<const llama_grammar_element *>> new_stacks;
11921
+ new_stacks.clear() ;
11921
11922
11922
11923
for (const auto & stack : stacks) {
11923
11924
if (stack.empty()) {
@@ -11936,8 +11937,6 @@ std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
11936
11937
llama_grammar_advance_stack(rules, new_stack, new_stacks);
11937
11938
}
11938
11939
}
11939
-
11940
- return new_stacks;
11941
11940
}
11942
11941
11943
11942
static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates(
@@ -11951,6 +11950,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
11951
11950
const std::vector<llama_grammar_candidate> & candidates) {
11952
11951
11953
11952
std::vector<llama_grammar_candidate> rejects;
11953
+ rejects.reserve(candidates.size());
11954
11954
11955
11955
if (stack.empty()) {
11956
11956
for (const auto & tok : candidates) {
@@ -11964,6 +11964,8 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
11964
11964
const llama_grammar_element * stack_pos = stack.back();
11965
11965
11966
11966
std::vector<llama_grammar_candidate> next_candidates;
11967
+ next_candidates.reserve(candidates.size());
11968
+
11967
11969
for (const auto & tok : candidates) {
11968
11970
if (*tok.code_points == 0) {
11969
11971
// reached end of full codepoints in token, reject iff it ended in a partial sequence
@@ -12771,8 +12773,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
12771
12773
// Note terminating 0 in decoded string
12772
12774
const auto decoded = decode_utf8(piece, grammar->partial_utf8);
12773
12775
const auto & code_points = decoded.first;
12776
+ std::vector<std::vector<const llama_grammar_element *>> tmp_new_stacks;
12774
12777
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
12775
- grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
12778
+ llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks);
12779
+ grammar->stacks = tmp_new_stacks;
12776
12780
}
12777
12781
grammar->partial_utf8 = decoded.second;
12778
12782
GGML_ASSERT(!grammar->stacks.empty());
0 commit comments