@@ -11912,12 +11912,13 @@ static void llama_grammar_advance_stack(
1191211912// be positioned at a character range (see `llama_grammar_advance_stack`), and
1191311913// produces the N possible stacks if the given char is accepted at those
1191411914// positions
11915- std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
11915+ void llama_grammar_accept(
1191611916 const std::vector<std::vector<llama_grammar_element>> & rules,
1191711917 const std::vector<std::vector<const llama_grammar_element *>> & stacks,
11918- const uint32_t chr) {
11918+ const uint32_t chr,
11919+ std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
1191911920
11920- std::vector<std::vector<const llama_grammar_element *>> new_stacks;
11921+ new_stacks.clear() ;
1192111922
1192211923 for (const auto & stack : stacks) {
1192311924 if (stack.empty()) {
@@ -11936,8 +11937,6 @@ std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
1193611937 llama_grammar_advance_stack(rules, new_stack, new_stacks);
1193711938 }
1193811939 }
11939-
11940- return new_stacks;
1194111940}
1194211941
1194311942static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates(
@@ -11951,6 +11950,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
1195111950 const std::vector<llama_grammar_candidate> & candidates) {
1195211951
1195311952 std::vector<llama_grammar_candidate> rejects;
11953+ rejects.reserve(candidates.size());
1195411954
1195511955 if (stack.empty()) {
1195611956 for (const auto & tok : candidates) {
@@ -11964,6 +11964,8 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
1196411964 const llama_grammar_element * stack_pos = stack.back();
1196511965
1196611966 std::vector<llama_grammar_candidate> next_candidates;
11967+ next_candidates.reserve(candidates.size());
11968+
1196711969 for (const auto & tok : candidates) {
1196811970 if (*tok.code_points == 0) {
1196911971 // reached end of full codepoints in token, reject iff it ended in a partial sequence
@@ -12771,8 +12773,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
1277112773 // Note terminating 0 in decoded string
1277212774 const auto decoded = decode_utf8(piece, grammar->partial_utf8);
1277312775 const auto & code_points = decoded.first;
12776+ std::vector<std::vector<const llama_grammar_element *>> tmp_new_stacks;
1277412777 for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
12775- grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
12778+ llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks);
12779+ grammar->stacks = tmp_new_stacks;
1277612780 }
1277712781 grammar->partial_utf8 = decoded.second;
1277812782 GGML_ASSERT(!grammar->stacks.empty());
0 commit comments