From 7a0a88d7cb6d6c9806e1e8c3bd1f394dfc7432dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= <johannesg@5d6.de>
Date: Sat, 29 Mar 2025 12:47:35 +0100
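Subject: [PATCH] server: apply grammar before other samplers

Pass `true` as the additional trailing argument to
common_sampler_sample() and common_sampler_sample_and_accept_n() in
server_context, so that both the regular sampling path and the
speculative-decoding path apply the grammar before the other samplers.
---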
 examples/server/server.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 17a292da153c1..554e659478c76 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3249,7 +3249,7 @@ struct server_context {
 
                 const int tok_idx = slot.i_batch - i;
 
-                llama_token id = common_sampler_sample(slot.smpl, ctx, tok_idx);
+                llama_token id = common_sampler_sample(slot.smpl, ctx, tok_idx, true);
 
                 slot.i_batch = -1;
 
@@ -3347,7 +3347,7 @@ struct server_context {
                 llama_decode(ctx, slot.batch_spec);
 
                 // the accepted tokens from the speculation
-                const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft);
+                const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft, true);
 
                 slot.n_past    += ids.size();
                 slot.n_decoded += ids.size();