We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 97ad402, commit 38328bb (Copy full SHA for 38328bb)
llama.cpp
@@ -9037,7 +9037,7 @@ static int llama_decode_internal(
9037
9038
// decide if we need to defrag the kv cache
9039
if (cparams.defrag_thold >= 0.0f) {
9040
- const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used + n_tokens_all)/float(kv_self.n) : 0.0f;
+ const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used)/float(kv_self.n) : 0.0f;
9041
9042
// queue defragmentation for next llama_kv_cache_update
9043
if (fragmentation > cparams.defrag_thold) {
0 commit comments