From fa8412835dbab99ecc3d903c56981e1b52dbaf76 Mon Sep 17 00:00:00 2001
From: AinL
Date: Mon, 14 Apr 2025 16:51:08 +0000
Subject: [PATCH] fix: change default value of `attn_temperature_tuning`

---
 src/transformers/models/llama4/configuration_llama4.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/llama4/configuration_llama4.py b/src/transformers/models/llama4/configuration_llama4.py
index 0013f6b33387..c4cef4d4ab55 100644
--- a/src/transformers/models/llama4/configuration_llama4.py
+++ b/src/transformers/models/llama4/configuration_llama4.py
@@ -228,7 +228,9 @@ class Llama4TextConfig(PretrainedConfig):
         no_rope_layer_interval (`int`, *optional*, defaults to 4): TODO
         attention_chunk_size (`int`, *optional*, defaults to 8192):
-        attn_temperature_tuning (`int`, *optional*, defaults to 4): TODO
+        attn_temperature_tuning (`bool`, *optional*, defaults to `True`):
+            Whether to dynamically scale the attention temperature for each query token based on sequence length.
+            Recommended for long sequences (e.g., >32k tokens) to maintain stable output results.
         floor_scale (`int`, *optional*, defaults to 8192): TODO
         attn_scale (`int`, *optional*, defaults to 0.1): TODO
         cache_implementation (``, *optional*, defaults to `"hybrid"`):
@@ -291,7 +293,7 @@ def __init__(
         no_rope_layers=None,
         no_rope_layer_interval=4,
         attention_chunk_size=8192,
-        attn_temperature_tuning=4,
+        attn_temperature_tuning=True,
         floor_scale=8192,
         attn_scale=0.1,
         cache_implementation="hybrid",
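
A minimal sketch (not part of the patch) of how the changed default behaves, assuming a transformers build that includes this change; the import path is taken from the file modified above, and the alternate keyword arguments shown are those visible in the __init__ context of the diff.

    # Sketch: instantiate the text config and inspect the new default.
    from transformers.models.llama4.configuration_llama4 import Llama4TextConfig

    config = Llama4TextConfig()
    # With the patch applied, temperature tuning is on by default (a bool, no longer the int 4).
    print(config.attn_temperature_tuning)  # True

    # It can still be disabled explicitly, e.g. for short-context workloads.
    config_no_tuning = Llama4TextConfig(attn_temperature_tuning=False)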