@@ -760,19 +760,22 @@ def get_hf_text_config(config: PretrainedConfig):
760760 """Get the "sub" config relevant to llm for multi modal models.
761761 No op for pure text models.
762762 """
763- if hasattr (config , "text_config" ):
764- # The code operates under the assumption that text_config should have
765- # `num_attention_heads` (among others). Assert here to fail early
766- # if transformers config doesn't align with this assumption.
767- assert hasattr (config .text_config , "num_attention_heads" )
768- return config .text_config
769- elif hasattr (config , "thinker_config" ):
763+ # This block should be unnecessary after https://github.com/huggingface/transformers/pull/37517
764+ if hasattr (config , "thinker_config" ):
770765 # TODO(suyang.fy): Refactor code.
771766 # For Qwen2.5-Omni, change hf_text_config to
772767 # thinker_config.text_config.
773768 return config .thinker_config .text_config
774- else :
775- return config
769+
770+ text_config = config .get_text_config ()
771+
772+ if text_config is not config :
773+ # The code operates under the assumption that text_config should have
774+ # `num_attention_heads` (among others). Assert here to fail early
775+ # if transformers config doesn't align with this assumption.
776+ assert hasattr (text_config , "num_attention_heads" )
777+
778+ return text_config
776779
777780
778781def try_get_generation_config (
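For context, a minimal sketch (not part of the diff) of the behavior the refactor relies on: `PretrainedConfig.get_text_config()` returns the nested text sub-config for multimodal models and the config itself for text-only models, which is why the old `hasattr(config, "text_config")` branch becomes redundant. This assumes a transformers version that provides `get_text_config()`; the model ids below are only illustrative.

# Sketch of get_text_config() behavior; model ids are illustrative.
from transformers import AutoConfig

# Multimodal model: get_text_config() returns the nested text sub-config,
# which carries `num_attention_heads` and the other attention attributes.
llava_config = AutoConfig.from_pretrained("llava-hf/llava-1.5-7b-hf")
text_config = llava_config.get_text_config()
assert text_config is not llava_config
assert hasattr(text_config, "num_attention_heads")

# Pure text model: get_text_config() is a no-op and returns the config itself.
gpt2_config = AutoConfig.from_pretrained("gpt2")
assert gpt2_config.get_text_config() is gpt2_config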