2 files changed, 7 insertions(+), 0 deletions(-)

include/llama.h
@@ -366,6 +366,8 @@ extern "C" {
         bool  no_perf;     // measure performance timings
         bool  op_offload;  // offload host tensor operations to device
         bool  swa_full;    // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
+                           // NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
+                           //       ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
     };
 
     // model quantization parameters
src/llama-context.cpp
@@ -123,6 +123,11 @@ llama_context::llama_context(
                 __func__, n_ctx_per_seq, hparams.n_ctx_train);
     }
 
+    if (!params.swa_full && cparams.n_seq_max > 1) {
+        LLAMA_LOG_WARN("%s: requested n_seq_max (%u) > 1, but swa_full is not enabled -- performance may be degraded: %s\n",
+                __func__, cparams.n_seq_max, "https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573");
+    }
+
     if (!hparams.vocab_only) {
         // GPU backends
         for (auto * dev : model.devices) {
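
Taken together, the two hunks say: if you create a context with more than one sequence, leave swa_full enabled (the default) unless you have a measured reason not to; otherwise the new warning above fires at context creation. Below is a minimal sketch of how that choice surfaces through the C API. It is not part of this PR; the model path and the n_seq_max value are illustrative only, and it assumes the llama.cpp API as of this change (llama_model_load_from_file / llama_init_from_model).

```c
// Hedged sketch: configuring a multi-sequence context so it does not
// hit the swa_full warning added in this PR. The model path and the
// n_seq_max value are hypothetical.
#include "llama.h"
#include <stdio.h>

int main(void) {
    llama_backend_init();

    struct llama_model_params mparams = llama_model_default_params();
    struct llama_model * model = llama_model_load_from_file("model.gguf", mparams); // hypothetical path
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    struct llama_context_params cparams = llama_context_default_params();
    cparams.n_seq_max = 4;    // decoding several sequences in parallel ...
    cparams.swa_full  = true; // ... so keep the full-size SWA cache to avoid
                              // the degraded-performance case warned about above

    struct llama_context * ctx = llama_init_from_model(model, cparams);
    if (ctx == NULL) {
        fprintf(stderr, "failed to create context\n");
        llama_model_free(model);
        return 1;
    }

    // ... build batches and call llama_decode() here ...

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}
```

If you do set swa_full = false while n_seq_max > 1, the new LLAMA_LOG_WARN fires during llama_context construction and links to the discussion explaining the slowdown.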