1 file changed: +4 -0 lines changed
@@ -360,6 +360,8 @@ class GPTQConfig(QuantizationConfigMixin):
360360        max_input_length (`int`, *optional*): 
361361            The maximum input length. This is needed to initialize a buffer that depends on the maximum expected input 
362362            length. It is specific to the exllama backend with act-order. 
363+         cache_block_outputs (`bool`, *optional*, defaults to `True`): 
364+                 Whether to cache block outputs to reuse as inputs for the succeeding block. 
363365    """ 
364366
365367    def  __init__ (
@@ -380,6 +382,7 @@ def __init__(
380382        pad_token_id : Optional [int ] =  None ,
381383        disable_exllama : bool  =  False ,
382384        max_input_length : Optional [int ] =  None ,
385+         cache_block_outputs : bool  =  True ,
383386        ** kwargs ,
384387    ):
385388        self .quant_method  =  QuantizationMethod .GPTQ 
@@ -399,6 +402,7 @@ def __init__(
399402        self .pad_token_id  =  pad_token_id 
400403        self .disable_exllama  =  disable_exllama 
401404        self .max_input_length  =  max_input_length 
405+         self .cache_block_outputs  =  cache_block_outputs 
402406        self .post_init ()
403407
404408    def  get_loading_attributes (self ):
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments