@@ -207,7 +207,7 @@ def get_user_model():
         trust_remote_code=args.trust_remote_code,
         revision=args.revision,
     )
-    tokenizer = AutoTokenizer.from_pretrained(args.model)
+    tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
     if args.approach == 'weight_only':
         user_model = user_model.float()
@@ -380,7 +380,7 @@ def eval_func(model):
     if args.code_generation:
         from intel_extension_for_transformers.llm.evaluation.lm_code_eval import evaluate
         from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(args.model)
+        tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
         results = evaluate(
             model=user_model,
             tokenizer=tokenizer,
@@ -419,7 +419,8 @@ def eval_func(model):
         start = time.time()
         results = evaluate(
             model="hf-causal",
-            model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
+            model_args='pretrained=' + args.model + ',tokenizer=' + args.model \
+                + ',dtype=float32' + ",trust_remote_code=" + str(args.trust_remote_code),
             user_model=user_model,
             batch_size=args.batch_size,
             tasks=args.tasks,
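The model_args value built above is a single comma-separated key=value string that the evaluation harness parses back into keyword arguments, which is why trust_remote_code is appended as str(args.trust_remote_code) rather than passed as a Python bool. A simplified, self-contained sketch of that parsing (an illustration of the string format, not the harness's actual code):

def parse_model_args(model_args: str) -> dict:
    """Parse 'k1=v1,k2=v2' into a dict, coercing 'True'/'False' to bools."""
    kwargs = {}
    for pair in model_args.split(","):
        key, _, value = pair.partition("=")
        if value in ("True", "False"):
            kwargs[key] = value == "True"
        else:
            kwargs[key] = value
    return kwargs

print(parse_model_args("pretrained=gpt2,tokenizer=gpt2,dtype=float32,trust_remote_code=True"))
# -> {'pretrained': 'gpt2', 'tokenizer': 'gpt2', 'dtype': 'float32', 'trust_remote_code': True}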
@@ -429,6 +430,8 @@ def eval_func(model):
         for task_name in args.tasks:
             if task_name == "wikitext":
                 acc = results["results"][task_name]["word_perplexity"]
+            elif task_name == "truthfulqa_mc":
+                acc = results["results"][task_name]["mc1"]
             else:
                 acc = results["results"][task_name]["acc"]
             print("Accuracy: %.5f" % acc)
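The metric key in the results dict varies by task: wikitext reports a perplexity, truthfulqa_mc reports multiple-choice scores (the new branch reads "mc1"), and most other tasks report "acc". A small helper mirroring the branching above, with toy data showing the dict shape the loop indexes into (illustrative only, not part of this PR):

def task_metric(results: dict, task_name: str) -> float:
    per_task = results["results"][task_name]
    if task_name == "wikitext":
        return per_task["word_perplexity"]  # lower is better
    if task_name == "truthfulqa_mc":
        return per_task["mc1"]  # single-true multiple-choice accuracy
    return per_task["acc"]

# Toy results dict in the shape the loop above consumes.
fake_results = {"results": {"truthfulqa_mc": {"mc1": 0.31, "mc2": 0.45}}}
print("Accuracy: %.5f" % task_metric(fake_results, "truthfulqa_mc"))  # Accuracy: 0.31000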