diff --git a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
index 9e94859a..d379aac2 100644
--- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
+++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
@@ -181,9 +181,9 @@
     "mammoth-coder": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
     # Based on config here: https://huggingface.co/TIGER-Lab/MAmmoTH-Coder-7B/blob/main/config.json#L12
     # Can also see 13B, 34B there too
-    "codellama": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
+    "code-llama": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
     # Based on config here: https://huggingface.co/codellama/CodeLlama-7b-hf/blob/main/config.json#L12
-    # Can also see 13B, 34B there too. Note, codellama is one word.
+    # Can also see 13B, 34B there too
     "llama-2": {"max_model_len": None, "max_num_batched_tokens": 4096},
     "mistral": {"max_model_len": 8000, "max_num_batched_tokens": 8000},
 }