From d3f16a906f42c9b3b31ec8a32da39a5cc4f6d113 Mon Sep 17 00:00:00 2001
From: Yunfeng Bai <83252681+yunfeng-scale@users.noreply.github.com>
Date: Thu, 16 Nov 2023 17:42:41 -0800
Subject: [PATCH] Revert "Found a bug in the codellama vllm model_len logic.
 (#380)"

This reverts commit 5b6aeff6b6636838d31c90d7f3f3f6d915390a6f.
---
 .../domain/use_cases/llm_model_endpoint_use_cases.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
index 9e94859a..d379aac2 100644
--- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
+++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
@@ -181,9 +181,9 @@
     "mammoth-coder": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
     # Based on config here: https://huggingface.co/TIGER-Lab/MAmmoTH-Coder-7B/blob/main/config.json#L12
     # Can also see 13B, 34B there too
-    "codellama": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
+    "code-llama": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
     # Based on config here: https://huggingface.co/codellama/CodeLlama-7b-hf/blob/main/config.json#L12
-    # Can also see 13B, 34B there too. Note, codellama is one word.
+    # Can also see 13B, 34B there too
     "llama-2": {"max_model_len": None, "max_num_batched_tokens": 4096},
     "mistral": {"max_model_len": 8000, "max_num_batched_tokens": 8000},
 }
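
For reviewers, a minimal sketch of why the key spelling matters, assuming the endpoint code resolves these overrides by substring match against the requested model name. The dict name and the `get_length_overrides` helper below are hypothetical illustrations; the actual matching logic lives elsewhere in llm_model_endpoint_use_cases.py and is not part of this patch.

```python
# Hypothetical sketch of the lookup this key feeds into; names here are
# illustrative, not the repository's actual code.
from typing import Dict, Optional

MODEL_LENGTH_OVERRIDES: Dict[str, Dict[str, Optional[int]]] = {
    "mammoth-coder": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
    # Key as restored by this revert (hyphenated).
    "code-llama": {"max_model_len": 16384, "max_num_batched_tokens": 16384},
    "llama-2": {"max_model_len": None, "max_num_batched_tokens": 4096},
    "mistral": {"max_model_len": 8000, "max_num_batched_tokens": 8000},
}

def get_length_overrides(model_name: str) -> Optional[Dict[str, Optional[int]]]:
    """Return the first override whose key is a substring of the model name."""
    for key, overrides in MODEL_LENGTH_OVERRIDES.items():
        if key in model_name:
            return overrides
    return None

# HuggingFace spells the family as one word ("codellama/CodeLlama-7b-hf"),
# so under a substring match the hyphenated key never fires:
assert get_length_overrides("codellama-7b") is None
```

Under that assumption, commit #380's one-word key "codellama" would match such names while the hyphenated "code-llama" restored here would not, so vLLM endpoints for those models would fall back to default length settings rather than the 16384-token overrides.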