From ae09632cfefd32bede1ebb0160529444a6f3985a Mon Sep 17 00:00:00 2001 From: Michael Choi Date: Mon, 4 Nov 2024 17:52:24 +0000 Subject: [PATCH] Remove restricted model name check --- .../use_cases/llm_model_endpoint_use_cases.py | 5 +-- .../inference/vllm/build_and_upload_image.sh | 2 +- .../tests/unit/domain/test_llm_use_cases.py | 35 ------------------- 3 files changed, 4 insertions(+), 38 deletions(-) diff --git a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py index d2d10b9d..afdfb5ab 100644 --- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py +++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py @@ -381,9 +381,10 @@ def _model_endpoint_entity_to_get_llm_model_endpoint_response( def validate_model_name(model_name: str, inference_framework: LLMInferenceFramework) -> None: + # TODO: replace this logic to check if the model architecture is supported instead if model_name not in _SUPPORTED_MODELS_BY_FRAMEWORK[inference_framework]: - raise ObjectHasInvalidValueException( - f"Model name {model_name} is not supported for inference framework {inference_framework}." + logger.warning( + f"Model name {model_name} may not be supported by inference framework {inference_framework}." 
) diff --git a/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh b/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh index 10765cc0..3b1ab4cb 100755 --- a/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh +++ b/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh @@ -43,7 +43,7 @@ aws ecr get-login-password --region us-west-2 | docker login --username AWS --pa DOCKER_BUILDKIT=1 docker build \ --build-arg VLLM_VERSION=${VLLM_VERSION} \ --build-arg VLLM_BASE_REPO=${VLLM_BASE_REPO} \ - -f Dockerfile.vllm \ + -f ${DOCKERFILE} \ --target ${BUILD_TARGET} \ -t $IMAGE ${PROJECT_DIR} docker push $IMAGE diff --git a/model-engine/tests/unit/domain/test_llm_use_cases.py b/model-engine/tests/unit/domain/test_llm_use_cases.py index 9e160846..f1392168 100644 --- a/model-engine/tests/unit/domain/test_llm_use_cases.py +++ b/model-engine/tests/unit/domain/test_llm_use_cases.py @@ -704,41 +704,6 @@ async def test_create_model_endpoint_trt_llm_use_case_success( ) -@pytest.mark.asyncio -async def test_create_llm_model_endpoint_use_case_raises_invalid_value_exception( - test_api_key: str, - fake_model_bundle_repository, - fake_model_endpoint_service, - fake_docker_repository_image_always_exists, - fake_model_primitive_gateway, - fake_llm_artifact_gateway, - create_llm_model_endpoint_request_invalid_model_name: CreateLLMModelEndpointV1Request, -): - fake_model_endpoint_service.model_bundle_repository = fake_model_bundle_repository - bundle_use_case = CreateModelBundleV2UseCase( - model_bundle_repository=fake_model_bundle_repository, - docker_repository=fake_docker_repository_image_always_exists, - model_primitive_gateway=fake_model_primitive_gateway, - ) - llm_bundle_use_case = CreateLLMModelBundleV1UseCase( - create_model_bundle_use_case=bundle_use_case, - model_bundle_repository=fake_model_bundle_repository, - llm_artifact_gateway=fake_llm_artifact_gateway, - 
docker_repository=fake_docker_repository_image_always_exists, - ) - use_case = CreateLLMModelEndpointV1UseCase( - create_llm_model_bundle_use_case=llm_bundle_use_case, - model_endpoint_service=fake_model_endpoint_service, - docker_repository=fake_docker_repository_image_always_exists, - llm_artifact_gateway=fake_llm_artifact_gateway, - ) - user = User(user_id=test_api_key, team_id=test_api_key, is_privileged_user=True) - with pytest.raises(ObjectHasInvalidValueException): - await use_case.execute( - user=user, request=create_llm_model_endpoint_request_invalid_model_name - ) - - @pytest.mark.asyncio async def test_create_llm_model_endpoint_use_case_quantization_exception( test_api_key: str,