diff --git a/vllm/entrypoints/openai/serving_classification.py b/vllm/entrypoints/openai/serving_classification.py
index 45bbe732a680..263c5ca3ea7d 100644
--- a/vllm/entrypoints/openai/serving_classification.py
+++ b/vllm/entrypoints/openai/serving_classification.py
@@ -64,7 +64,7 @@ async def _preprocess(
 
         except (ValueError, TypeError) as e:
             logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
+            return self.create_error_response(f"{e} {e.__cause__}")
 
     @override
     def _build_response(
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 7af64306023a..908b851bf3f8 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -127,18 +127,9 @@ async def create_completion(
                 prompt_embeds=request.prompt_embeds,
                 config=self._build_render_config(request),
             )
-        except ValueError as e:
-            logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
-        except TypeError as e:
-            logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
-        except RuntimeError as e:
-            logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
-        except jinja2.TemplateError as e:
+        except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
             logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
+            return self.create_error_response(f"{e} {e.__cause__}")
 
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[RequestOutput, None]] = []
diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py
index 8f1df9a5aea6..966af119a1da 100644
--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
@@ -111,7 +111,7 @@ async def _preprocess(
             return None
         except (ValueError, TypeError) as e:
             logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
+            return self.create_error_response(f"{e} {e.__cause__}")
 
     def _build_render_config(self, request: EmbeddingCompletionRequest) -> RenderConfig:
         # Set max_length based on chunked processing capability
diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py
index 39cc539c1187..7f37801b57fa 100644
--- a/vllm/entrypoints/openai/serving_pooling.py
+++ b/vllm/entrypoints/openai/serving_pooling.py
@@ -163,7 +163,7 @@ async def create_pooling(
                 raise ValueError(f"Unsupported request of type {type(request)}")
         except (ValueError, TypeError, jinja2.TemplateError) as e:
             logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
+            return self.create_error_response(f"{e} {e.__cause__}")
 
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
diff --git a/vllm/entrypoints/openai/speech_to_text.py b/vllm/entrypoints/openai/speech_to_text.py
index fa6e962a1dd7..653654a3188a 100644
--- a/vllm/entrypoints/openai/speech_to_text.py
+++ b/vllm/entrypoints/openai/speech_to_text.py
@@ -179,7 +179,7 @@ async def _create_speech_to_text(
 
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
-            return self.create_error_response(str(e))
+            return self.create_error_response(f"{e} {e.__cause__}")
 
         list_result_generator: list[AsyncGenerator[RequestOutput, None]] | None = None
         try:
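
The common change across all five handlers is formatting the client-facing error message as f"{e} {e.__cause__}" instead of str(e). Below is a minimal standalone sketch (not code from this diff; render_prompt and its error messages are invented for illustration) of why that matters: when preprocessing wraps a lower-level failure via exception chaining ("raise ... from ..."), str(e) alone shows only the outer message and drops the chained cause that carries the actionable detail.

# Hypothetical stand-in for a preprocessing step that chains exceptions.
def render_prompt() -> None:
    try:
        # Stand-in for a failure deep inside prompt decoding.
        raise TypeError("tensor has dtype float64, expected float32")
    except TypeError as decode_err:
        # "raise ... from ..." sets the outer exception's __cause__.
        raise ValueError("Error in preprocessing prompt inputs") from decode_err

try:
    render_prompt()
except (ValueError, TypeError) as e:
    print(str(e))                # Error in preprocessing prompt inputs
    print(f"{e} {e.__cause__}")  # Error in preprocessing prompt inputs tensor
                                 # has dtype float64, expected float32
    # Note: for an exception with no chained cause, e.__cause__ is None,
    # so the formatted message ends with the literal string "None".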