From d87cca162a4e8736eae9b86b9544e829a3c066e8 Mon Sep 17 00:00:00 2001
From: Graeme Power
Date: Mon, 9 Dec 2024 15:07:37 +0000
Subject: [PATCH] fix: add missing await statements for async exit_stack
 handling

---
 llama_cpp/server/app.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index f7c028475..b6db453b8 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -318,7 +318,7 @@ async def create_completion(
             Iterator[llama_cpp.CreateCompletionStreamResponse],
         ] = await run_in_threadpool(llama, **kwargs)
     except Exception as err:
-        exit_stack.close()
+        await exit_stack.aclose()
         raise err
 
     if isinstance(iterator_or_completion, Iterator):
@@ -475,7 +475,7 @@ async def create_chat_completion(
     # is complete.
     # https://github.com/tiangolo/fastapi/issues/11143
     exit_stack = contextlib.AsyncExitStack()
-    llama_proxy = exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
+    llama_proxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
     if llama_proxy is None:
         raise HTTPException(
             status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
@@ -513,7 +513,7 @@ async def create_chat_completion(
             llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
         ] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
     except Exception as err:
-        exit_stack.close()
+        await exit_stack.aclose()
        raise err
 
     if isinstance(iterator_or_completion, Iterator):
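
Note (not part of the patch): the standalone Python sketch below illustrates the stdlib behavior this fix relies on, independent of the llama-cpp-python code; the resource() context manager and names are hypothetical stand-ins for get_llama_proxy. contextlib.AsyncExitStack has no synchronous close(); both enter_async_context() and aclose() are coroutines and must be awaited, otherwise the context is never entered and cleanup never runs.

    import asyncio
    import contextlib


    @contextlib.asynccontextmanager
    async def resource():
        # Hypothetical async resource; stands in for get_llama_proxy in the patch.
        print("acquired")
        try:
            yield "handle"
        finally:
            print("released")


    async def main():
        exit_stack = contextlib.AsyncExitStack()
        # enter_async_context() is a coroutine method: without `await` it only
        # returns an un-run coroutine object and the resource is never acquired.
        handle = await exit_stack.enter_async_context(resource())
        try:
            print(f"using {handle}")
            raise RuntimeError("simulated failure")
        except Exception:
            # AsyncExitStack defines aclose(), not close(); awaiting it here is
            # what actually runs the finally block in resource() on error.
            await exit_stack.aclose()
            raise


    if __name__ == "__main__":
        try:
            asyncio.run(main())
        except RuntimeError:
            pass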