2323
2424# pyright: reportRedeclaration=false
2525
26+ DEFAULT_TIMEOUT_SECONDS = aiohttp .ClientTimeout (total = 600 )
27+
2628@step ("a server listening on {server_fqdn}:{server_port}" )
2729def step_server_config (context , server_fqdn : str , server_port : str ):
2830 context .server_fqdn = server_fqdn
@@ -689,7 +691,7 @@ def step_tokenize_set_add_special(context):
689691@async_run_until_complete
690692async def step_tokenize (context ):
691693 context .tokenized_text = context_text (context )
692- async with aiohttp .ClientSession () as session :
694+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
693695 tokenize_args = {
694696 "content" : context .tokenized_text ,
695697 }
@@ -706,7 +708,7 @@ async def step_tokenize(context):
706708@async_run_until_complete
707709async def step_detokenize (context ):
708710 assert len (context .tokens ) > 0
709- async with aiohttp .ClientSession () as session :
711+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
710712 async with session .post (f'{ context .base_url } /detokenize' ,
711713 json = {
712714 "tokens" : context .tokens ,
@@ -735,7 +737,7 @@ def step_strings_for_tokenization(context):
735737@step ('an OPTIONS request is sent from {origin}' )
736738@async_run_until_complete
737739async def step_options_request (context , origin ):
738- async with aiohttp .ClientSession () as session :
740+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
739741 headers = {'Authorization' : f'Bearer { context .user_api_key } ' , 'Origin' : origin }
740742 async with session .options (f'{ context .base_url } /v1/chat/completions' ,
741743 headers = headers ) as response :
@@ -751,7 +753,7 @@ def step_check_options_header_value(context, cors_header, cors_header_value):
751753@step ('prometheus metrics are exposed' )
752754@async_run_until_complete
753755async def step_prometheus_metrics_exported (context ):
754- async with aiohttp .ClientSession () as session :
756+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
755757 async with await session .get (f'{ context .base_url } /metrics' ) as metrics_response :
756758 assert metrics_response .status == 200
757759 assert metrics_response .headers ['Content-Type' ] == "text/plain; version=0.0.4"
@@ -824,7 +826,7 @@ async def concurrent_requests(context, f_completion, *args, **kwargs):
824826@step ('the slot {slot_id:d} is saved with filename "{filename}"' )
825827@async_run_until_complete
826828async def step_save_slot (context , slot_id , filename ):
827- async with aiohttp .ClientSession () as session :
829+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
828830 async with session .post (f'{ context .base_url } /slots/{ slot_id } ?action=save' ,
829831 json = {"filename" : filename },
830832 headers = {"Content-Type" : "application/json" }) as response :
@@ -834,7 +836,7 @@ async def step_save_slot(context, slot_id, filename):
834836@step ('the slot {slot_id:d} is restored with filename "{filename}"' )
835837@async_run_until_complete
836838async def step_restore_slot (context , slot_id , filename ):
837- async with aiohttp .ClientSession () as session :
839+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
838840 async with session .post (f'{ context .base_url } /slots/{ slot_id } ?action=restore' ,
839841 json = {"filename" : filename },
840842 headers = {"Content-Type" : "application/json" }) as response :
@@ -844,7 +846,7 @@ async def step_restore_slot(context, slot_id, filename):
844846@step ('the slot {slot_id:d} is erased' )
845847@async_run_until_complete
846848async def step_erase_slot (context , slot_id ):
847- async with aiohttp .ClientSession () as session :
849+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
848850 async with session .post (f'{ context .base_url } /slots/{ slot_id } ?action=erase' ,
849851 headers = {"Content-Type" : "application/json" }) as response :
850852 context .response = response
@@ -853,7 +855,7 @@ async def step_erase_slot(context, slot_id):
853855@step ('switch {on_or_off} lora adapter {lora_id:d}' )
854856@async_run_until_complete
855857async def toggle_lora_adapter (context , on_or_off : str , lora_id : int ):
856- async with aiohttp .ClientSession () as session :
858+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
857859 async with session .post (f'{ context .base_url } /lora-adapters' ,
858860 json = [{'id' : lora_id , 'scale' : 1 if on_or_off == 'on' else 0 }],
859861 headers = {"Content-Type" : "application/json" }) as response :
@@ -889,7 +891,7 @@ async def request_completion(prompt,
889891 print (f"Set user_api_key: { user_api_key } " )
890892 headers ['Authorization' ] = f'Bearer { user_api_key } '
891893
892- async with aiohttp .ClientSession () as session :
894+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
893895 async with session .post (f'{ base_url } /completion' ,
894896 json = {
895897 "input_prefix" : prompt_prefix ,
@@ -902,8 +904,7 @@ async def request_completion(prompt,
902904 "temperature" : temperature if temperature is not None else 0.8 ,
903905 "n_probs" : 2 ,
904906 },
905- headers = headers ,
906- timeout = 3600 ) as response :
907+ headers = headers ) as response :
907908 if expect_api_error is None or not expect_api_error :
908909 assert response .status == 200
909910 assert response .headers ['Access-Control-Allow-Origin' ] == origin
@@ -961,7 +962,7 @@ async def oai_chat_completions(user_prompt,
961962 if async_client :
962963 origin = 'llama.cpp'
963964 headers = {'Authorization' : f'Bearer { user_api_key } ' , 'Origin' : origin }
964- async with aiohttp .ClientSession () as session :
965+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
965966 async with session .post (f'{ base_url } { base_path } ' ,
966967 json = payload ,
967968 headers = headers ) as response :
@@ -1048,7 +1049,7 @@ async def oai_chat_completions(user_prompt,
10481049
10491050
10501051async def request_embedding (content , seed , base_url = None ) -> list [list [float ]]:
1051- async with aiohttp .ClientSession () as session :
1052+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
10521053 async with session .post (f'{ base_url } /embedding' ,
10531054 json = {
10541055 "content" : content ,
@@ -1068,14 +1069,13 @@ async def request_oai_embeddings(input, seed,
10681069 headers = []
10691070 if user_api_key is not None :
10701071 headers = {'Authorization' : f'Bearer { user_api_key } ' , 'Origin' : origin }
1071- async with aiohttp .ClientSession () as session :
1072+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
10721073 async with session .post (f'{ base_url } /v1/embeddings' ,
10731074 json = {
10741075 "input" : input ,
10751076 "model" : model ,
10761077 },
1077- headers = headers ,
1078- timeout = 3600 ) as response :
1078+ headers = headers ) as response :
10791079 assert response .status == 200 , f"received status code not expected: { response .status } "
10801080 assert response .headers ['Access-Control-Allow-Origin' ] == origin
10811081 assert response .headers ['Content-Type' ] == "application/json; charset=utf-8"
@@ -1194,7 +1194,7 @@ async def wait_for_slots_status(context,
11941194 if 'GITHUB_ACTIONS' in os .environ :
11951195 timeout *= 2
11961196
1197- async with aiohttp .ClientSession () as session :
1197+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
11981198 while True :
11991199 async with await session .get (f'{ base_url } /slots' , params = params ) as slots_response :
12001200 status_code = slots_response .status
@@ -1237,7 +1237,7 @@ def assert_embeddings(embeddings):
12371237
12381238
12391239async def request_slots_status (context , expected_slots ):
1240- async with aiohttp .ClientSession () as session :
1240+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
12411241 async with await session .get (f'{ context .base_url } /slots' ) as slots_response :
12421242 assert slots_response .status == 200
12431243 slots = await slots_response .json ()
0 commit comments