Skip to content

Commit 5954e17

Browse files
committed
Add disagg test
Signed-off-by: Pengyun Lin <[email protected]>
1 parent 4ba28d8 commit 5954e17

File tree

4 files changed

+60
-2
lines changed

4 files changed

+60
-2
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[
2+
"Place of birth\nThe place of birth (POB) or birthplace is the place where a person was born. This place is often used in legal documents, together with name and date of birth, to uniquely identify a person. Practice regarding whether this place should be a country, a territory or a city/town/locality differs in different countries, but often city or territory is used for native-born citizen passports and countries for foreign-born ones.\nAs a general rule with respect to passports, if the place of birth is to be a country, it's determined to be the country that currently has sovereignty over the actual place of birth, regardless of when the birth actually occurred. The place of birth is not necessarily the place where the parents of the new baby live. If the baby is born in a hospital in another place, that place is the place of birth. In many countries, this also means that the government requires that the birth of the new baby is registered in the place of birth.\nSome countries place less or no importance on the place of birth, instead using alternative geographical characteristics for the purpose of identity documents. For example, Sweden has used the concept of födelsehemort (\"domicile of birth\") since 1947. This means that the domicile of the baby's mother is the registered place of birth.\nSimilarly, Switzerland uses the concept of place of origin. A child born to Swiss parents is automatically assigned the place of origin of the parent with the same last name, so the child either gets their mother's or father's place of origin. A child born to one Swiss parent and one foreign parent acquires the place of origin of their Swiss parent. In a Swiss passport and identity card, the holder's place of origin is stated, not their place of birth. 
In Japan, the registered domicile is a similar concept.\nIn some countries (primarily in the Americas), the place of birth automatically determines the nationality of the baby, a practice often referred to by the Latin phrase jus soli."
3+
]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
hostname: localhost
2+
port: 8000
3+
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
4+
free_gpu_memory_fraction: 0.5
5+
backend: "pytorch"
6+
cuda_graph_config: null
7+
disable_overlap_scheduler: True
8+
context_servers:
9+
num_instances: 1
10+
max_num_tokens: 512
11+
max_batch_size: 256
12+
cache_transceiver_config:
13+
backend: default
14+
urls:
15+
- "localhost:8001"
16+
generation_servers:
17+
num_instances: 1
18+
max_num_tokens: 256
19+
max_batch_size: 128
20+
cache_transceiver_config:
21+
backend: default
22+
urls:
23+
- "localhost:8002"

tests/integration/defs/disaggregated/test_disaggregated.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ def get_test_config(test_desc, example_dir, test_root):
3636
"""Get test configuration based on test description."""
3737
test_configs_root = f"{test_root}/test_configs"
3838
config_map = {
39+
"2_ranks_diff_max_tokens":
40+
(2, f"{test_configs_root}/disagg_config_diff_max_tokens.yaml"),
3941
"2_ranks": (2, f"{example_dir}/disagg_config.yaml"),
4042
"2_ranks_trt_backend":
4143
(2, f"{test_configs_root}/disagg_config_trt_backend.yaml"),
@@ -134,7 +136,8 @@ def run_disaggregated_test(example_dir,
134136
test_desc,
135137
num_iters=5,
136138
env=None,
137-
cwd=None):
139+
cwd=None,
140+
prompt_file="prompts.json"):
138141
"""Run disaggregated test with given configuration."""
139142
cleanup_output_files()
140143
run_env = env.copy()
@@ -175,10 +178,13 @@ def run_disaggregated_test(example_dir,
175178
client_cmd = [
176179
'python3', f'{client_dir}/disagg_client.py', '-c',
177180
f'{example_dir}/disagg_config.yaml', '-p',
178-
f'{client_dir}/prompts.json', '--ignore-eos',
181+
f'{client_dir}/{prompt_file}', '--ignore-eos',
179182
'--server-start-timeout',
180183
str(server_start_timeout)
181184
]
185+
if prompt_file == "long_prompts.json":
186+
# Use max_tokens 4 for long prompts to reduce test time
187+
client_cmd.extend(['--max-tokens', '4'])
182188
check_call(client_cmd,
183189
env=env,
184190
poll_procs=[workers_proc, server_proc])
@@ -207,6 +213,10 @@ def run_disaggregated_test(example_dir,
207213
env=env,
208214
poll_procs=[workers_proc, server_proc])
209215

216+
# Skip output verification for long prompts test
217+
if prompt_file == "long_prompts.json":
218+
continue
219+
210220
# Verify outputs
211221
not_expected_strings = ["Berlin Berlin"]
212222

@@ -250,6 +260,27 @@ def run_disaggregated_test(example_dir,
250260
workers_proc.wait()
251261

252262

263+
@pytest.mark.parametrize("llama_model_root", ['TinyLlama-1.1B-Chat-v1.0'],
                         indirect=True)
def test_disaggregated_diff_max_tokens(disaggregated_test_root,
                                       disaggregated_example_root, llm_venv,
                                       llama_model_root):
    """Run the disaggregated test using the "2_ranks_diff_max_tokens" config.

    The model root is symlinked into the venv working directory under
    ``TinyLlama/TinyLlama-1.1B-Chat-v1.0`` (skipped when that path is already
    a symlink), and ``run_disaggregated_test`` is then invoked with
    ``long_prompts.json`` as the prompt file.
    """
    model_links = {
        llama_model_root:
        f"{llm_venv.get_working_directory()}/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    }
    for model_src, link_path in model_links.items():
        if os.path.islink(link_path):
            # Link is already in place; nothing to create.
            continue
        os.makedirs(os.path.dirname(link_path), exist_ok=True)
        os.symlink(model_src, link_path, target_is_directory=True)

    run_disaggregated_test(
        disaggregated_example_root,
        "2_ranks_diff_max_tokens",
        env=llm_venv._new_env,
        cwd=llm_venv.get_working_directory(),
        prompt_file="long_prompts.json",
    )
282+
283+
253284
@pytest.mark.parametrize("llama_model_root", ['TinyLlama-1.1B-Chat-v1.0'],
254285
indirect=True)
255286
def test_disaggregated_single_gpu_with_mpirun(disaggregated_test_root,

tests/integration/test_lists/test-db/l0_a10.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ l0_a10:
2121
- disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
2222
- disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
2323
- disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
24+
- disaggregated/test_disaggregated.py::test_disaggregated_diff_max_tokens[TinyLlama-1.1B-Chat-v1.0]
2425
- test_e2e.py::test_openai_chat_structural_tag_example
2526
- test_e2e.py::test_openai_chat_multimodal_example
2627
- test_e2e.py::test_openai_lora

0 commit comments

Comments
 (0)