Skip to content

Commit a37dc45

Browse files
committed
mid eval
1 parent a7b55e7 commit a37dc45

File tree

7 files changed

+39
-13
lines changed

7 files changed

+39
-13
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
target1 127.0.0.1 8888
1+
target1 10.52.52.47 8888

examples/python/remote_storage_example/client_command.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@
99
# 1048576
1010
# 524288
1111

12-
python3 nixl_p2p_storage_example.py --fileprefix /raid/scratch/testfiles/client --role client --agents_file agent_file.in --name client1 --batch_size 32 --buf_size 524288
12+
export CUFILE_ENV_PATH_JSON="/workspace/nixl-tim/examples/python/remote_storage_example/cufile.json"
13+
14+
python3 nixl_p2p_storage_example.py --fileprefix /raid/scratch/testfiles/client --role client --agents_file agent_file.in --name client1 --batch_size 128 --buf_size 2097152
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
// NOTE: An application can supply a custom configuration via export CUFILE_ENV_PATH_JSON=<filepath>
3+
// e.g.: export CUFILE_ENV_PATH_JSON="/home/<xxx>/cufile.json"
4+
5+
"properties": {
6+
// allow compat mode; this enables use of cuFile POSIX reads/writes
7+
"allow_compat_mode": true,
8+
// max IO chunk size (parameter should be a multiple of 64K) used by cuFileRead/Write internally per IO request
9+
"max_direct_io_size_kb" : 16384,
10+
// device memory size (parameter should be 4K aligned) for reserving bounce buffers for the entire GPU
11+
"max_device_cache_size_kb" : 2097152,
12+
// Note: ensure (max_device_cache_size_kb / per_buffer_cache_size_kb) >= io_batchsize
13+
"per_buffer_cache_size_kb": 16384,
14+
// limit on maximum device memory size (parameter should be 4K aligned) that can be pinned for a given process
15+
"max_device_pinned_mem_size_kb" : 33554432,
16+
// per-IO bounce-buffer size (parameter should be a multiple of 64K), ranging from 1024 KB to 16384 KB; NOTE(review): this key is already set earlier in this object - remove one of the two entries
17+
"per_buffer_cache_size_kb" : 16384,
18+
19+
// posix bounce buffer pool size allocations
20+
"posix_pool_slab_size_kb" : [ 4, 1024, 16384],
21+
// posix bounce buffer pool max counts
22+
"posix_pool_slab_count": [512, 512, 512]
23+
}
24+
}

examples/python/remote_storage_example/nixl_p2p_storage_example.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,18 +52,16 @@ def remote_storage_transfer(my_agent, my_mem_descs, operation, remote_agent_name
5252
logger.info(f"Sending {operation} request to {remote_agent_name}")
5353
test_descs_str = my_agent.get_serialized_descs(my_mem_descs)
5454

55-
start_time = time.perf_counter()
55+
start_time = time.time()
5656

5757
my_agent.send_notif(remote_agent_name, operation + iterations + test_descs_str)
5858

5959
while not my_agent.check_remote_xfer_done(remote_agent_name, b"COMPLETE"):
6060
continue
6161

62-
end_time = time.perf_counter()
62+
end_time = time.time()
6363

64-
elapsed = end_time - start_time
65-
66-
logger.info(f"Time for {iterations} iterations: {elapsed} seconds")
64+
logger.info(f"Time for {iterations} iterations: {end_time - start_time} seconds")
6765

6866

6967
def connect_to_agents(my_agent, agents_file):
@@ -193,7 +191,7 @@ def run_client(my_agent, nixl_mem_reg_descs, nixl_file_reg_descs, agents_file):
193191
# For sample purposes, write to and then read from local storage
194192
logger.info("Starting local transfer test...")
195193

196-
start_time = time.perf_counter()
194+
start_time = time.time()
197195

198196
for i in range (1, 100):
199197
execute_transfer(
@@ -206,13 +204,13 @@ def run_client(my_agent, nixl_mem_reg_descs, nixl_file_reg_descs, agents_file):
206204
["GDS_MT"]
207205
)
208206

209-
end_time = time.perf_counter()
207+
end_time = time.time()
210208

211209
elapsed = end_time - start_time
212210

213211
logger.info(f"Time for 100 WRITE iterations: {elapsed} seconds")
214212

215-
start_time = time.perf_counter()
213+
start_time = time.time()
216214

217215
for i in range (1, 100):
218216
execute_transfer(
@@ -225,7 +223,7 @@ def run_client(my_agent, nixl_mem_reg_descs, nixl_file_reg_descs, agents_file):
225223
["GDS_MT"]
226224
)
227225

228-
end_time = time.perf_counter()
226+
end_time = time.time()
229227

230228
elapsed = end_time - start_time
231229

Binary file not shown.

examples/python/remote_storage_example/nixl_storage_utils/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def create_agent_with_plugins(agent_name, port):
3838
plugin_list = new_nixl_agent.get_plugin_list()
3939

4040
if "GDS_MT" in plugin_list:
41-
new_nixl_agent.create_backend("GDS_MT", {"thread_count": "8"})
41+
new_nixl_agent.create_backend("GDS_MT")
4242
logger.info("Using GDS storage backend")
4343
if "POSIX" in plugin_list:
4444
new_nixl_agent.create_backend("POSIX")

examples/python/remote_storage_example/server_command.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@
99
# 1048576
1010
# 524288
1111

12-
python3 nixl_p2p_storage_example.py --fileprefix /raid/scratch/testfiles/target1 --role server --port 8888 --name target1 --batch_size 32 --buf_size 524288
12+
export UCX_MAX_RMA_RAILS=1
13+
14+
python3 nixl_p2p_storage_example.py --fileprefix /raid/scratch/testfiles/target1 --role server --port 8888 --name target1 --batch_size 32 --buf_size 8388608

0 commit comments

Comments
 (0)