Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ MODELS=(

# Number of prefill and decode instances to create
NUM_PREFILL_INSTANCES=${NUM_PREFILL_INSTANCES:-1} # Default to 1
NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:-2} # Default to 2
NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:-1} # Default to 1
PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1}
DECODER_TP_SIZE=${DECODER_TP_SIZE:-1}

# Find the git repository root directory
GIT_ROOT=$(git rev-parse --show-toplevel)
Expand Down Expand Up @@ -44,7 +46,6 @@ get_model_args() {
echo "$extra_args"
}


# Function to run tests for a specific model
run_tests_for_model() {
local model_name=$1
Expand All @@ -65,9 +66,11 @@ run_tests_for_model() {
for i in $(seq 0 $((NUM_PREFILL_INSTANCES-1))); do
# Calculate GPU ID - we'll distribute across available GPUs
GPU_ID=$((i % $(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)))


# Calculate port number (base port + instance number)
PORT=$((8100 + i))
# Calculate side channel port
# Calculate side channel port. Avoid clash with TP workers.
SIDE_CHANNEL_PORT=$((5559 + i))

echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT"
Expand All @@ -78,6 +81,7 @@ run_tests_for_model() {
--enforce-eager \
--disable-log-requests \
--gpu-memory-utilization 0.2 \
--tensor-parallel-size $PREFILLER_TP_SIZE \
--kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"

if [ -n "$model_args" ]; then
Expand All @@ -97,10 +101,11 @@ run_tests_for_model() {
for i in $(seq 0 $((NUM_DECODE_INSTANCES-1))); do
# Calculate GPU ID - we'll distribute across available GPUs, starting from after prefill GPUs
GPU_ID=$(((i + NUM_PREFILL_INSTANCES) % $(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)))

# Calculate port number (base port + instance number)
PORT=$((8200 + i))
# Calculate side channel port
SIDE_CHANNEL_PORT=$((5659 + i))
SIDE_CHANNEL_PORT=$((5659 + i * $DECODER_TP_SIZE))
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is bugged; it only works with 1P 1D, btw.


echo "Starting decode instance $i on GPU $GPU_ID, port $PORT"

Expand All @@ -110,6 +115,7 @@ run_tests_for_model() {
--enforce-eager \
--disable-log-requests \
--gpu-memory-utilization 0.2 \
--tensor-parallel-size $DECODER_TP_SIZE \
--kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"

if [ -n "$model_args" ]; then
Expand Down
Loading