From 12717da0e2edf4d980038960415198ce05558398 Mon Sep 17 00:00:00 2001
From: zdevito
Date: Fri, 3 Oct 2025 16:38:49 -0700
Subject: [PATCH 1/2] split CI

Differential Revision: [D83884590](https://our.internmc.facebook.com/intern/diff/D83884590/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D83884590/)!

[ghstack-poisoned]
---
 .github/workflows/test-gpu-python.yml | 29 +++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-gpu-python.yml b/.github/workflows/test-gpu-python.yml
index 2ef02057e..a7eb3426c 100644
--- a/.github/workflows/test-gpu-python.yml
+++ b/.github/workflows/test-gpu-python.yml
@@ -52,8 +52,33 @@ jobs:
         # pyre currently does not check these assertions
         pyright python/tests/test_python_actors.py
 
-        # Run GPU Python tests
-        LC_ALL=C pytest python/tests/ -s -v -m "not oss_skip"
+        # Run GPU Python tests split into 10 groups sequentially
+        # Each group runs separately with process cleanup in between
+        pip install pytest-split
+        for GROUP in {1..10}; do
+          echo "Running test group $GROUP of 10..."
+
+          # Kill any existing Python processes to ensure clean state
+          echo "Cleaning up Python processes before group $GROUP..."
+          pkill -9 python || true
+          pkill -9 pytest || true
+
+          # Wait a moment for processes to terminate
+          sleep 2
+
+          # Run tests for this group
+          LC_ALL=C pytest python/tests/ -s -v -m "not oss_skip" \
+            --dist=no \
+            --group=$GROUP \
+            --splits=10 || {
+            echo "Test group $GROUP failed with exit code $?"
+            exit 1
+          }
+
+          echo "Completed test group $GROUP of 10"
+        done
+
+        echo "All test groups completed successfully!"
         # TODO(meriksen): temporarily disabled to unblock lands while debugging
         # mock CUDA issues on the OSS setup
         # python python/tests/test_mock_cuda.py

From 47ea109fb50dfbb4362a923ccc531dcc9c59cdc7 Mon Sep 17 00:00:00 2001
From: zdevito
Date: Fri, 3 Oct 2025 17:37:08 -0700
Subject: [PATCH 2/2] Update on "split CI"

Differential Revision: [D83884590](https://our.internmc.facebook.com/intern/diff/D83884590/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D83884590/)!

[ghstack-poisoned]
---
 .github/workflows/test-gpu-python.yml | 30 ++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/test-gpu-python.yml b/.github/workflows/test-gpu-python.yml
index a7eb3426c..67344f52c 100644
--- a/.github/workflows/test-gpu-python.yml
+++ b/.github/workflows/test-gpu-python.yml
@@ -55,6 +55,8 @@ jobs:
         # Run GPU Python tests split into 10 groups sequentially
         # Each group runs separately with process cleanup in between
         pip install pytest-split
+        FAILED_GROUPS=()
+
         for GROUP in {1..10}; do
           echo "Running test group $GROUP of 10..."
 
@@ -67,18 +69,32 @@ jobs:
           sleep 2
 
           # Run tests for this group
-          LC_ALL=C pytest python/tests/ -s -v -m "not oss_skip" \
+          if LC_ALL=C pytest python/tests/ -s -v -m "not oss_skip" \
+            --ignore-glob="**/meta/**" \
             --dist=no \
             --group=$GROUP \
-            --splits=10 || {
-            echo "Test group $GROUP failed with exit code $?"
-            exit 1
-          }
+            --splits=10; then
+            echo "✓ Test group $GROUP completed successfully"
+          else
+            echo "✗ Test group $GROUP failed with exit code $?"  # must come first: any earlier command resets $? to 0
+            FAILED_GROUPS+=("$GROUP")
+          fi
 
-          echo "Completed test group $GROUP of 10"
         done
 
-        echo "All test groups completed successfully!"
+        # Final cleanup after all groups
+        echo "Final cleanup of Python processes..."
+        pkill -9 python || true
+        pkill -9 pytest || true
+
+        # Check if any groups failed and exit with appropriate code
+        if [ ${#FAILED_GROUPS[@]} -eq 0 ]; then
+          echo "✓ All test groups completed successfully!"
+        else
+          echo "✗ The following test groups failed: ${FAILED_GROUPS[*]}"
+          echo "Failed groups count: ${#FAILED_GROUPS[@]}/10"
+          exit 1
+        fi
         # TODO(meriksen): temporarily disabled to unblock lands while debugging
         # mock CUDA issues on the OSS setup
         # python python/tests/test_mock_cuda.py