
Commit 998abe0

driazati authored and Lucien0 committed
[ci] Remove hardcoded test shards (apache#10743)
This moves the sharding logic from being inlined in the Jenkinsfile to templated, so we can change just the number of shards and the test allocation in `conftest.py`, and the Jenkinsfile will be updated to match.

This also changes the test allocation from the previous manual balancing to a pseudo-random split between shards. Each shard needs to know only its shard index and the total number of shards; it then hashes each test and skips it unless that hash falls within its allocated range. This breaks up related tests across shards, but has the downside that any change to the number of shards will shuffle where the tests end up (ideally this is rare once we settle on a good number of shards).

For now this applies only to the GPU frontend tests, but eventually we could expand it to more jobs.

Co-authored-by: driazati <[email protected]>
1 parent edb47f4 commit 998abe0
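
The hash-based allocation described in the commit message can be pictured as a pytest hook in `conftest.py`. The sketch below is illustrative only: it assumes the `TVM_SHARD_INDEX` and `TVM_NUM_SHARDS` environment variables exported by the Jenkinsfile and a stable hash of each test's node ID; the actual TVM `conftest.py` may differ in detail.

```python
# Illustrative sketch of hash-based test sharding (not the exact TVM conftest.py).
# Each shard collects the full test suite but keeps only the tests whose hash
# falls into its bucket, so no manual balancing of tests across shards is needed.
import hashlib
import os

import pytest


def _shard_of(item) -> int:
    # Stable hash of the test's node ID, mapped onto the number of shards.
    digest = hashlib.sha256(item.nodeid.encode("utf-8")).hexdigest()
    return int(digest, 16) % int(os.environ["TVM_NUM_SHARDS"])


def pytest_collection_modifyitems(config, items):
    # Only shard when CI provides both variables; local runs stay unaffected.
    if "TVM_NUM_SHARDS" not in os.environ or "TVM_SHARD_INDEX" not in os.environ:
        return
    this_shard = int(os.environ["TVM_SHARD_INDEX"])
    skip = pytest.mark.skip(reason="Test assigned to a different shard")
    for item in items:
        if _shard_of(item) != this_shard:
            item.add_marker(skip)
```

Because assignment depends only on the hash and the shard count, changing `TVM_NUM_SHARDS` reshuffles which shard runs which test, which is the trade-off the commit message points out.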

File tree

5 files changed: +390 -219 lines changed


Jenkinsfile

Lines changed: 197 additions & 49 deletions
@@ -45,7 +45,7 @@
 // 'python3 jenkins/generate.py'
 // Note: This timestamp is here to ensure that updates to the Jenkinsfile are
 // always rebased on main before merging:
-// Generated at 2022-03-30T11:40:52.107833
+// Generated at 2022-04-07T13:50:22.427152
 
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 // NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. -->
@@ -88,7 +88,7 @@ tvm_multilib_tsim = 'build/libvta_tsim.so, ' +
 upstream_revision = null
 
 // command to start a docker container
-docker_run = 'docker/bash.sh'
+docker_run = 'docker/bash.sh --env CI --env TVM_SHARD_INDEX --env TVM_NUM_SHARDS'
 docker_build = 'docker/build.sh'
 // timeout in minutes
 max_time = 240
@@ -454,7 +454,7 @@ def fsim_test(image) {
 
 def cmake_build(image, path, make_flag) {
   sh (
-    script: "${docker_run} ${image} ./tests/scripts/task_build.py --sccache-bucket tvm-sccache-prod",
+    script: "${docker_run} --env CI_NUM_EXECUTORS ${image} ./tests/scripts/task_build.py --sccache-bucket tvm-sccache-prod",
     label: 'Run cmake build',
   )
 }
@@ -673,27 +673,58 @@ stage('Test') {
       Utils.markStageSkippedForConditional('unittest: GPU')
     }
   },
-  'integration: CPU': {
+  'integration: CPU 1 of 2': {
     if (!skip_ci && is_docs_only_build != 1) {
       node('CPU') {
-        ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/ut-python-cpu") {
+        ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
           try {
             init_git()
-            unpack_lib('cpu', tvm_multilib_tsim)
             timeout(time: max_time, unit: 'MINUTES') {
-              ci_setup(ci_cpu)
-              sh (
-                script: "${docker_run} ${ci_cpu} ./tests/scripts/task_python_integration.sh",
-                label: 'Run CPU integration tests',
-              )
+              withEnv([
+                'TVM_NUM_SHARDS=2',
+                'TVM_SHARD_INDEX=0'], {
+                unpack_lib('cpu', tvm_multilib_tsim)
+                ci_setup(ci_cpu)
+                sh (
+                  script: "${docker_run} ${ci_cpu} ./tests/scripts/task_python_integration.sh",
+                  label: 'Run CPU integration tests',
+                )
+              })
+            }
+          } finally {
+            junit 'build/pytest-results/*.xml'
+          }
+        }
+      }
+    } else {
+      Utils.markStageSkippedForConditional('integration: CPU 1 of 2')
+    }
+  },
+  'integration: CPU 2 of 2': {
+    if (!skip_ci && is_docs_only_build != 1) {
+      node('CPU') {
+        ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/integration-python-cpu") {
+          try {
+            init_git()
+            timeout(time: max_time, unit: 'MINUTES') {
+              withEnv([
+                'TVM_NUM_SHARDS=2',
+                'TVM_SHARD_INDEX=1'], {
+                unpack_lib('cpu', tvm_multilib_tsim)
+                ci_setup(ci_cpu)
+                sh (
+                  script: "${docker_run} ${ci_cpu} ./tests/scripts/task_python_integration.sh",
+                  label: 'Run CPU integration tests',
+                )
+              })
             }
           } finally {
             junit 'build/pytest-results/*.xml'
           }
         }
       }
     } else {
-      Utils.markStageSkippedForConditional('integration: CPU')
+      Utils.markStageSkippedForConditional('integration: CPU 2 of 2')
     }
   },
   'unittest: CPU': {
@@ -748,17 +779,16 @@ stage('Test') {
       Utils.markStageSkippedForConditional('python3: i386')
     }
   },
-  'python3: aarch64': {
+  'topi: aarch64': {
     if (!skip_ci && is_docs_only_build != 1) {
       node('ARM') {
         ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/ut-python-arm") {
-          try {
-            init_git()
-            unpack_lib('arm', tvm_multilib)
-            timeout(time: max_time, unit: 'MINUTES') {
+          timeout(time: max_time, unit: 'MINUTES') {
+            try {
+              init_git()
+              unpack_lib('arm', tvm_multilib)
              ci_setup(ci_arm)
              cpp_unittest(ci_arm)
-              python_unittest(ci_arm)
              sh (
                script: "${docker_run} ${ci_arm} ./tests/scripts/task_python_arm_compute_library.sh",
                label: 'Run test_arm_compute_lib test',
@@ -767,87 +797,205 @@ stage('Test') {
                script: "${docker_run} ${ci_arm} ./tests/scripts/task_python_topi.sh",
                label: 'Run TOPI tests',
              )
-            sh (
-              script: "${docker_run} ${ci_arm} ./tests/scripts/task_python_integration.sh",
-              label: 'Run CPU integration tests',
-            )
+            } finally {
+              junit 'build/pytest-results/*.xml'
+            }
+          }
+        }
+      }
+    } else {
+      Utils.markStageSkippedForConditional('topi: aarch64')
+    }
+  },
+  'integration: aarch64 1 of 2': {
+    if (!skip_ci && is_docs_only_build != 1) {
+      node('ARM') {
+        ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/ut-python-arm") {
+          try {
+            init_git()
+            timeout(time: max_time, unit: 'MINUTES') {
+              withEnv([
+                'TVM_NUM_SHARDS=2',
+                'TVM_SHARD_INDEX=0'], {
+                unpack_lib('arm', tvm_multilib)
+                ci_setup(ci_arm)
+                python_unittest(ci_arm)
+                sh (
+                  script: "${docker_run} ${ci_arm} ./tests/scripts/task_python_integration.sh",
+                  label: 'Run CPU integration tests',
+                )
+              })
             }
           } finally {
             junit 'build/pytest-results/*.xml'
           }
         }
       }
     } else {
-      Utils.markStageSkippedForConditional('python3: arm')
+      Utils.markStageSkippedForConditional('integration: aarch64 1 of 2')
     }
   },
-  'topi: GPU': {
+  'integration: aarch64 2 of 2': {
+    if (!skip_ci && is_docs_only_build != 1) {
+      node('ARM') {
+        ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/ut-python-arm") {
+          try {
+            init_git()
+            timeout(time: max_time, unit: 'MINUTES') {
+              withEnv([
+                'TVM_NUM_SHARDS=2',
+                'TVM_SHARD_INDEX=1'], {
+                unpack_lib('arm', tvm_multilib)
+                ci_setup(ci_arm)
+                python_unittest(ci_arm)
+                sh (
+                  script: "${docker_run} ${ci_arm} ./tests/scripts/task_python_integration.sh",
+                  label: 'Run CPU integration tests',
+                )
+              })
+            }
+          } finally {
+            junit 'build/pytest-results/*.xml'
+          }
+        }
+      }
+    } else {
+      Utils.markStageSkippedForConditional('integration: aarch64 2 of 2')
+    }
+  },
+  'topi: GPU 1 of 2': {
     if (!skip_ci && is_docs_only_build != 1) {
       node('GPU') {
         ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/topi-python-gpu") {
           try {
             init_git()
-            unpack_lib('gpu', tvm_multilib)
             timeout(time: max_time, unit: 'MINUTES') {
-              ci_setup(ci_gpu)
-              sh (
-                script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_topi.sh",
-                label: 'Run TOPI tests',
-              )
+              withEnv([
+                'TVM_NUM_SHARDS=2',
+                'TVM_SHARD_INDEX=0'], {
+                unpack_lib('gpu', tvm_multilib)
+                ci_setup(ci_gpu)
+                sh (
+                  script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_topi.sh",
+                  label: 'Run TOPI tests',
+                )
+              })
             }
           } finally {
             junit 'build/pytest-results/*.xml'
           }
         }
       }
     } else {
-      Utils.markStageSkippedForConditional('topi: GPU')
+      Utils.markStageSkippedForConditional('topi: GPU 1 of 2')
     }
   },
-  'frontend: GPU 1': {
+  'topi: GPU 2 of 2': {
+    if (!skip_ci && is_docs_only_build != 1) {
+      node('GPU') {
+        ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/topi-python-gpu") {
+          try {
+            init_git()
+            timeout(time: max_time, unit: 'MINUTES') {
+              withEnv([
+                'TVM_NUM_SHARDS=2',
+                'TVM_SHARD_INDEX=1'], {
+                unpack_lib('gpu', tvm_multilib)
+                ci_setup(ci_gpu)
+                sh (
+                  script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_topi.sh",
+                  label: 'Run TOPI tests',
+                )
+              })
+            }
+          } finally {
+            junit 'build/pytest-results/*.xml'
+          }
+        }
+      }
+    } else {
+      Utils.markStageSkippedForConditional('topi: GPU 2 of 2')
+    }
+  },
+  'frontend: GPU 1 of 3': {
     if (!skip_ci && is_docs_only_build != 1) {
       node('GPU') {
         ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/frontend-python-gpu") {
           try {
             init_git()
-            unpack_lib('gpu', tvm_multilib)
             timeout(time: max_time, unit: 'MINUTES') {
-              ci_setup(ci_gpu)
-              sh (
-                script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_frontend.sh 1",
-                label: 'Run Python frontend tests (shard 1)',
-              )
+              withEnv([
+                'TVM_NUM_SHARDS=3',
+                'TVM_SHARD_INDEX=0'], {
+                unpack_lib('gpu', tvm_multilib)
+                ci_setup(ci_gpu)
+                sh (
+                  script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_frontend.sh",
+                  label: 'Run Python frontend tests',
+                )
+              })
             }
           } finally {
             junit 'build/pytest-results/*.xml'
           }
         }
       }
-    } else {
-      Utils.markStageSkippedForConditional('frontend: GPU 1')
+    } else {
+      Utils.markStageSkippedForConditional('frontend: GPU 1 of 3')
     }
   },
-  'frontend: GPU 2': {
+  'frontend: GPU 2 of 3': {
     if (!skip_ci && is_docs_only_build != 1) {
       node('GPU') {
         ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/frontend-python-gpu") {
           try {
             init_git()
-            unpack_lib('gpu', tvm_multilib)
             timeout(time: max_time, unit: 'MINUTES') {
-              ci_setup(ci_gpu)
-              sh (
-                script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_frontend.sh 2",
-                label: 'Run Python frontend tests (shard 2)',
-              )
+              withEnv([
+                'TVM_NUM_SHARDS=3',
+                'TVM_SHARD_INDEX=1'], {
+                unpack_lib('gpu', tvm_multilib)
+                ci_setup(ci_gpu)
+                sh (
+                  script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_frontend.sh",
+                  label: 'Run Python frontend tests',
+                )
+              })
             }
           } finally {
             junit 'build/pytest-results/*.xml'
           }
         }
       }
-    } else {
-      Utils.markStageSkippedForConditional('frontend: GPU 2')
+    } else {
+      Utils.markStageSkippedForConditional('frontend: GPU 2 of 3')
+    }
+  },
+  'frontend: GPU 3 of 3': {
+    if (!skip_ci && is_docs_only_build != 1) {
+      node('GPU') {
+        ws("workspace/exec_${env.EXECUTOR_NUMBER}/tvm/frontend-python-gpu") {
+          try {
+            init_git()
+            timeout(time: max_time, unit: 'MINUTES') {
+              withEnv([
+                'TVM_NUM_SHARDS=3',
+                'TVM_SHARD_INDEX=2'], {
+                unpack_lib('gpu', tvm_multilib)
+                ci_setup(ci_gpu)
+                sh (
+                  script: "${docker_run} ${ci_gpu} ./tests/scripts/task_python_frontend.sh",
+                  label: 'Run Python frontend tests',
+                )
+              })
+            }
+          } finally {
+            junit 'build/pytest-results/*.xml'
+          }
+        }
+      }
+    } else {
+      Utils.markStageSkippedForConditional('frontend: GPU 3 of 3')
     }
   },
   'frontend: CPU': {
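
The sharded stages above differ only in the stage name and the `TVM_SHARD_INDEX` value, which is what makes them a good fit for templating via `python3 jenkins/generate.py` (referenced in the Jenkinsfile header comment). The sketch below only illustrates the idea of expanding one simplified stage template per shard; apart from the environment variable names and script paths taken from the diff, everything in it (template text, function names) is hypothetical and not the real generator.

```python
# Hypothetical sketch: expand one simplified stage template per shard so that
# only the shard count is hardcoded. The real jenkins/generate.py templates the
# full stage body (node, ws, try/finally, junit, ...), which is omitted here.
STAGE_TEMPLATE = """  '{name} {pos} of {total}': {{
    withEnv([
      'TVM_NUM_SHARDS={total}',
      'TVM_SHARD_INDEX={index}'], {{
      sh(script: "${{docker_run}} ${{ci_gpu}} {script}", label: '{label}')
    }})
  }},"""


def generate_shards(name, script, label, num_shards):
    return "\n".join(
        STAGE_TEMPLATE.format(name=name, pos=i + 1, total=num_shards,
                              index=i, script=script, label=label)
        for i in range(num_shards)
    )


if __name__ == "__main__":
    # Changing num_shards here is the only edit needed to add or remove shards.
    print(generate_shards("frontend: GPU",
                          "./tests/scripts/task_python_frontend.sh",
                          "Run Python frontend tests",
                          num_shards=3))
```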
