@@ -733,3 +733,238 @@ entry:
733733 %0 = load i32 , ptr inttoptr (i64 2147481600 to ptr )
734734 ret i32 %0
735735}
736+
737+ %struct.S = type { i64 , i64 } ; two i64 fields: the checks below index it with a 16-byte stride and reach field 1 at +8
738+
739+ define i64 @fold_addi_from_different_bb (i64 %k , i64 %n , ptr %a ) nounwind {
740+ ; RV32I-LABEL: fold_addi_from_different_bb:
741+ ; RV32I: # %bb.0: # %entry
742+ ; RV32I-NEXT: addi sp, sp, -48
743+ ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
744+ ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
745+ ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
746+ ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
747+ ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
748+ ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
749+ ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
750+ ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
751+ ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
752+ ; RV32I-NEXT: mv s0, a4
753+ ; RV32I-NEXT: mv s1, a3
754+ ; RV32I-NEXT: mv s2, a2
755+ ; RV32I-NEXT: beqz a3, .LBB20_3
756+ ; RV32I-NEXT: # %bb.1: # %entry
757+ ; RV32I-NEXT: slti a1, s1, 0
758+ ; RV32I-NEXT: beqz a1, .LBB20_4
759+ ; RV32I-NEXT: .LBB20_2:
760+ ; RV32I-NEXT: li s3, 0
761+ ; RV32I-NEXT: li s4, 0
762+ ; RV32I-NEXT: j .LBB20_6
763+ ; RV32I-NEXT: .LBB20_3:
764+ ; RV32I-NEXT: seqz a1, s2
765+ ; RV32I-NEXT: bnez a1, .LBB20_2
766+ ; RV32I-NEXT: .LBB20_4: # %for.body.lr.ph
767+ ; RV32I-NEXT: li s5, 0
768+ ; RV32I-NEXT: li s6, 0
769+ ; RV32I-NEXT: li s3, 0
770+ ; RV32I-NEXT: li s4, 0
771+ ; RV32I-NEXT: slli a0, a0, 4
772+ ; RV32I-NEXT: add a0, s0, a0
773+ ; RV32I-NEXT: addi s7, a0, 8
774+ ; RV32I-NEXT: .LBB20_5: # %for.body
775+ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
776+ ; RV32I-NEXT: mv a0, s0
777+ ; RV32I-NEXT: call f@plt
778+ ; RV32I-NEXT: lw a0, 4(s7)
779+ ; RV32I-NEXT: lw a1, 0(s7)
780+ ; RV32I-NEXT: add a0, a0, s4
781+ ; RV32I-NEXT: add s3, a1, s3
782+ ; RV32I-NEXT: sltu s4, s3, a1
783+ ; RV32I-NEXT: addi s5, s5, 1
784+ ; RV32I-NEXT: seqz a1, s5
785+ ; RV32I-NEXT: add s6, s6, a1
786+ ; RV32I-NEXT: xor a1, s5, s2
787+ ; RV32I-NEXT: xor a2, s6, s1
788+ ; RV32I-NEXT: or a1, a1, a2
789+ ; RV32I-NEXT: add s4, a0, s4
790+ ; RV32I-NEXT: bnez a1, .LBB20_5
791+ ; RV32I-NEXT: .LBB20_6: # %for.cond.cleanup
792+ ; RV32I-NEXT: mv a0, s3
793+ ; RV32I-NEXT: mv a1, s4
794+ ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
795+ ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
796+ ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
797+ ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
798+ ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
799+ ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
800+ ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
801+ ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
802+ ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
803+ ; RV32I-NEXT: addi sp, sp, 48
804+ ; RV32I-NEXT: ret
805+ ;
806+ ; RV32I-MEDIUM-LABEL: fold_addi_from_different_bb:
807+ ; RV32I-MEDIUM: # %bb.0: # %entry
808+ ; RV32I-MEDIUM-NEXT: addi sp, sp, -48
809+ ; RV32I-MEDIUM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
810+ ; RV32I-MEDIUM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
811+ ; RV32I-MEDIUM-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
812+ ; RV32I-MEDIUM-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
813+ ; RV32I-MEDIUM-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
814+ ; RV32I-MEDIUM-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
815+ ; RV32I-MEDIUM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
816+ ; RV32I-MEDIUM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
817+ ; RV32I-MEDIUM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
818+ ; RV32I-MEDIUM-NEXT: mv s0, a4
819+ ; RV32I-MEDIUM-NEXT: mv s1, a3
820+ ; RV32I-MEDIUM-NEXT: mv s2, a2
821+ ; RV32I-MEDIUM-NEXT: beqz a3, .LBB20_3
822+ ; RV32I-MEDIUM-NEXT: # %bb.1: # %entry
823+ ; RV32I-MEDIUM-NEXT: slti a1, s1, 0
824+ ; RV32I-MEDIUM-NEXT: beqz a1, .LBB20_4
825+ ; RV32I-MEDIUM-NEXT: .LBB20_2:
826+ ; RV32I-MEDIUM-NEXT: li s3, 0
827+ ; RV32I-MEDIUM-NEXT: li s4, 0
828+ ; RV32I-MEDIUM-NEXT: j .LBB20_6
829+ ; RV32I-MEDIUM-NEXT: .LBB20_3:
830+ ; RV32I-MEDIUM-NEXT: seqz a1, s2
831+ ; RV32I-MEDIUM-NEXT: bnez a1, .LBB20_2
832+ ; RV32I-MEDIUM-NEXT: .LBB20_4: # %for.body.lr.ph
833+ ; RV32I-MEDIUM-NEXT: li s5, 0
834+ ; RV32I-MEDIUM-NEXT: li s6, 0
835+ ; RV32I-MEDIUM-NEXT: li s3, 0
836+ ; RV32I-MEDIUM-NEXT: li s4, 0
837+ ; RV32I-MEDIUM-NEXT: slli a0, a0, 4
838+ ; RV32I-MEDIUM-NEXT: add a0, s0, a0
839+ ; RV32I-MEDIUM-NEXT: addi s7, a0, 8
840+ ; RV32I-MEDIUM-NEXT: .LBB20_5: # %for.body
841+ ; RV32I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
842+ ; RV32I-MEDIUM-NEXT: mv a0, s0
843+ ; RV32I-MEDIUM-NEXT: call f@plt
844+ ; RV32I-MEDIUM-NEXT: lw a0, 4(s7)
845+ ; RV32I-MEDIUM-NEXT: lw a1, 0(s7)
846+ ; RV32I-MEDIUM-NEXT: add a0, a0, s4
847+ ; RV32I-MEDIUM-NEXT: add s3, a1, s3
848+ ; RV32I-MEDIUM-NEXT: sltu s4, s3, a1
849+ ; RV32I-MEDIUM-NEXT: addi s5, s5, 1
850+ ; RV32I-MEDIUM-NEXT: seqz a1, s5
851+ ; RV32I-MEDIUM-NEXT: add s6, s6, a1
852+ ; RV32I-MEDIUM-NEXT: xor a1, s5, s2
853+ ; RV32I-MEDIUM-NEXT: xor a2, s6, s1
854+ ; RV32I-MEDIUM-NEXT: or a1, a1, a2
855+ ; RV32I-MEDIUM-NEXT: add s4, a0, s4
856+ ; RV32I-MEDIUM-NEXT: bnez a1, .LBB20_5
857+ ; RV32I-MEDIUM-NEXT: .LBB20_6: # %for.cond.cleanup
858+ ; RV32I-MEDIUM-NEXT: mv a0, s3
859+ ; RV32I-MEDIUM-NEXT: mv a1, s4
860+ ; RV32I-MEDIUM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
861+ ; RV32I-MEDIUM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
862+ ; RV32I-MEDIUM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
863+ ; RV32I-MEDIUM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
864+ ; RV32I-MEDIUM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
865+ ; RV32I-MEDIUM-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
866+ ; RV32I-MEDIUM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
867+ ; RV32I-MEDIUM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
868+ ; RV32I-MEDIUM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
869+ ; RV32I-MEDIUM-NEXT: addi sp, sp, 48
870+ ; RV32I-MEDIUM-NEXT: ret
871+ ;
872+ ; RV64I-LABEL: fold_addi_from_different_bb:
873+ ; RV64I: # %bb.0: # %entry
874+ ; RV64I-NEXT: addi sp, sp, -48
875+ ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
876+ ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
877+ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
878+ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
879+ ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
880+ ; RV64I-NEXT: blez a1, .LBB20_3
881+ ; RV64I-NEXT: # %bb.1: # %for.body.lr.ph
882+ ; RV64I-NEXT: mv s0, a2
883+ ; RV64I-NEXT: mv s1, a1
884+ ; RV64I-NEXT: li s2, 0
885+ ; RV64I-NEXT: slli a0, a0, 4
886+ ; RV64I-NEXT: add a0, a2, a0
887+ ; RV64I-NEXT: addi s3, a0, 8
888+ ; RV64I-NEXT: .LBB20_2: # %for.body
889+ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
890+ ; RV64I-NEXT: mv a0, s0
891+ ; RV64I-NEXT: call f@plt
892+ ; RV64I-NEXT: ld a0, 0(s3)
893+ ; RV64I-NEXT: addi s1, s1, -1
894+ ; RV64I-NEXT: add s2, a0, s2
895+ ; RV64I-NEXT: bnez s1, .LBB20_2
896+ ; RV64I-NEXT: j .LBB20_4
897+ ; RV64I-NEXT: .LBB20_3:
898+ ; RV64I-NEXT: li s2, 0
899+ ; RV64I-NEXT: .LBB20_4: # %for.cond.cleanup
900+ ; RV64I-NEXT: mv a0, s2
901+ ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
902+ ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
903+ ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
904+ ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
905+ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
906+ ; RV64I-NEXT: addi sp, sp, 48
907+ ; RV64I-NEXT: ret
908+ ;
909+ ; RV64I-MEDIUM-LABEL: fold_addi_from_different_bb:
910+ ; RV64I-MEDIUM: # %bb.0: # %entry
911+ ; RV64I-MEDIUM-NEXT: addi sp, sp, -48
912+ ; RV64I-MEDIUM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
913+ ; RV64I-MEDIUM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
914+ ; RV64I-MEDIUM-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
915+ ; RV64I-MEDIUM-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
916+ ; RV64I-MEDIUM-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
917+ ; RV64I-MEDIUM-NEXT: blez a1, .LBB20_3
918+ ; RV64I-MEDIUM-NEXT: # %bb.1: # %for.body.lr.ph
919+ ; RV64I-MEDIUM-NEXT: mv s0, a2
920+ ; RV64I-MEDIUM-NEXT: mv s1, a1
921+ ; RV64I-MEDIUM-NEXT: li s2, 0
922+ ; RV64I-MEDIUM-NEXT: slli a0, a0, 4
923+ ; RV64I-MEDIUM-NEXT: add a0, a2, a0
924+ ; RV64I-MEDIUM-NEXT: addi s3, a0, 8
925+ ; RV64I-MEDIUM-NEXT: .LBB20_2: # %for.body
926+ ; RV64I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
927+ ; RV64I-MEDIUM-NEXT: mv a0, s0
928+ ; RV64I-MEDIUM-NEXT: call f@plt
929+ ; RV64I-MEDIUM-NEXT: ld a0, 0(s3)
930+ ; RV64I-MEDIUM-NEXT: addi s1, s1, -1
931+ ; RV64I-MEDIUM-NEXT: add s2, a0, s2
932+ ; RV64I-MEDIUM-NEXT: bnez s1, .LBB20_2
933+ ; RV64I-MEDIUM-NEXT: j .LBB20_4
934+ ; RV64I-MEDIUM-NEXT: .LBB20_3:
935+ ; RV64I-MEDIUM-NEXT: li s2, 0
936+ ; RV64I-MEDIUM-NEXT: .LBB20_4: # %for.cond.cleanup
937+ ; RV64I-MEDIUM-NEXT: mv a0, s2
938+ ; RV64I-MEDIUM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
939+ ; RV64I-MEDIUM-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
940+ ; RV64I-MEDIUM-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
941+ ; RV64I-MEDIUM-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
942+ ; RV64I-MEDIUM-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
943+ ; RV64I-MEDIUM-NEXT: addi sp, sp, 48
944+ ; RV64I-MEDIUM-NEXT: ret
945+ entry:
946+ %cmp4 = icmp sgt i64 %n , 0 ; the loop is entered only when %n > 0
947+ br i1 %cmp4 , label %for.body.lr.ph , label %for.cond.cleanup
948+
949+ for.body.lr.ph: ; preds = %entry
950+ ; TODO: %y is computed in this block but only used by the load in %for.body. When the GEP
951+ ; is expanded, the resulting `addi ..., 8` (see checks above) should be folded into that load's offset.
952+ %y = getelementptr inbounds %struct.S , ptr %a , i64 %k , i32 1 ; address of field 1 of element %k of %a
953+ br label %for.body
954+
955+ for.cond.cleanup: ; preds = %for.body, %entry
956+ %s.0.lcssa = phi i64 [ 0 , %entry ], [ %add , %for.body ]
957+ ret i64 %s.0.lcssa
958+
959+ for.body: ; preds = %for.body.lr.ph, %for.body
960+ %i.06 = phi i64 [ 0 , %for.body.lr.ph ], [ %inc , %for.body ]
961+ %s.05 = phi i64 [ 0 , %for.body.lr.ph ], [ %add , %for.body ]
962+ call void @f (ptr %a )
963+ %0 = load i64 , ptr %y , align 8 ; %y is defined in %for.body.lr.ph, not in this block
964+ %add = add nsw i64 %0 , %s.05
965+ %inc = add nuw nsw i64 %i.06 , 1
966+ %exitcond.not = icmp eq i64 %inc , %n
967+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
968+ }
969+
970+ declare void @f (ptr ) ; external function; called once per loop iteration by @fold_addi_from_different_bb
0 commit comments