@@ -1336,8 +1336,8 @@ define amdgpu_kernel void @memmove_flat_align1_global_align1(ptr %dst, ptr addrs
13361336; MAX1024-NEXT: ret void
13371337;
13381338; ALL-LABEL: @memmove_flat_align1_global_align1(
1339- ; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[SRC :%.*]] to ptr
1340- ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr [[TMP1 ]], [[DST:%.* ]]
1339+ ; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DST :%.*]] to ptr addrspace(1)
1340+ ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.* ]], [[TMP1 ]]
13411341; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
13421342; ALL: memmove_bwd_loop:
13431343; ALL-NEXT: [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 16, [[TMP0:%.*]] ]
@@ -1404,8 +1404,8 @@ define amdgpu_kernel void @memmove_flat_align1_private_align1(ptr %dst, ptr addr
14041404; MAX1024-NEXT: ret void
14051405;
14061406; ALL-LABEL: @memmove_flat_align1_private_align1(
1407- ; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[SRC :%.*]] to ptr
1408- ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr [[TMP1 ]], [[DST:%.* ]]
1407+ ; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DST :%.*]] to ptr addrspace(5)
1408+ ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(5) [[SRC:%.* ]], [[TMP1 ]]
14091409; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
14101410; ALL: memmove_bwd_loop:
14111411; ALL-NEXT: [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 16, [[TMP0:%.*]] ]
@@ -1514,7 +1514,59 @@ define amdgpu_kernel void @memmove_global_align1_private_align1(ptr addrspace(1)
15141514
15151515define amdgpu_kernel void @memmove_global_align1_p999_align1 (ptr addrspace (1 ) %dst , ptr addrspace (999 ) %src , i64 %size ) {
15161516; OPT-LABEL: @memmove_global_align1_p999_align1(
1517- ; OPT-NEXT: call void @llvm.memmove.p1.p999.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(999) [[SRC:%.*]], i64 [[SIZE:%.*]], i1 false)
1517+ ; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[SIZE:%.*]], 4
1518+ ; OPT-NEXT: [[TMP2:%.*]] = and i64 [[SIZE]], 15
1519+ ; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[SIZE]], [[TMP2]]
1520+ ; OPT-NEXT: [[SKIP_RESIDUAL:%.*]] = icmp eq i64 [[TMP2]], 0
1521+ ; OPT-NEXT: [[SKIP_MAIN:%.*]] = icmp eq i64 [[TMP1]], 0
1522+ ; OPT-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[DST:%.*]] to ptr addrspace(999)
1523+ ; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(999) [[SRC:%.*]], [[TMP4]]
1524+ ; OPT-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_COPY_BACKWARDS:%.*]], label [[MEMMOVE_COPY_FORWARD:%.*]]
1525+ ; OPT: memmove_copy_backwards:
1526+ ; OPT-NEXT: br i1 [[SKIP_RESIDUAL]], label [[MEMMOVE_BWD_MIDDLE:%.*]], label [[MEMMOVE_BWD_RESIDUAL_LOOP:%.*]]
1527+ ; OPT: memmove_bwd_residual_loop:
1528+ ; OPT-NEXT: [[TMP5:%.*]] = phi i64 [ [[BWD_RESIDUAL_INDEX:%.*]], [[MEMMOVE_BWD_RESIDUAL_LOOP]] ], [ [[SIZE]], [[MEMMOVE_COPY_BACKWARDS]] ]
1529+ ; OPT-NEXT: [[BWD_RESIDUAL_INDEX]] = sub i64 [[TMP5]], 1
1530+ ; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(999) [[SRC]], i64 [[BWD_RESIDUAL_INDEX]]
1531+ ; OPT-NEXT: [[ELEMENT:%.*]] = load i8, ptr addrspace(999) [[TMP6]], align 1
1532+ ; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_RESIDUAL_INDEX]]
1533+ ; OPT-NEXT: store i8 [[ELEMENT]], ptr addrspace(1) [[TMP7]], align 1
1534+ ; OPT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[BWD_RESIDUAL_INDEX]], [[TMP3]]
1535+ ; OPT-NEXT: br i1 [[TMP8]], label [[MEMMOVE_BWD_MIDDLE]], label [[MEMMOVE_BWD_RESIDUAL_LOOP]]
1536+ ; OPT: memmove_bwd_middle:
1537+ ; OPT-NEXT: br i1 [[SKIP_MAIN]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_MAIN_LOOP:%.*]]
1538+ ; OPT: memmove_bwd_main_loop:
1539+ ; OPT-NEXT: [[TMP9:%.*]] = phi i64 [ [[BWD_MAIN_INDEX:%.*]], [[MEMMOVE_BWD_MAIN_LOOP]] ], [ [[TMP1]], [[MEMMOVE_BWD_MIDDLE]] ]
1540+ ; OPT-NEXT: [[BWD_MAIN_INDEX]] = sub i64 [[TMP9]], 1
1541+ ; OPT-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(999) [[SRC]], i64 [[BWD_MAIN_INDEX]]
1542+ ; OPT-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(999) [[TMP10]], align 1
1543+ ; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST]], i64 [[BWD_MAIN_INDEX]]
1544+ ; OPT-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP11]], align 1
1545+ ; OPT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[BWD_MAIN_INDEX]], 0
1546+ ; OPT-NEXT: br i1 [[TMP12]], label [[MEMMOVE_DONE]], label [[MEMMOVE_BWD_MAIN_LOOP]]
1547+ ; OPT: memmove_copy_forward:
1548+ ; OPT-NEXT: br i1 [[SKIP_MAIN]], label [[MEMMOVE_FWD_MIDDLE:%.*]], label [[MEMMOVE_FWD_MAIN_LOOP:%.*]]
1549+ ; OPT: memmove_fwd_main_loop:
1550+ ; OPT-NEXT: [[FWD_MAIN_INDEX:%.*]] = phi i64 [ [[TMP15:%.*]], [[MEMMOVE_FWD_MAIN_LOOP]] ], [ 0, [[MEMMOVE_COPY_FORWARD]] ]
1551+ ; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(999) [[SRC]], i64 [[FWD_MAIN_INDEX]]
1552+ ; OPT-NEXT: [[ELEMENT2:%.*]] = load <4 x i32>, ptr addrspace(999) [[TMP13]], align 1
1553+ ; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST]], i64 [[FWD_MAIN_INDEX]]
1554+ ; OPT-NEXT: store <4 x i32> [[ELEMENT2]], ptr addrspace(1) [[TMP14]], align 1
1555+ ; OPT-NEXT: [[TMP15]] = add i64 [[FWD_MAIN_INDEX]], 1
1556+ ; OPT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP15]], [[TMP1]]
1557+ ; OPT-NEXT: br i1 [[TMP16]], label [[MEMMOVE_FWD_MIDDLE]], label [[MEMMOVE_FWD_MAIN_LOOP]]
1558+ ; OPT: memmove_fwd_middle:
1559+ ; OPT-NEXT: br i1 [[SKIP_RESIDUAL]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_RESIDUAL_LOOP:%.*]]
1560+ ; OPT: memmove_fwd_residual_loop:
1561+ ; OPT-NEXT: [[FWD_RESIDUAL_INDEX:%.*]] = phi i64 [ [[TMP19:%.*]], [[MEMMOVE_FWD_RESIDUAL_LOOP]] ], [ [[TMP3]], [[MEMMOVE_FWD_MIDDLE]] ]
1562+ ; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(999) [[SRC]], i64 [[FWD_RESIDUAL_INDEX]]
1563+ ; OPT-NEXT: [[ELEMENT3:%.*]] = load i8, ptr addrspace(999) [[TMP17]], align 1
1564+ ; OPT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_RESIDUAL_INDEX]]
1565+ ; OPT-NEXT: store i8 [[ELEMENT3]], ptr addrspace(1) [[TMP18]], align 1
1566+ ; OPT-NEXT: [[TMP19]] = add i64 [[FWD_RESIDUAL_INDEX]], 1
1567+ ; OPT-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[SIZE]]
1568+ ; OPT-NEXT: br i1 [[TMP20]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_RESIDUAL_LOOP]]
1569+ ; OPT: memmove_done:
15181570; OPT-NEXT: ret void
15191571;
15201572 call void @llvm.memmove.p1.p999.i64 (ptr addrspace (1 ) %dst , ptr addrspace (999 ) %src , i64 %size , i1 false )
@@ -1523,7 +1575,59 @@ define amdgpu_kernel void @memmove_global_align1_p999_align1(ptr addrspace(1) %d
15231575
15241576define amdgpu_kernel void @memmove_p999_align1_p1_align1 (ptr addrspace (999 ) %dst , ptr addrspace (1 ) %src , i64 %size ) {
15251577; OPT-LABEL: @memmove_p999_align1_p1_align1(
1526- ; OPT-NEXT: call void @llvm.memmove.p999.p1.i64(ptr addrspace(999) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 [[SIZE:%.*]], i1 false)
1578+ ; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[SIZE:%.*]], 4
1579+ ; OPT-NEXT: [[TMP2:%.*]] = and i64 [[SIZE]], 15
1580+ ; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[SIZE]], [[TMP2]]
1581+ ; OPT-NEXT: [[SKIP_RESIDUAL:%.*]] = icmp eq i64 [[TMP2]], 0
1582+ ; OPT-NEXT: [[SKIP_MAIN:%.*]] = icmp eq i64 [[TMP1]], 0
1583+ ; OPT-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(999) [[DST:%.*]] to ptr addrspace(1)
1584+ ; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[TMP4]]
1585+ ; OPT-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_COPY_BACKWARDS:%.*]], label [[MEMMOVE_COPY_FORWARD:%.*]]
1586+ ; OPT: memmove_copy_backwards:
1587+ ; OPT-NEXT: br i1 [[SKIP_RESIDUAL]], label [[MEMMOVE_BWD_MIDDLE:%.*]], label [[MEMMOVE_BWD_RESIDUAL_LOOP:%.*]]
1588+ ; OPT: memmove_bwd_residual_loop:
1589+ ; OPT-NEXT: [[TMP5:%.*]] = phi i64 [ [[BWD_RESIDUAL_INDEX:%.*]], [[MEMMOVE_BWD_RESIDUAL_LOOP]] ], [ [[SIZE]], [[MEMMOVE_COPY_BACKWARDS]] ]
1590+ ; OPT-NEXT: [[BWD_RESIDUAL_INDEX]] = sub i64 [[TMP5]], 1
1591+ ; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_RESIDUAL_INDEX]]
1592+ ; OPT-NEXT: [[ELEMENT:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 1
1593+ ; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(999) [[DST]], i64 [[BWD_RESIDUAL_INDEX]]
1594+ ; OPT-NEXT: store i8 [[ELEMENT]], ptr addrspace(999) [[TMP7]], align 1
1595+ ; OPT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[BWD_RESIDUAL_INDEX]], [[TMP3]]
1596+ ; OPT-NEXT: br i1 [[TMP8]], label [[MEMMOVE_BWD_MIDDLE]], label [[MEMMOVE_BWD_RESIDUAL_LOOP]]
1597+ ; OPT: memmove_bwd_middle:
1598+ ; OPT-NEXT: br i1 [[SKIP_MAIN]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_MAIN_LOOP:%.*]]
1599+ ; OPT: memmove_bwd_main_loop:
1600+ ; OPT-NEXT: [[TMP9:%.*]] = phi i64 [ [[BWD_MAIN_INDEX:%.*]], [[MEMMOVE_BWD_MAIN_LOOP]] ], [ [[TMP1]], [[MEMMOVE_BWD_MIDDLE]] ]
1601+ ; OPT-NEXT: [[BWD_MAIN_INDEX]] = sub i64 [[TMP9]], 1
1602+ ; OPT-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[BWD_MAIN_INDEX]]
1603+ ; OPT-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP10]], align 1
1604+ ; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(999) [[DST]], i64 [[BWD_MAIN_INDEX]]
1605+ ; OPT-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(999) [[TMP11]], align 1
1606+ ; OPT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[BWD_MAIN_INDEX]], 0
1607+ ; OPT-NEXT: br i1 [[TMP12]], label [[MEMMOVE_DONE]], label [[MEMMOVE_BWD_MAIN_LOOP]]
1608+ ; OPT: memmove_copy_forward:
1609+ ; OPT-NEXT: br i1 [[SKIP_MAIN]], label [[MEMMOVE_FWD_MIDDLE:%.*]], label [[MEMMOVE_FWD_MAIN_LOOP:%.*]]
1610+ ; OPT: memmove_fwd_main_loop:
1611+ ; OPT-NEXT: [[FWD_MAIN_INDEX:%.*]] = phi i64 [ [[TMP15:%.*]], [[MEMMOVE_FWD_MAIN_LOOP]] ], [ 0, [[MEMMOVE_COPY_FORWARD]] ]
1612+ ; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[FWD_MAIN_INDEX]]
1613+ ; OPT-NEXT: [[ELEMENT2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP13]], align 1
1614+ ; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(999) [[DST]], i64 [[FWD_MAIN_INDEX]]
1615+ ; OPT-NEXT: store <4 x i32> [[ELEMENT2]], ptr addrspace(999) [[TMP14]], align 1
1616+ ; OPT-NEXT: [[TMP15]] = add i64 [[FWD_MAIN_INDEX]], 1
1617+ ; OPT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP15]], [[TMP1]]
1618+ ; OPT-NEXT: br i1 [[TMP16]], label [[MEMMOVE_FWD_MIDDLE]], label [[MEMMOVE_FWD_MAIN_LOOP]]
1619+ ; OPT: memmove_fwd_middle:
1620+ ; OPT-NEXT: br i1 [[SKIP_RESIDUAL]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_RESIDUAL_LOOP:%.*]]
1621+ ; OPT: memmove_fwd_residual_loop:
1622+ ; OPT-NEXT: [[FWD_RESIDUAL_INDEX:%.*]] = phi i64 [ [[TMP19:%.*]], [[MEMMOVE_FWD_RESIDUAL_LOOP]] ], [ [[TMP3]], [[MEMMOVE_FWD_MIDDLE]] ]
1623+ ; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_RESIDUAL_INDEX]]
1624+ ; OPT-NEXT: [[ELEMENT3:%.*]] = load i8, ptr addrspace(1) [[TMP17]], align 1
1625+ ; OPT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(999) [[DST]], i64 [[FWD_RESIDUAL_INDEX]]
1626+ ; OPT-NEXT: store i8 [[ELEMENT3]], ptr addrspace(999) [[TMP18]], align 1
1627+ ; OPT-NEXT: [[TMP19]] = add i64 [[FWD_RESIDUAL_INDEX]], 1
1628+ ; OPT-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[SIZE]]
1629+ ; OPT-NEXT: br i1 [[TMP20]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_RESIDUAL_LOOP]]
1630+ ; OPT: memmove_done:
15271631; OPT-NEXT: ret void
15281632;
15291633 call void @llvm.memmove.p999.p1.i64 (ptr addrspace (999 ) %dst , ptr addrspace (1 ) %src , i64 %size , i1 false )
@@ -1532,7 +1636,59 @@ define amdgpu_kernel void @memmove_p999_align1_p1_align1(ptr addrspace(999) %dst
15321636
15331637define amdgpu_kernel void @memmove_p999_align1_p998_align1 (ptr addrspace (999 ) %dst , ptr addrspace (998 ) %src , i64 %size ) {
15341638; OPT-LABEL: @memmove_p999_align1_p998_align1(
1535- ; OPT-NEXT: call void @llvm.memmove.p999.p998.i64(ptr addrspace(999) [[DST:%.*]], ptr addrspace(998) [[SRC:%.*]], i64 [[SIZE:%.*]], i1 false)
1639+ ; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[SIZE:%.*]], 4
1640+ ; OPT-NEXT: [[TMP2:%.*]] = and i64 [[SIZE]], 15
1641+ ; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[SIZE]], [[TMP2]]
1642+ ; OPT-NEXT: [[SKIP_RESIDUAL:%.*]] = icmp eq i64 [[TMP2]], 0
1643+ ; OPT-NEXT: [[SKIP_MAIN:%.*]] = icmp eq i64 [[TMP1]], 0
1644+ ; OPT-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(999) [[DST:%.*]] to ptr addrspace(998)
1645+ ; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(998) [[SRC:%.*]], [[TMP4]]
1646+ ; OPT-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_COPY_BACKWARDS:%.*]], label [[MEMMOVE_COPY_FORWARD:%.*]]
1647+ ; OPT: memmove_copy_backwards:
1648+ ; OPT-NEXT: br i1 [[SKIP_RESIDUAL]], label [[MEMMOVE_BWD_MIDDLE:%.*]], label [[MEMMOVE_BWD_RESIDUAL_LOOP:%.*]]
1649+ ; OPT: memmove_bwd_residual_loop:
1650+ ; OPT-NEXT: [[TMP5:%.*]] = phi i64 [ [[BWD_RESIDUAL_INDEX:%.*]], [[MEMMOVE_BWD_RESIDUAL_LOOP]] ], [ [[SIZE]], [[MEMMOVE_COPY_BACKWARDS]] ]
1651+ ; OPT-NEXT: [[BWD_RESIDUAL_INDEX]] = sub i64 [[TMP5]], 1
1652+ ; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(998) [[SRC]], i64 [[BWD_RESIDUAL_INDEX]]
1653+ ; OPT-NEXT: [[ELEMENT:%.*]] = load i8, ptr addrspace(998) [[TMP6]], align 1
1654+ ; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(999) [[DST]], i64 [[BWD_RESIDUAL_INDEX]]
1655+ ; OPT-NEXT: store i8 [[ELEMENT]], ptr addrspace(999) [[TMP7]], align 1
1656+ ; OPT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[BWD_RESIDUAL_INDEX]], [[TMP3]]
1657+ ; OPT-NEXT: br i1 [[TMP8]], label [[MEMMOVE_BWD_MIDDLE]], label [[MEMMOVE_BWD_RESIDUAL_LOOP]]
1658+ ; OPT: memmove_bwd_middle:
1659+ ; OPT-NEXT: br i1 [[SKIP_MAIN]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_MAIN_LOOP:%.*]]
1660+ ; OPT: memmove_bwd_main_loop:
1661+ ; OPT-NEXT: [[TMP9:%.*]] = phi i64 [ [[BWD_MAIN_INDEX:%.*]], [[MEMMOVE_BWD_MAIN_LOOP]] ], [ [[TMP1]], [[MEMMOVE_BWD_MIDDLE]] ]
1662+ ; OPT-NEXT: [[BWD_MAIN_INDEX]] = sub i64 [[TMP9]], 1
1663+ ; OPT-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(998) [[SRC]], i64 [[BWD_MAIN_INDEX]]
1664+ ; OPT-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(998) [[TMP10]], align 1
1665+ ; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(999) [[DST]], i64 [[BWD_MAIN_INDEX]]
1666+ ; OPT-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(999) [[TMP11]], align 1
1667+ ; OPT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[BWD_MAIN_INDEX]], 0
1668+ ; OPT-NEXT: br i1 [[TMP12]], label [[MEMMOVE_DONE]], label [[MEMMOVE_BWD_MAIN_LOOP]]
1669+ ; OPT: memmove_copy_forward:
1670+ ; OPT-NEXT: br i1 [[SKIP_MAIN]], label [[MEMMOVE_FWD_MIDDLE:%.*]], label [[MEMMOVE_FWD_MAIN_LOOP:%.*]]
1671+ ; OPT: memmove_fwd_main_loop:
1672+ ; OPT-NEXT: [[FWD_MAIN_INDEX:%.*]] = phi i64 [ [[TMP15:%.*]], [[MEMMOVE_FWD_MAIN_LOOP]] ], [ 0, [[MEMMOVE_COPY_FORWARD]] ]
1673+ ; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(998) [[SRC]], i64 [[FWD_MAIN_INDEX]]
1674+ ; OPT-NEXT: [[ELEMENT2:%.*]] = load <4 x i32>, ptr addrspace(998) [[TMP13]], align 1
1675+ ; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(999) [[DST]], i64 [[FWD_MAIN_INDEX]]
1676+ ; OPT-NEXT: store <4 x i32> [[ELEMENT2]], ptr addrspace(999) [[TMP14]], align 1
1677+ ; OPT-NEXT: [[TMP15]] = add i64 [[FWD_MAIN_INDEX]], 1
1678+ ; OPT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP15]], [[TMP1]]
1679+ ; OPT-NEXT: br i1 [[TMP16]], label [[MEMMOVE_FWD_MIDDLE]], label [[MEMMOVE_FWD_MAIN_LOOP]]
1680+ ; OPT: memmove_fwd_middle:
1681+ ; OPT-NEXT: br i1 [[SKIP_RESIDUAL]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_RESIDUAL_LOOP:%.*]]
1682+ ; OPT: memmove_fwd_residual_loop:
1683+ ; OPT-NEXT: [[FWD_RESIDUAL_INDEX:%.*]] = phi i64 [ [[TMP19:%.*]], [[MEMMOVE_FWD_RESIDUAL_LOOP]] ], [ [[TMP3]], [[MEMMOVE_FWD_MIDDLE]] ]
1684+ ; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(998) [[SRC]], i64 [[FWD_RESIDUAL_INDEX]]
1685+ ; OPT-NEXT: [[ELEMENT3:%.*]] = load i8, ptr addrspace(998) [[TMP17]], align 1
1686+ ; OPT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(999) [[DST]], i64 [[FWD_RESIDUAL_INDEX]]
1687+ ; OPT-NEXT: store i8 [[ELEMENT3]], ptr addrspace(999) [[TMP18]], align 1
1688+ ; OPT-NEXT: [[TMP19]] = add i64 [[FWD_RESIDUAL_INDEX]], 1
1689+ ; OPT-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[SIZE]]
1690+ ; OPT-NEXT: br i1 [[TMP20]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_RESIDUAL_LOOP]]
1691+ ; OPT: memmove_done:
15361692; OPT-NEXT: ret void
15371693;
15381694 call void @llvm.memmove.p999.p998.i64 (ptr addrspace (999 ) %dst , ptr addrspace (998 ) %src , i64 %size , i1 false )
@@ -1726,8 +1882,8 @@ define amdgpu_kernel void @memmove_flat_align1_local_align1(ptr addrspace(0) %ds
17261882; MAX1024-NEXT: ret void
17271883;
17281884; ALL-LABEL: @memmove_flat_align1_local_align1(
1729- ; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[SRC :%.*]] to ptr
1730- ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr [[TMP1 ]], [[DST:%.* ]]
1885+ ; ALL-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DST :%.*]] to ptr addrspace(3)
1886+ ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(3) [[SRC:%.* ]], [[TMP1 ]]
17311887; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
17321888; ALL: memmove_bwd_loop:
17331889; ALL-NEXT: [[TMP2:%.*]] = phi i32 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 32, [[TMP0:%.*]] ]
@@ -1761,8 +1917,8 @@ define amdgpu_kernel void @memmove_flat_align1_local_align1_unknown_size(ptr add
17611917; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[SIZE]], [[TMP2]]
17621918; OPT-NEXT: [[SKIP_RESIDUAL:%.*]] = icmp eq i32 [[TMP2]], 0
17631919; OPT-NEXT: [[SKIP_MAIN:%.*]] = icmp eq i32 [[TMP1]], 0
1764- ; OPT-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(3) [[SRC :%.*]] to ptr
1765- ; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr [[TMP4 ]], [[DST:%.* ]]
1920+ ; OPT-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[DST :%.*]] to ptr addrspace(3)
1921+ ; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(3) [[SRC:%.* ]], [[TMP4 ]]
17661922; OPT-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_COPY_BACKWARDS:%.*]], label [[MEMMOVE_COPY_FORWARD:%.*]]
17671923; OPT: memmove_copy_backwards:
17681924; OPT-NEXT: br i1 [[SKIP_RESIDUAL]], label [[MEMMOVE_BWD_MIDDLE:%.*]], label [[MEMMOVE_BWD_RESIDUAL_LOOP:%.*]]
0 commit comments