diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16725,7 +16725,6 @@ } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { - Val = ST->getValue(); EVT STType = Val.getValueType(); EVT STMemType = ST->getMemoryVT(); if (STType == STMemType) @@ -16817,6 +16816,7 @@ // significant bit in the loaded value maps to the least significant bit in // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. + int64_t OrigOffset = Offset; if (DAG.getDataLayout().isBigEndian()) Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() - (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) / @@ -16868,11 +16868,23 @@ } } + // Handle some cases for big-endian that would be Offset 0 and handled for + // little-endian. + SDValue Val = ST->getValue(); + if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) { + if (STType.isInteger() && !STType.isVector() && LDType.isInteger() && + !LDType.isVector() && isTypeLegal(STType) && + TLI.isOperationLegal(ISD::SRL, STType)) { + Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val, + DAG.getConstant(Offset * 8, SDLoc(LD), STType)); + Offset = 0; + } + } + // TODO: Deal with nonzero offset. if (LD->getBasePtr().isUndef() || Offset != 0) return SDValue(); // Model necessary truncations / extenstions. - SDValue Val; // Truncate Value To Stored Memory Size. do { if (!getTruncatedStoreValue(ST, Val)) diff --git a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll --- a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll +++ b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll @@ -5,8 +5,9 @@ define i8 @test1(i32 %a, i8* %pa) { ; CHECK-BE-LABEL: test1: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: str w0, [x1] -; CHECK-BE-NEXT: ldrb w0, [x1] +; CHECK-BE-NEXT: mov w8, w0 +; CHECK-BE-NEXT: lsr w0, w0, #24 +; CHECK-BE-NEXT: str w8, [x1] ; CHECK-BE-NEXT: ret ; ; CHECK-LE-LABEL: test1: diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -1751,12 +1751,10 @@ ; MIPS64R5EB: # %bb.0: ; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 ; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sd $5, 16($sp) -; MIPS64R5EB-NEXT: sd $4, 24($sp) -; MIPS64R5EB-NEXT: lw $1, 16($sp) +; MIPS64R5EB-NEXT: dsrl $1, $5, 32 ; MIPS64R5EB-NEXT: insert.d $w0[0], $1 ; MIPS64R5EB-NEXT: insert.d $w0[1], $5 -; MIPS64R5EB-NEXT: lw $1, 24($sp) +; MIPS64R5EB-NEXT: dsrl $1, $4, 32 ; MIPS64R5EB-NEXT: insert.d $w1[0], $1 ; MIPS64R5EB-NEXT: insert.d $w1[1], $4 ; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll @@ -80,8 +80,9 @@ ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3 -; 32BIT: STW killed renamable $r3, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 8) -; 32BIT-NEXT: renamable $r3 = LBZ 0, %fixed-stack.0 :: (dereferenceable load (s8) +; 32BIT: renamable $r4 = COPY $r3 +; 32BIT: renamable $r3 = RLWINM $r3, 8, 24, 31 +; 32BIT: STW killed renamable $r4, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 8) ; 32BIT-NEXT: BLR ; 64BIT: fixedStack: @@ -92,18 +93,21 @@ ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 -; 64BIT: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) -; 64BIT-NEXT: renamable $x3 = LBZ8 0, %fixed-stack.0 :: (dereferenceable load (s8) +; 64BIT: renamable $x4 = COPY $x3 +; 64BIT: renamable $x3 = RLDICL $x3, 8, 56 +; 64BIT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) ; CHECKASM-LABEL: .test_byval_1Byte: -; ASM32: stw 3, 24(1) -; ASM32-NEXT: lbz 3, 24(1) -; ASM32-NEXT: blr +; ASM32: mr 4, 3 +; ASM32-NEXT: srwi 3, 3, 24 +; ASM32-NEXT: stw 4, 24(1) +; ASM32-NEXT: blr -; ASM64: std 3, 48(1) -; ASM64-NEXT: lbz 3, 48(1) -; ASM64-NEXT: blr +; ASM64: mr 4, 3 +; ASM64-NEXT: rldicl 3, 3, 8, 56 +; ASM64-NEXT: std 4, 48(1) +; ASM64-NEXT: blr @f = common global float 0.000000e+00, align 4 @@ -433,10 +437,10 @@ ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 ; 64BIT: STD killed renamable $x3, 0, %fixed-stack.2 :: (store (s64) into %fixed-stack.2, align 16) -; 64BIT-NEXT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) +; 64BIT: STD renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) ; 64BIT-DAG: renamable $r[[SCRATCH1:[0-9]+]] = LBZ 3, %fixed-stack.2 :: (dereferenceable load (s8) -; 64BIT-DAG: renamable $r[[SCRATCH2:[0-9]+]] = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) -; 64BIT-NEXT: renamable $r[[SCRATCH3:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCH2]], killed renamable $r[[SCRATCH1]] +; 64BIT-DAG: renamable $x[[SCRATCH2:[0-9]+]] = RLDICL killed renamable $x4, 32, 32 +; 64BIT-NEXT: renamable $r[[SCRATCH3:[0-9]+]] = nsw ADD4 renamable $r[[SCRATCH2]], killed renamable $r[[SCRATCH1]], implicit killed $x[[SCRATCH2]] ; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r[[SCRATCH3]] ; 64BIT-NEXT: BLR8 @@ -449,9 +453,9 @@ ; ASM32-NEXT: blr ; ASM64: std 3, 48(1) +; ASM64-NEXT: lbz [[SCRATCH1:[0-9]+]], 51(1) ; ASM64-NEXT: std 4, 56(1) -; ASM64-DAG: lbz [[SCRATCH1:[0-9]+]], 51(1) -; ASM64-DAG: lwz [[SCRATCH2:[0-9]+]], 56(1) +; ASM64-NEXT: rldicl [[SCRATCH2:[0-9]+]], 4, 32, 32 ; ASM64-NEXT: add [[SCRATCH3:[0-9]+]], [[SCRATCH2]], [[SCRATCH1]] ; ASM64-NEXT: extsw 3, [[SCRATCH3]] ; ASM64-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll @@ -925,13 +925,13 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -144(r1) -; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: addi r5, r1, 128 +; P8BE-NEXT: rldicl r6, r4, 32, 32 ; P8BE-NEXT: std r3, 192(r1) -; P8BE-NEXT: lwz r4, 200(r1) +; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: std r3, 128(r1) ; P8BE-NEXT: mr r3, r5 -; P8BE-NEXT: stw r4, 136(r1) +; P8BE-NEXT: stw r6, 136(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -946,9 +946,9 @@ ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -144(r1) ; P9BE-NEXT: std r4, 200(r1) +; P9BE-NEXT: rldicl r5, r4, 32, 32 ; P9BE-NEXT: addi r4, r1, 128 ; P9BE-NEXT: std r3, 192(r1) -; P9BE-NEXT: lwz r5, 200(r1) ; P9BE-NEXT: std r3, 128(r1) ; P9BE-NEXT: mr r3, r4 ; P9BE-NEXT: stw r5, 136(r1) @@ -965,10 +965,10 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -144(r1) -; P10BE-NEXT: std r4, 200(r1) ; P10BE-NEXT: std r3, 192(r1) +; P10BE-NEXT: std r4, 200(r1) +; P10BE-NEXT: rldicl r5, r4, 32, 32 ; P10BE-NEXT: addi r4, r1, 128 -; P10BE-NEXT: lwz r5, 200(r1) ; P10BE-NEXT: std r3, 128(r1) ; P10BE-NEXT: mr r3, r4 ; P10BE-NEXT: stw r5, 136(r1) @@ -1098,14 +1098,14 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -144(r1) -; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: addi r6, r1, 126 +; P8BE-NEXT: sth r5, 208(r1) +; P8BE-NEXT: rldicl r5, r4, 32, 32 ; P8BE-NEXT: std r3, 192(r1) -; P8BE-NEXT: lwz r4, 200(r1) +; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: stdx r3, 0, r6 ; P8BE-NEXT: mr r3, r6 -; P8BE-NEXT: sth r5, 208(r1) -; P8BE-NEXT: stw r4, 134(r1) +; P8BE-NEXT: stw r5, 134(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -1120,9 +1120,9 @@ ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -144(r1) ; P9BE-NEXT: std r4, 200(r1) -; P9BE-NEXT: addi r4, r1, 126 ; P9BE-NEXT: sth r5, 208(r1) -; P9BE-NEXT: lwz r5, 200(r1) +; P9BE-NEXT: rldicl r5, r4, 32, 32 +; P9BE-NEXT: addi r4, r1, 126 ; P9BE-NEXT: std r3, 192(r1) ; P9BE-NEXT: stdx r3, 0, r4 ; P9BE-NEXT: mr r3, r4 @@ -1140,11 +1140,11 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -144(r1) -; P10BE-NEXT: std r4, 200(r1) ; P10BE-NEXT: std r3, 192(r1) -; P10BE-NEXT: addi r4, r1, 126 +; P10BE-NEXT: std r4, 200(r1) ; P10BE-NEXT: sth r5, 208(r1) -; P10BE-NEXT: lwz r5, 200(r1) +; P10BE-NEXT: rldicl r5, r4, 32, 32 +; P10BE-NEXT: addi r4, r1, 126 ; P10BE-NEXT: stdx r3, 0, r4 ; P10BE-NEXT: mr r3, r4 ; P10BE-NEXT: stw r5, 134(r1) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll @@ -569,14 +569,13 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: rldicl r4, r3, 56, 8 ; P8BE-NEXT: stb r3, 183(r1) -; P8BE-NEXT: stw r4, 179(r1) +; P8BE-NEXT: rldicl r3, r3, 56, 8 ; P8BE-NEXT: lbz r4, 183(r1) -; P8BE-NEXT: lwz r3, 179(r1) -; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: stw r3, 179(r1) ; P8BE-NEXT: stw r3, 123(r1) ; P8BE-NEXT: addi r3, r1, 123 +; P8BE-NEXT: stb r4, 127(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -590,14 +589,13 @@ ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: rldicl r4, r3, 56, 8 ; P9BE-NEXT: stb r3, 183(r1) -; P9BE-NEXT: stw r4, 179(r1) +; P9BE-NEXT: rldicl r3, r3, 56, 8 ; P9BE-NEXT: lbz r4, 183(r1) -; P9BE-NEXT: lwz r3, 179(r1) -; P9BE-NEXT: stb r4, 127(r1) +; P9BE-NEXT: stw r3, 179(r1) ; P9BE-NEXT: stw r3, 123(r1) ; P9BE-NEXT: addi r3, r1, 123 +; P9BE-NEXT: stb r4, 127(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop ; P9BE-NEXT: li r3, 0 @@ -611,14 +609,13 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: rldicl r4, r3, 56, 8 ; P10BE-NEXT: stb r3, 183(r1) -; P10BE-NEXT: stw r4, 179(r1) +; P10BE-NEXT: rldicl r3, r3, 56, 8 ; P10BE-NEXT: lbz r4, 183(r1) -; P10BE-NEXT: lwz r3, 179(r1) -; P10BE-NEXT: stb r4, 127(r1) +; P10BE-NEXT: stw r3, 179(r1) ; P10BE-NEXT: stw r3, 123(r1) ; P10BE-NEXT: addi r3, r1, 123 +; P10BE-NEXT: stb r4, 127(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop ; P10BE-NEXT: li r3, 0 @@ -884,18 +881,17 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: rldicl r4, r3, 40, 24 -; P8BE-NEXT: rldicl r5, r3, 56, 8 +; P8BE-NEXT: rldicl r4, r3, 56, 8 ; P8BE-NEXT: stb r3, 183(r1) -; P8BE-NEXT: stw r4, 177(r1) -; P8BE-NEXT: sth r5, 181(r1) -; P8BE-NEXT: lbz r4, 183(r1) -; P8BE-NEXT: lwz r3, 177(r1) -; P8BE-NEXT: lhz r5, 181(r1) -; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: rldicl r3, r3, 40, 24 +; P8BE-NEXT: sth r4, 181(r1) +; P8BE-NEXT: lbz r5, 183(r1) +; P8BE-NEXT: lhz r4, 181(r1) +; P8BE-NEXT: stw r3, 177(r1) ; P8BE-NEXT: stw r3, 121(r1) ; P8BE-NEXT: addi r3, r1, 121 -; P8BE-NEXT: sth r5, 125(r1) +; P8BE-NEXT: stb r5, 127(r1) +; P8BE-NEXT: sth r4, 125(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -909,17 +905,16 @@ ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: rldicl r4, r3, 40, 24 +; P9BE-NEXT: rldicl r4, r3, 56, 8 ; P9BE-NEXT: stb r3, 183(r1) ; P9BE-NEXT: lbz r5, 183(r1) -; P9BE-NEXT: stw r4, 177(r1) -; P9BE-NEXT: rldicl r4, r3, 56, 8 -; P9BE-NEXT: lwz r3, 177(r1) +; P9BE-NEXT: rldicl r3, r3, 40, 24 ; P9BE-NEXT: sth r4, 181(r1) -; P9BE-NEXT: lhz r4, 181(r1) -; P9BE-NEXT: stb r5, 127(r1) +; P9BE-NEXT: stw r3, 177(r1) ; P9BE-NEXT: stw r3, 121(r1) ; P9BE-NEXT: addi r3, r1, 121 +; P9BE-NEXT: lhz r4, 181(r1) +; P9BE-NEXT: stb r5, 127(r1) ; P9BE-NEXT: sth r4, 125(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop @@ -934,17 +929,16 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: rldicl r4, r3, 40, 24 +; P10BE-NEXT: rldicl r4, r3, 56, 8 ; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: rldicl r3, r3, 40, 24 ; P10BE-NEXT: lbz r5, 183(r1) -; P10BE-NEXT: stw r4, 177(r1) -; P10BE-NEXT: rldicl r4, r3, 56, 8 -; P10BE-NEXT: lwz r3, 177(r1) ; P10BE-NEXT: sth r4, 181(r1) -; P10BE-NEXT: lhz r4, 181(r1) -; P10BE-NEXT: stb r5, 127(r1) +; P10BE-NEXT: stw r3, 177(r1) ; P10BE-NEXT: stw r3, 121(r1) ; P10BE-NEXT: addi r3, r1, 121 +; P10BE-NEXT: lhz r4, 181(r1) +; P10BE-NEXT: stb r5, 127(r1) ; P10BE-NEXT: sth r4, 125(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop diff --git a/llvm/test/CodeGen/PowerPC/pr45301.ll b/llvm/test/CodeGen/PowerPC/pr45301.ll --- a/llvm/test/CodeGen/PowerPC/pr45301.ll +++ b/llvm/test/CodeGen/PowerPC/pr45301.ll @@ -13,15 +13,15 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: addis r4, r2, g@toc@ha ; CHECK-NEXT: addi r4, r4, g@toc@l -; CHECK-NEXT: ld r5, 0(r4) -; CHECK-NEXT: std r5, 0(r3) ; CHECK-NEXT: ld r5, 16(r4) ; CHECK-NEXT: std r5, 16(r3) -; CHECK-NEXT: ld r6, 8(r4) -; CHECK-NEXT: std r6, 8(r3) -; CHECK-NEXT: ld r6, 24(r4) -; CHECK-NEXT: std r6, 24(r3) -; CHECK-NEXT: lwz r6, 0(r3) +; CHECK-NEXT: ld r6, 0(r4) +; CHECK-NEXT: std r6, 0(r3) +; CHECK-NEXT: rldicl r6, r6, 32, 32 +; CHECK-NEXT: ld r7, 8(r4) +; CHECK-NEXT: std r7, 8(r3) +; CHECK-NEXT: ld r7, 24(r4) +; CHECK-NEXT: std r7, 24(r3) ; CHECK-NEXT: ld r4, 32(r4) ; CHECK-NEXT: std r4, 32(r3) ; CHECK-NEXT: li r4, 20 diff --git a/llvm/test/CodeGen/PowerPC/store-forward-be32.ll b/llvm/test/CodeGen/PowerPC/store-forward-be32.ll --- a/llvm/test/CodeGen/PowerPC/store-forward-be32.ll +++ b/llvm/test/CodeGen/PowerPC/store-forward-be32.ll @@ -15,8 +15,9 @@ define i32 @ustc1(%struct.USST* noundef byval(%struct.USST) align 4 %s) { ; CHECK-LABEL: ustc1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lbz 3, 24(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srwi 3, 3, 24 +; CHECK-NEXT: stw 4, 24(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.USST, %struct.USST* %s, i32 0, i32 0 @@ -30,8 +31,9 @@ define i32 @ustc2(%struct.USST* noundef byval(%struct.USST) align 4 %s) { ; CHECK-LABEL: ustc2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lhz 3, 24(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srwi 3, 3, 16 +; CHECK-NEXT: stw 4, 24(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.USST, %struct.USST* %s, i32 0, i32 0 @@ -44,9 +46,9 @@ define i32 @stc1(%struct.SST* noundef byval(%struct.SST) align 4 %s) { ; CHECK-LABEL: stc1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lbz 3, 24(1) -; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srawi 3, 3, 24 +; CHECK-NEXT: stw 4, 24(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.SST, %struct.SST* %s, i32 0, i32 0 @@ -60,8 +62,9 @@ define i32 @stc2(%struct.SST* noundef byval(%struct.SST) align 4 %s) { ; CHECK-LABEL: stc2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lha 3, 24(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srawi 3, 3, 16 +; CHECK-NEXT: stw 4, 24(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.SST, %struct.SST* %s, i32 0, i32 0 @@ -74,8 +77,9 @@ define i32 @ctc(%struct.CST* noundef byval(%struct.CST) align 4 %s) { ; CHECK-LABEL: ctc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lbz 3, 24(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srwi 3, 3, 24 +; CHECK-NEXT: stw 4, 24(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.CST, %struct.CST* %s, i32 0, i32 0 @@ -88,9 +92,9 @@ define i32 @sctc(%struct.SCST* noundef byval(%struct.SCST) align 4 %s) { ; CHECK-LABEL: sctc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lbz 3, 24(1) -; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srawi 3, 3, 24 +; CHECK-NEXT: stw 4, 24(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.SCST, %struct.SCST* %s, i32 0, i32 0 @@ -117,9 +121,8 @@ ; CHECK-LABEL: tc41: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lbz 3, 24(1) +; CHECK-NEXT: srawi 3, 3, 24 ; CHECK-NEXT: stw 4, 28(1) -; CHECK-NEXT: extsb 3, 3 ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ST, %struct.ST* %s, i32 0, i32 0 @@ -133,7 +136,7 @@ ; CHECK-LABEL: tc42: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lha 3, 24(1) +; CHECK-NEXT: srawi 3, 3, 16 ; CHECK-NEXT: stw 4, 28(1) ; CHECK-NEXT: blr entry: @@ -176,7 +179,7 @@ ; CHECK-LABEL: utc41: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lbz 3, 24(1) +; CHECK-NEXT: srwi 3, 3, 24 ; CHECK-NEXT: stw 4, 28(1) ; CHECK-NEXT: blr entry: @@ -191,7 +194,7 @@ ; CHECK-LABEL: utc42: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lhz 3, 24(1) +; CHECK-NEXT: srwi 3, 3, 16 ; CHECK-NEXT: stw 4, 28(1) ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll --- a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll +++ b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll @@ -17,8 +17,9 @@ define zeroext i32 @ustc1(%struct.USST* noundef byval(%struct.USST) align 8 %s) { ; CHECK-LABEL: ustc1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lbz 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 8, 56 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.USST, %struct.USST* %s, i32 0, i32 0 @@ -32,8 +33,9 @@ define zeroext i32 @ustc2(%struct.USST* noundef byval(%struct.USST) align 8 %s) { ; CHECK-LABEL: ustc2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lhz 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 16, 48 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.USST, %struct.USST* %s, i32 0, i32 0 @@ -46,9 +48,11 @@ define signext i32 @stc1(%struct.SST* noundef byval(%struct.SST) align 8 %s) { ; CHECK-LABEL: stc1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lbz 3, 48(1) -; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 16, 48 +; CHECK-NEXT: std 4, 48(1) +; CHECK-NEXT: extsh 3, 3 +; CHECK-NEXT: srawi 3, 3, 8 ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.SST, %struct.SST* %s, i32 0, i32 0 @@ -62,8 +66,9 @@ define signext i32 @stc2(%struct.SST* noundef byval(%struct.SST) align 8 %s) { ; CHECK-LABEL: stc2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lha 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: sradi 3, 3, 48 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.SST, %struct.SST* %s, i32 0, i32 0 @@ -76,8 +81,9 @@ define signext i32 @ctc(%struct.CST* noundef byval(%struct.CST) align 8 %s) { ; CHECK-LABEL: ctc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lbz 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 8, 56 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.CST, %struct.CST* %s, i32 0, i32 0 @@ -90,9 +96,9 @@ define signext i32 @sctc(%struct.SCST* noundef byval(%struct.SCST) align 8 %s) { ; CHECK-LABEL: sctc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lbz 3, 48(1) -; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: sradi 3, 3, 56 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.SCST, %struct.SCST* %s, i32 0, i32 0 @@ -105,8 +111,9 @@ define signext i32 @tc44(%struct.ST* noundef byval(%struct.ST) align 8 %s) { ; CHECK-LABEL: tc44: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lwa 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: sradi 3, 3, 32 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ST, %struct.ST* %s, i32 0, i32 0 @@ -118,9 +125,9 @@ define signext i32 @tc41(%struct.ST* noundef byval(%struct.ST) align 8 %s) { ; CHECK-LABEL: tc41: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lbz 3, 48(1) -; CHECK-NEXT: extsb 3, 3 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: sradi 3, 3, 56 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ST, %struct.ST* %s, i32 0, i32 0 @@ -133,8 +140,9 @@ define signext i32 @tc42(%struct.ST* noundef byval(%struct.ST) align 8 %s) { ; CHECK-LABEL: tc42: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lha 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: sradi 3, 3, 48 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ST, %struct.ST* %s, i32 0, i32 0 @@ -147,9 +155,9 @@ define signext i32 @tc43(%struct.ST* noundef byval(%struct.ST) align 8 %s) { ; CHECK-LABEL: tc43: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lwz 3, 48(1) -; CHECK-NEXT: srawi 3, 3, 8 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: sradi 3, 3, 40 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ST, %struct.ST* %s, i32 0, i32 0 @@ -162,8 +170,9 @@ define zeroext i32 @utc44(%struct.UST* noundef byval(%struct.UST) align 8 %s) #0 { ; CHECK-LABEL: utc44: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lwz 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 32, 32 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.UST, %struct.UST* %s, i32 0, i32 0 @@ -175,8 +184,9 @@ define zeroext i32 @utc41(%struct.UST* noundef byval(%struct.UST) align 8 %s) { ; CHECK-LABEL: utc41: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lbz 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 8, 56 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.UST, %struct.UST* %s, i32 0, i32 0 @@ -189,8 +199,9 @@ define zeroext i32 @utc42(%struct.UST* noundef byval(%struct.UST) align 8 %s) { ; CHECK-LABEL: utc42: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lhz 3, 48(1) +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 16, 48 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.UST, %struct.UST* %s, i32 0, i32 0 @@ -203,9 +214,9 @@ define zeroext i32 @utc43(%struct.UST* noundef byval(%struct.UST) align 8 %s) { ; CHECK-LABEL: utc43: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) -; CHECK-NEXT: lwz 3, 48(1) -; CHECK-NEXT: rlwinm 3, 3, 24, 8, 31 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: rldicl 3, 3, 24, 40 +; CHECK-NEXT: std 4, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.UST, %struct.UST* %s, i32 0, i32 0 @@ -266,9 +277,10 @@ define i64 @ltc84(%struct.LST* noundef byval(%struct.LST) align 8 %s) { ; CHECK-LABEL: ltc84: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: sradi 3, 3, 32 +; CHECK-NEXT: std 5, 48(1) ; CHECK-NEXT: std 4, 56(1) -; CHECK-NEXT: lwa 3, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.LST, %struct.LST* %s, i32 0, i32 0 @@ -297,9 +309,10 @@ define i64 @ltc82(%struct.LST* noundef byval(%struct.LST) align 8 %s) { ; CHECK-LABEL: ltc82: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: sradi 3, 3, 48 +; CHECK-NEXT: std 5, 48(1) ; CHECK-NEXT: std 4, 56(1) -; CHECK-NEXT: lha 3, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.LST, %struct.LST* %s, i32 0, i32 0 @@ -312,10 +325,10 @@ define i64 @ltc81(%struct.LST* noundef byval(%struct.LST) align 8 %s) { ; CHECK-LABEL: ltc81: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: sradi 3, 3, 56 +; CHECK-NEXT: std 5, 48(1) ; CHECK-NEXT: std 4, 56(1) -; CHECK-NEXT: lbz 3, 48(1) -; CHECK-NEXT: extsb 3, 3 ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.LST, %struct.LST* %s, i32 0, i32 0 @@ -389,9 +402,10 @@ define i64 @ultc84(%struct.ULST* noundef byval(%struct.ULST) align 8 %s) { ; CHECK-LABEL: ultc84: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: rldicl 3, 3, 32, 32 +; CHECK-NEXT: std 5, 48(1) ; CHECK-NEXT: std 4, 56(1) -; CHECK-NEXT: lwz 3, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ULST, %struct.ULST* %s, i32 0, i32 0 @@ -420,9 +434,10 @@ define i64 @ultc82(%struct.ULST* noundef byval(%struct.ULST) align 8 %s) { ; CHECK-LABEL: ultc82: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: rldicl 3, 3, 16, 48 +; CHECK-NEXT: std 5, 48(1) ; CHECK-NEXT: std 4, 56(1) -; CHECK-NEXT: lhz 3, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ULST, %struct.ULST* %s, i32 0, i32 0 @@ -435,9 +450,10 @@ define i64 @ultc81(%struct.ULST* noundef byval(%struct.ULST) align 8 %s) { ; CHECK-LABEL: ultc81: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: std 3, 48(1) +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: rldicl 3, 3, 8, 56 +; CHECK-NEXT: std 5, 48(1) ; CHECK-NEXT: std 4, 56(1) -; CHECK-NEXT: lbz 3, 48(1) ; CHECK-NEXT: blr entry: %a = getelementptr inbounds %struct.ULST, %struct.ULST* %s, i32 0, i32 0