Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16572,7 +16572,6 @@ } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { - Val = ST->getValue(); EVT STType = Val.getValueType(); EVT STMemType = ST->getMemoryVT(); if (STType == STMemType) @@ -16664,6 +16663,7 @@ // significant bit in the loaded value maps to the least significant bit in // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. + int64_t OrigOffset = Offset; if (DAG.getDataLayout().isBigEndian()) Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() - (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) / @@ -16715,11 +16715,28 @@ } } + // On big-endian targets a narrower load from the store's address (i.e. with + // OrigOffset == 0) reads the most significant bytes of the stored value, so + // the normalized Offset is non-zero. Shift the stored value right by Offset + // bytes so the Offset == 0 forwarding path used for little-endian applies. + SDValue Val = ST->getValue(); + if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) { + if (STType.isInteger() && !STType.isVector() && LDType.isInteger() && + !LDType.isVector() && isTypeLegal(STType) && + TLI.isOperationLegal(ISD::SRL, STType)) { + // Shift amounts must use the target's shift-amount type once types are + // legal, so do not reuse STType for the constant. + Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val, + DAG.getConstant(Offset * 8, SDLoc(LD), + getShiftAmountTy(STType))); + Offset = 0; + } + } + // TODO: Deal with nonzero offset. if (LD->getBasePtr().isUndef() || Offset != 0) return SDValue(); // Model necessary truncations / extenstions. - SDValue Val; // Truncate Value To Stored Memory Size.
do { if (!getTruncatedStoreValue(ST, Val)) Index: llvm/test/CodeGen/AArch64/load-store-forwarding.ll =================================================================== --- llvm/test/CodeGen/AArch64/load-store-forwarding.ll +++ llvm/test/CodeGen/AArch64/load-store-forwarding.ll @@ -5,8 +5,9 @@ define i8 @test1(i32 %a, i8* %pa) { ; CHECK-BE-LABEL: test1: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: str w0, [x1] -; CHECK-BE-NEXT: ldrb w0, [x1] +; CHECK-BE-NEXT: mov w8, w0 +; CHECK-BE-NEXT: lsr w0, w0, #24 +; CHECK-BE-NEXT: str w8, [x1] ; CHECK-BE-NEXT: ret ; ; CHECK-LE-LABEL: test1: Index: llvm/test/CodeGen/Mips/cconv/vector.ll =================================================================== --- llvm/test/CodeGen/Mips/cconv/vector.ll +++ llvm/test/CodeGen/Mips/cconv/vector.ll @@ -1751,12 +1751,10 @@ ; MIPS64R5EB: # %bb.0: ; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 ; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS64R5EB-NEXT: sd $5, 16($sp) -; MIPS64R5EB-NEXT: sd $4, 24($sp) -; MIPS64R5EB-NEXT: lw $1, 16($sp) +; MIPS64R5EB-NEXT: dsrl $1, $5, 32 ; MIPS64R5EB-NEXT: insert.d $w0[0], $1 ; MIPS64R5EB-NEXT: insert.d $w0[1], $5 -; MIPS64R5EB-NEXT: lw $1, 24($sp) +; MIPS64R5EB-NEXT: dsrl $1, $4, 32 ; MIPS64R5EB-NEXT: insert.d $w1[0], $1 ; MIPS64R5EB-NEXT: insert.d $w1[1], $4 ; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0 Index: llvm/test/CodeGen/PowerPC/aix-cc-byval.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-cc-byval.ll +++ llvm/test/CodeGen/PowerPC/aix-cc-byval.ll @@ -80,8 +80,9 @@ ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3 -; 32BIT: STW killed renamable $r3, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 8) -; 32BIT-NEXT: renamable $r3 = LBZ 0, %fixed-stack.0 :: (dereferenceable load (s8) +; 32BIT: renamable $r4 = COPY $r3 +; 32BIT: renamable $r3 = RLWINM $r3, 8, 24, 31 +; 32BIT: STW killed renamable $r4, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 8) ; 32BIT-NEXT: BLR ; 64BIT: fixedStack: 
@@ -92,18 +93,21 @@ ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 -; 64BIT: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) -; 64BIT-NEXT: renamable $x3 = LBZ8 0, %fixed-stack.0 :: (dereferenceable load (s8) +; 64BIT: renamable $x4 = COPY $x3 +; 64BIT: renamable $x3 = RLDICL $x3, 8, 56 +; 64BIT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) ; CHECKASM-LABEL: .test_byval_1Byte: -; ASM32: stw 3, 24(1) -; ASM32-NEXT: lbz 3, 24(1) -; ASM32-NEXT: blr +; ASM32: mr 4, 3 +; ASM32-NEXT: srwi 3, 3, 24 +; ASM32-NEXT: stw 4, 24(1) +; ASM32-NEXT: blr -; ASM64: std 3, 48(1) -; ASM64-NEXT: lbz 3, 48(1) -; ASM64-NEXT: blr +; ASM64: mr 4, 3 +; ASM64-NEXT: rldicl 3, 3, 8, 56 +; ASM64-NEXT: std 4, 48(1) +; ASM64-NEXT: blr @f = common global float 0.000000e+00, align 4 @@ -433,10 +437,10 @@ ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 ; 64BIT: STD killed renamable $x3, 0, %fixed-stack.2 :: (store (s64) into %fixed-stack.2, align 16) -; 64BIT-NEXT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) +; 64BIT: STD renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) ; 64BIT-DAG: renamable $r[[SCRATCH1:[0-9]+]] = LBZ 3, %fixed-stack.2 :: (dereferenceable load (s8) -; 64BIT-DAG: renamable $r[[SCRATCH2:[0-9]+]] = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) -; 64BIT-NEXT: renamable $r[[SCRATCH3:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCH2]], killed renamable $r[[SCRATCH1]] +; 64BIT-DAG: renamable $x[[SCRATCH2:[0-9]+]] = RLDICL killed renamable $x4, 32, 32 +; 64BIT-NEXT: renamable $r[[SCRATCH3:[0-9]+]] = nsw ADD4 renamable $r[[SCRATCH2]], killed renamable $r[[SCRATCH1]], implicit killed $x[[SCRATCH2]] ; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r[[SCRATCH3]] ; 64BIT-NEXT: BLR8 @@ -449,9 +453,9 @@ ; ASM32-NEXT: blr ; ASM64: std 3, 48(1) +; ASM64-NEXT: lbz [[SCRATCH1:[0-9]+]], 51(1) ; ASM64-NEXT: std 4, 56(1) -; ASM64-DAG: lbz 
[[SCRATCH1:[0-9]+]], 51(1) -; ASM64-DAG: lwz [[SCRATCH2:[0-9]+]], 56(1) +; ASM64-NEXT: rldicl [[SCRATCH2:[0-9]+]], 4, 32, 32 ; ASM64-NEXT: add [[SCRATCH3:[0-9]+]], [[SCRATCH2]], [[SCRATCH1]] ; ASM64-NEXT: extsw 3, [[SCRATCH3]] ; ASM64-NEXT: blr Index: llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll +++ llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll @@ -925,13 +925,13 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -144(r1) -; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: addi r5, r1, 128 +; P8BE-NEXT: rldicl r6, r4, 32, 32 ; P8BE-NEXT: std r3, 192(r1) -; P8BE-NEXT: lwz r4, 200(r1) +; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: std r3, 128(r1) ; P8BE-NEXT: mr r3, r5 -; P8BE-NEXT: stw r4, 136(r1) +; P8BE-NEXT: stw r6, 136(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -946,9 +946,9 @@ ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -144(r1) ; P9BE-NEXT: std r4, 200(r1) +; P9BE-NEXT: rldicl r5, r4, 32, 32 ; P9BE-NEXT: addi r4, r1, 128 ; P9BE-NEXT: std r3, 192(r1) -; P9BE-NEXT: lwz r5, 200(r1) ; P9BE-NEXT: std r3, 128(r1) ; P9BE-NEXT: mr r3, r4 ; P9BE-NEXT: stw r5, 136(r1) @@ -965,10 +965,10 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -144(r1) -; P10BE-NEXT: std r4, 200(r1) ; P10BE-NEXT: std r3, 192(r1) +; P10BE-NEXT: std r4, 200(r1) +; P10BE-NEXT: rldicl r5, r4, 32, 32 ; P10BE-NEXT: addi r4, r1, 128 -; P10BE-NEXT: lwz r5, 200(r1) ; P10BE-NEXT: std r3, 128(r1) ; P10BE-NEXT: mr r3, r4 ; P10BE-NEXT: stw r5, 136(r1) @@ -1098,14 +1098,14 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -144(r1) -; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: addi r6, r1, 126 +; P8BE-NEXT: sth r5, 208(r1) +; P8BE-NEXT: rldicl r5, r4, 32, 32 ; P8BE-NEXT: std r3, 192(r1) -; P8BE-NEXT: lwz r4, 200(r1) +; P8BE-NEXT: std r4, 200(r1) ; P8BE-NEXT: stdx r3, 0, r6 ; 
P8BE-NEXT: mr r3, r6 -; P8BE-NEXT: sth r5, 208(r1) -; P8BE-NEXT: stw r4, 134(r1) +; P8BE-NEXT: stw r5, 134(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -1120,9 +1120,9 @@ ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -144(r1) ; P9BE-NEXT: std r4, 200(r1) -; P9BE-NEXT: addi r4, r1, 126 ; P9BE-NEXT: sth r5, 208(r1) -; P9BE-NEXT: lwz r5, 200(r1) +; P9BE-NEXT: rldicl r5, r4, 32, 32 +; P9BE-NEXT: addi r4, r1, 126 ; P9BE-NEXT: std r3, 192(r1) ; P9BE-NEXT: stdx r3, 0, r4 ; P9BE-NEXT: mr r3, r4 @@ -1140,11 +1140,11 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -144(r1) -; P10BE-NEXT: std r4, 200(r1) ; P10BE-NEXT: std r3, 192(r1) -; P10BE-NEXT: addi r4, r1, 126 +; P10BE-NEXT: std r4, 200(r1) ; P10BE-NEXT: sth r5, 208(r1) -; P10BE-NEXT: lwz r5, 200(r1) +; P10BE-NEXT: rldicl r5, r4, 32, 32 +; P10BE-NEXT: addi r4, r1, 126 ; P10BE-NEXT: stdx r3, 0, r4 ; P10BE-NEXT: mr r3, r4 ; P10BE-NEXT: stw r5, 134(r1) Index: llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll +++ llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll @@ -569,14 +569,13 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: rldicl r4, r3, 56, 8 ; P8BE-NEXT: stb r3, 183(r1) -; P8BE-NEXT: stw r4, 179(r1) +; P8BE-NEXT: rldicl r3, r3, 56, 8 ; P8BE-NEXT: lbz r4, 183(r1) -; P8BE-NEXT: lwz r3, 179(r1) -; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: stw r3, 179(r1) ; P8BE-NEXT: stw r3, 123(r1) ; P8BE-NEXT: addi r3, r1, 123 +; P8BE-NEXT: stb r4, 127(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -590,14 +589,13 @@ ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: rldicl r4, r3, 56, 8 ; P9BE-NEXT: stb r3, 183(r1) -; P9BE-NEXT: stw r4, 179(r1) +; P9BE-NEXT: rldicl r3, r3, 56, 8 ; P9BE-NEXT: lbz r4, 183(r1) -; P9BE-NEXT: lwz r3, 179(r1) -; 
P9BE-NEXT: stb r4, 127(r1) +; P9BE-NEXT: stw r3, 179(r1) ; P9BE-NEXT: stw r3, 123(r1) ; P9BE-NEXT: addi r3, r1, 123 +; P9BE-NEXT: stb r4, 127(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop ; P9BE-NEXT: li r3, 0 @@ -611,14 +609,13 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: rldicl r4, r3, 56, 8 ; P10BE-NEXT: stb r3, 183(r1) -; P10BE-NEXT: stw r4, 179(r1) +; P10BE-NEXT: rldicl r3, r3, 56, 8 ; P10BE-NEXT: lbz r4, 183(r1) -; P10BE-NEXT: lwz r3, 179(r1) -; P10BE-NEXT: stb r4, 127(r1) +; P10BE-NEXT: stw r3, 179(r1) ; P10BE-NEXT: stw r3, 123(r1) ; P10BE-NEXT: addi r3, r1, 123 +; P10BE-NEXT: stb r4, 127(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop ; P10BE-NEXT: li r3, 0 @@ -884,18 +881,17 @@ ; P8BE-NEXT: mflr r0 ; P8BE-NEXT: std r0, 16(r1) ; P8BE-NEXT: stdu r1, -128(r1) -; P8BE-NEXT: rldicl r4, r3, 40, 24 -; P8BE-NEXT: rldicl r5, r3, 56, 8 +; P8BE-NEXT: rldicl r4, r3, 56, 8 ; P8BE-NEXT: stb r3, 183(r1) -; P8BE-NEXT: stw r4, 177(r1) -; P8BE-NEXT: sth r5, 181(r1) -; P8BE-NEXT: lbz r4, 183(r1) -; P8BE-NEXT: lwz r3, 177(r1) -; P8BE-NEXT: lhz r5, 181(r1) -; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: rldicl r3, r3, 40, 24 +; P8BE-NEXT: sth r4, 181(r1) +; P8BE-NEXT: lbz r5, 183(r1) +; P8BE-NEXT: lhz r4, 181(r1) +; P8BE-NEXT: stw r3, 177(r1) ; P8BE-NEXT: stw r3, 121(r1) ; P8BE-NEXT: addi r3, r1, 121 -; P8BE-NEXT: sth r5, 125(r1) +; P8BE-NEXT: stb r5, 127(r1) +; P8BE-NEXT: sth r4, 125(r1) ; P8BE-NEXT: bl callee ; P8BE-NEXT: nop ; P8BE-NEXT: li r3, 0 @@ -909,17 +905,16 @@ ; P9BE-NEXT: mflr r0 ; P9BE-NEXT: std r0, 16(r1) ; P9BE-NEXT: stdu r1, -128(r1) -; P9BE-NEXT: rldicl r4, r3, 40, 24 +; P9BE-NEXT: rldicl r4, r3, 56, 8 ; P9BE-NEXT: stb r3, 183(r1) ; P9BE-NEXT: lbz r5, 183(r1) -; P9BE-NEXT: stw r4, 177(r1) -; P9BE-NEXT: rldicl r4, r3, 56, 8 -; P9BE-NEXT: lwz r3, 177(r1) +; P9BE-NEXT: rldicl r3, r3, 40, 24 ; P9BE-NEXT: sth r4, 181(r1) -; P9BE-NEXT: lhz r4, 181(r1) -; P9BE-NEXT: stb r5, 127(r1) +; P9BE-NEXT: stw r3, 177(r1) ; 
P9BE-NEXT: stw r3, 121(r1) ; P9BE-NEXT: addi r3, r1, 121 +; P9BE-NEXT: lhz r4, 181(r1) +; P9BE-NEXT: stb r5, 127(r1) ; P9BE-NEXT: sth r4, 125(r1) ; P9BE-NEXT: bl callee ; P9BE-NEXT: nop @@ -934,17 +929,16 @@ ; P10BE-NEXT: mflr r0 ; P10BE-NEXT: std r0, 16(r1) ; P10BE-NEXT: stdu r1, -128(r1) -; P10BE-NEXT: rldicl r4, r3, 40, 24 +; P10BE-NEXT: rldicl r4, r3, 56, 8 ; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: rldicl r3, r3, 40, 24 ; P10BE-NEXT: lbz r5, 183(r1) -; P10BE-NEXT: stw r4, 177(r1) -; P10BE-NEXT: rldicl r4, r3, 56, 8 -; P10BE-NEXT: lwz r3, 177(r1) ; P10BE-NEXT: sth r4, 181(r1) -; P10BE-NEXT: lhz r4, 181(r1) -; P10BE-NEXT: stb r5, 127(r1) +; P10BE-NEXT: stw r3, 177(r1) ; P10BE-NEXT: stw r3, 121(r1) ; P10BE-NEXT: addi r3, r1, 121 +; P10BE-NEXT: lhz r4, 181(r1) +; P10BE-NEXT: stb r5, 127(r1) ; P10BE-NEXT: sth r4, 125(r1) ; P10BE-NEXT: bl callee ; P10BE-NEXT: nop Index: llvm/test/CodeGen/PowerPC/pr45301.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pr45301.ll +++ llvm/test/CodeGen/PowerPC/pr45301.ll @@ -13,15 +13,15 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: addis r4, r2, g@toc@ha ; CHECK-NEXT: addi r4, r4, g@toc@l -; CHECK-NEXT: ld r5, 0(r4) -; CHECK-NEXT: std r5, 0(r3) ; CHECK-NEXT: ld r5, 16(r4) ; CHECK-NEXT: std r5, 16(r3) -; CHECK-NEXT: ld r6, 8(r4) -; CHECK-NEXT: std r6, 8(r3) -; CHECK-NEXT: ld r6, 24(r4) -; CHECK-NEXT: std r6, 24(r3) -; CHECK-NEXT: lwz r6, 0(r3) +; CHECK-NEXT: ld r6, 0(r4) +; CHECK-NEXT: std r6, 0(r3) +; CHECK-NEXT: rldicl r6, r6, 32, 32 +; CHECK-NEXT: ld r7, 8(r4) +; CHECK-NEXT: std r7, 8(r3) +; CHECK-NEXT: ld r7, 24(r4) +; CHECK-NEXT: std r7, 24(r3) ; CHECK-NEXT: ld r4, 32(r4) ; CHECK-NEXT: std r4, 32(r3) ; CHECK-NEXT: li r4, 20