diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8964,6 +8964,10 @@ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) return MULH; + // Attempt to convert a sra of a load into a narrower sign-extending load. + if (SDValue NarrowLoad = reduceLoadWidth(N)) + return NarrowLoad; + return SDValue(); } @@ -12151,10 +12155,10 @@ if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast(N->getOperand(1))->getVT(); - } else if (Opc == ISD::SRL) { - // Another special-case: SRL is basically zero-extending a narrower value, - // or it may be shifting a higher subword, half or byte into the lowest - // bits. + } else if (Opc == ISD::SRL || Opc == ISD::SRA) { + // Another special-case: SRL/SRA is basically zero/sign-extending a narrower + // value, or it may be shifting a higher subword, half or byte into the + // lowest bits. // Only handle shift with constant shift amount, and the shiftee must be a // load. @@ -12168,13 +12172,16 @@ uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits(); if (MemoryWidth <= ShAmt) return SDValue(); - // Attempt to fold away the SRL by using ZEXTLOAD. - ExtType = ISD::ZEXTLOAD; + // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD. + ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt); // If original load is a SEXTLOAD then we can't simply replace it by a // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD - // followed by a ZEXT, but that is not handled at the moment). - if (LN->getExtensionType() == ISD::SEXTLOAD) + // followed by a ZEXT, but that is not handled at the moment). Similarly if + // the original load is a ZEXTLOAD and we want to use a SEXTLOAD. 
+ if ((LN->getExtensionType() == ISD::SEXTLOAD || LN->getExtensionType() == ISD::ZEXTLOAD) && LN->getExtensionType() != ExtType) return SDValue(); } else if (Opc == ISD::AND) { // An AND with a constant mask is the same as a truncate + zero-extend. diff --git a/llvm/test/CodeGen/PowerPC/pr13891.ll b/llvm/test/CodeGen/PowerPC/pr13891.ll --- a/llvm/test/CodeGen/PowerPC/pr13891.ll +++ b/llvm/test/CodeGen/PowerPC/pr13891.ll @@ -7,7 +7,7 @@ define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline { ; CHECK-LABEL: _Z5check3foos: ; CHECK: sth 3, {{[0-9]+}}(1) -; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1) +; CHECK: lbz {{[0-9]+}}, {{[0-9]+}}(1) entry: %0 = bitcast %struct.foo* %f to i16* %1 = load i16, i16* %0, align 2 diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll --- a/llvm/test/CodeGen/X86/combine-sra-load.ll +++ b/llvm/test/CodeGen/X86/combine-sra-load.ll @@ -1,12 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK -; FIXME: fold (sra (load i32), 16)) -> (sextload i16) +; fold (sra (load i32), 16) -> (sextload i16) define i32 @sra_half(i32* %p) { ; CHECK-LABEL: sra_half: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: sarl $16, %eax +; CHECK-NEXT: movswl 2(%rdi), %eax ; CHECK-NEXT: retq %load = load i32, i32* %p %shift = ashr i32 %load, 16 @@ -25,12 +24,11 @@ ret <4 x i32> %shift } -; FIXME: fold (sra (load i64), 48)) -> (sextload i16) +; fold (sra (load i64), 48) -> (sextload i16) define i64 @sra_large_shift(i64* %r) { ; CHECK-LABEL: sra_large_shift: ; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: sarq $48, %rax +; CHECK-NEXT: movswq 6(%rdi), %rax ; CHECK-NEXT: retq %t0 = load i64, i64* %r %conv = ashr i64 %t0, 48 @@ -61,12 +59,11 @@ ret i32 %shift } -; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8) +; fold (sra 
(sextload i16 to i32), 8) -> (sextload i8) define i32 @sra_of_sextload(i16* %p) { ; CHECK-LABEL: sra_of_sextload: ; CHECK: # %bb.0: -; CHECK-NEXT: movswl (%rdi), %eax -; CHECK-NEXT: sarl $8, %eax +; CHECK-NEXT: movsbl 1(%rdi), %eax ; CHECK-NEXT: retq %load = load i16, i16* %p %sext = sext i16 %load to i32 @@ -89,12 +86,11 @@ ret i32 %shift } -; FIXME: Fold even if SRA has multiple uses. +; Fold even if SRA has multiple uses. define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) { ; CHECK-LABEL: sra_to_sextload_multiple_sra_uses: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %ecx -; CHECK-NEXT: sarl $16, %ecx +; CHECK-NEXT: movswl 2(%rdi), %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: xorl $6, %eax ; CHECK-NEXT: orl %ecx, %eax