diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8948,6 +8948,10 @@ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) return MULH; + // Attempt to convert a sra of a load into a narrower sign-extending load. + if (SDValue NarrowLoad = ReduceLoadWidth(N)) + return NarrowLoad; + return SDValue(); } @@ -12120,6 +12124,28 @@ if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + } else if (Opc == ISD::SRA) { + // Another special-case: SRA is basically sign-extending a narrower value, + // or it may be shifting a higher subword, half or byte into the lowest + // bits. + SDValue N1 = N->getOperand(1); + // Only handle shift with constant shift amount, and the shiftee must be a + // non-zextload load. + auto *LN0 = dyn_cast<LoadSDNode>(N0); + auto *N1C = dyn_cast<ConstantSDNode>(N1); + if (!N1C || !LN0) + return SDValue(); + if (LN0->getExtensionType() == ISD::ZEXTLOAD) + return SDValue(); + // If the shift amount is larger than the memory type then we're not + // accessing any of the loaded bytes. + ShAmt = N1C->getZExtValue(); + uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits(); + if (MemoryWidth <= ShAmt) + return SDValue(); + // Attempt to fold away the SRA by using SEXTLOAD. + ExtType = ISD::SEXTLOAD; + ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt); } else if (Opc == ISD::SRL) { // Another special-case: SRL is basically zero-extending a narrower value, // or it maybe shifting a higher subword, half or byte into the lowest @@ -12161,6 +12187,8 @@ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); } + // FIXME: Investigate/describe why we limit this to hasOneUse (it seems a bit + // limiting for the case when N==N0, i.e. when being called from visitSRL). 
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { SDValue SRL = N0; if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { diff --git a/llvm/test/CodeGen/PowerPC/pr13891.ll b/llvm/test/CodeGen/PowerPC/pr13891.ll --- a/llvm/test/CodeGen/PowerPC/pr13891.ll +++ b/llvm/test/CodeGen/PowerPC/pr13891.ll @@ -7,7 +7,7 @@ define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline { ; CHECK-LABEL: _Z5check3foos: ; CHECK: sth 3, {{[0-9]+}}(1) -; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1) +; CHECK: lbz {{[0-9]+}}, {{[0-9]+}}(1) entry: %0 = bitcast %struct.foo* %f to i16* %1 = load i16, i16* %0, align 2 diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll --- a/llvm/test/CodeGen/X86/combine-sra-load.ll +++ b/llvm/test/CodeGen/X86/combine-sra-load.ll @@ -1,12 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK -; FIXME: fold (sra (load i32), 16)) -> (sextload i16) +; fold (sra (load i32), 16)) -> (sextload i16) define i32 @sra_half(i32* %p) { ; CHECK-LABEL: sra_half: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: sarl $16, %eax +; CHECK-NEXT: movswl 2(%rdi), %eax ; CHECK-NEXT: retq %load = load i32, i32* %p %shift = ashr i32 %load, 16 @@ -25,12 +24,11 @@ ret <4 x i32> %shift } -; FIXME: fold (sra (load i64), 48)) -> (sextload i16) +; fold (sra (load i64), 48)) -> (sextload i16) define i64 @sra_large_shift(i64* %r) { ; CHECK-LABEL: sra_large_shift: ; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: sarq $48, %rax +; CHECK-NEXT: movswq 6(%rdi), %rax ; CHECK-NEXT: retq %t0 = load i64, i64* %r %conv = ashr i64 %t0, 48 @@ -61,12 +59,11 @@ ret i32 %shift } -; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8) +; fold (sra (sextload i16 to i32), 8) -> (sextload i8) define i32 @sra_of_sextload(i16* %p) { ; CHECK-LABEL: sra_of_sextload: ; CHECK: # 
%bb.0: -; CHECK-NEXT: movswl (%rdi), %eax -; CHECK-NEXT: sarl $8, %eax +; CHECK-NEXT: movsbl 1(%rdi), %eax ; CHECK-NEXT: retq %load = load i16, i16* %p %sext = sext i16 %load to i32 @@ -88,12 +85,11 @@ ret i32 %shift } -; FIXME: Fold even if SRA has multiple uses. +; Fold even if SRA has multiple uses. define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) { ; CHECK-LABEL: sra_to_sextload_multiple_sra_uses: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %ecx -; CHECK-NEXT: sarl $16, %ecx +; CHECK-NEXT: movswl 2(%rdi), %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: xorl $6, %eax ; CHECK-NEXT: orl %ecx, %eax