diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14442,15 +14442,23 @@
   SDValue ShiftLHS = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
-  // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine
-  // it with shift 'N' to let it be lowered to UBFX.
+  // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
+  // combine it with shift 'N' to let it be lowered to UBFX except:
+  // ((x >> C) & mask) << C.
   if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
       isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
     uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
-    if (isMask_64(TruncMask) &&
-        ShiftLHS.getOperand(0).getOpcode() == ISD::SRL &&
-        isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1)))
-      return false;
+    if (isMask_64(TruncMask)) {
+      SDValue AndLHS = ShiftLHS.getOperand(0);
+      if (AndLHS.getOpcode() == ISD::SRL) {
+        if (auto *SRLC = dyn_cast<ConstantSDNode>(AndLHS.getOperand(1))) {
+          if (N->getOpcode() == ISD::SHL)
+            if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+              return SRLC->getAPIntValue() == SHLC->getAPIntValue();
+          return false;
+        }
+      }
+    }
   }
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/shift-logic.ll b/llvm/test/CodeGen/AArch64/shift-logic.ll
--- a/llvm/test/CodeGen/AArch64/shift-logic.ll
+++ b/llvm/test/CodeGen/AArch64/shift-logic.ll
@@ -151,3 +151,27 @@
   %sh1 = lshr i32 %r, 7
   ret i32 %sh1
 }
+
+define i64 @desirable_to_commute1(i64 %x) {
+; CHECK-LABEL: desirable_to_commute1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0x7fff8
+; CHECK-NEXT:    ret
+  %s1 = lshr i64 %x, 3
+  %a = and i64 %s1, 65535
+  %s2 = shl i64 %a, 3
+  ret i64 %s2
+}
+
+define i64 @desirable_to_commute2(i64* %p, i64 %i) {
+; CHECK-LABEL: desirable_to_commute2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x1, #0x1ff8
+; CHECK-NEXT:    ldr x0, [x0, x8]
+; CHECK-NEXT:    ret
+  %lshr = lshr i64 %i, 3
+  %and = and i64 %lshr, 1023
+  %pidx = getelementptr i64, i64* %p, i64 %and
+  %r = load i64, i64* %pidx
+  ret i64 %r
+}
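
Note (not part of the patch): the new carve-out is profitable because when the shl amount matches the earlier lshr amount, the whole expression folds to a single AND with a shifted mask, which is strictly better than preserving the UBFX form. A minimal standalone C++ sanity check of that identity, assuming unsigned (logical) shifts and an in-range shift amount:

#include <cassert>
#include <cstdint>

// Checks the fold the patch enables:
//   ((x >> C) & Mask) << C  ==  x & (Mask << C)
// With C = 3 and Mask = 65535 this is exactly desirable_to_commute1,
// which now lowers to `and x0, x0, #0x7fff8` (0x7fff8 == 65535 << 3).
int main() {
  const uint64_t C = 3, Mask = 65535;
  for (uint64_t X : {0x0ULL, 0xDEADBEEFULL, 0x123456789ABCDEF0ULL, ~0ULL}) {
    uint64_t Commuted = ((X >> C) & Mask) << C; // shape before the combine
    uint64_t Folded = X & (Mask << C);          // single-AND form after it
    assert(Commuted == Folded);
  }
  return 0;
}

desirable_to_commute2 exercises the same fold arising implicitly: the getelementptr scales the index by 8, i.e. a shift by 3 that matches the lshr, so the address computation collapses to a single and (#0x1ff8 == 1023 << 3) feeding a register-offset ldr.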