diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2535,21 +2535,37 @@
   const SDNode *AndOp0 = AndOp0Val.getNode();
 
   uint64_t ShlImm;
-  if (!isOpcWithIntImmediate(AndOp0, ISD::SHL, ShlImm))
+  if (isOpcWithIntImmediate(AndOp0, ISD::SHL, ShlImm)) {
+    // For pattern "and(shl(val, N), shifted-mask)", 'Src' is set to 'val'.
+    Src = AndOp0->getOperand(0);
+  } else if (VT == MVT::i64 && AndOp0->getOpcode() == ISD::ANY_EXTEND && isOpcWithIntImmediate(AndOp0->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
+    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
+    //
+    // ShlVal == shl(val, N), and ShlVal is a left shift on a smaller type.
+    SDValue ShlVal = AndOp0->getOperand(0);
+    // Widens 'val' to MVT::i64 and assigns it to 'Src'.
+    Src = Widen(CurDAG, ShlVal.getOperand(0));
+
+    VT = ShlVal.getValueType();
+  } else
     return false;
 
-  // Bail out if the SHL has more than one use, since then we'll end up
-  // generating SHL+UBFIZ instead of just keeping SHL+AND.
+  // Bail out if AndOp0 has more than one use, since then we'll end up
+  // generating AndOp0+UBFIZ instead of just keeping AndOp0+AND.
   if (!BiggerPattern && !AndOp0->hasOneUse())
     return false;
 
   DstLSB = countTrailingZeros(NonZeroBits);
   Width = countTrailingOnes(NonZeroBits >> DstLSB);
 
+  // VT is the value type of the source (before the any_extend node, if there is one), so clamp Width to the source bit width.
+  assert(VT.isSimple() && VT.isScalarInteger() && "VT should be a simple scalar type");
+  Width = std::min(Width, (int)VT.getSizeInBits());
+
   if (ShlImm != DstLSB && !BiggerPattern)
     return false;
 
-  Src = getLeftShift(CurDAG, AndOp0->getOperand(0), ShlImm - DstLSB);
+  Src = getLeftShift(CurDAG, Src, ShlImm - DstLSB);
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -579,9 +579,8 @@
 define i64 @test_truncated_shift(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_truncated_shift:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    lsl w8, w1, #25
-; CHECK-NEXT:    lsr x8, x8, #25
-; CHECK-NEXT:    bfi x0, x8, #25, #5
+; CHECK-NEXT:    // kill: def $w1 killed $w1 killed $x1 def $x1
+; CHECK-NEXT:    bfi x0, x1, #25, #5
 ; CHECK-NEXT:    ret
 entry:
   %and = and i64 %x, -1040187393
@@ -590,3 +589,14 @@
   %or = or i64 %and5, %and
   ret i64 %or
 }
+
+define i64 @test_and_extended_shift_with_imm(i64 %0) {
+; CHECK-LABEL: test_and_extended_shift_with_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0 def $x0
+; CHECK-NEXT:    ubfiz x0, x0, #7, #8
+; CHECK-NEXT:    ret
+  %2 = shl i64 %0, 7
+  %3 = and i64 %2, 32640 ; #0x7f80
+  ret i64 %3
+}