Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2568,17 +2568,39 @@
   SDValue AndOp0 = Op.getOperand(0);
 
   uint64_t ShlImm;
-  if (!isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm))
+  if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
+    // For pattern "and(shl(val, N), shifted-mask)", 'Src' is set to 'val'.
+    Src = AndOp0.getOperand(0);
+  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
+             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
+                                   ShlImm)) {
+    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
+
+    // ShlVal == shl(val, N), which is a left shift on a smaller type.
+    SDValue ShlVal = AndOp0.getOperand(0);
+
+    // Widens val to MVT::i64 and assigns it to 'Src'.
+    Src = Widen(CurDAG, ShlVal.getOperand(0));
+
+    VT = ShlVal.getValueType();
+  } else
     return false;
 
-  // Bail out if the SHL has more than one use, since then we'll end up
-  // generating SHL+UBFIZ instead of just keeping SHL+AND.
+  // For !BiggerPattern, bail out if AndOp0 has more than one use, since then
+  // we'll end up generating AndOp0+UBFIZ instead of just keeping
+  // AndOp0+AND.
   if (!BiggerPattern && !AndOp0.hasOneUse())
     return false;
 
   DstLSB = countTrailingZeros(NonZeroBits);
   Width = countTrailingOnes(NonZeroBits >> DstLSB);
 
+  // VT is the value type of the source (before the any_extend node, if there
+  // is one), so clamp Width to the source bit width.
+  assert(VT.isSimple() && VT.isScalarInteger() &&
+         "VT should be a simple scalar type");
+  Width = std::min(Width, (int)VT.getSizeInBits());
+
   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
   // amount. BiggerPattern is true when this pattern is being matched for BFI,
@@ -2587,7 +2609,7 @@
   if (ShlImm != DstLSB && !BiggerPattern)
     return false;
 
-  Src = getLeftShift(CurDAG, AndOp0.getOperand(0), ShlImm - DstLSB);
+  Src = getLeftShift(CurDAG, Src, ShlImm - DstLSB);
 
   return true;
 }
Index: llvm/test/CodeGen/AArch64/bitfield-insert.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -580,9 +580,8 @@
 define i64 @test_truncated_shift(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_truncated_shift:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    lsl w8, w1, #25
-; CHECK-NEXT:    lsr x8, x8, #25
-; CHECK-NEXT:    bfi x0, x8, #25, #5
+; CHECK-NEXT:    // kill: def $w1 killed $w1 killed $x1 def $x1
+; CHECK-NEXT:    bfi x0, x1, #25, #5
 ; CHECK-NEXT:    ret
 entry:
   %and = and i64 %x, -1040187393
@@ -591,3 +590,14 @@
   %or = or i64 %and5, %and
   ret i64 %or
 }
+
+define i64 @test_and_extended_shift_with_imm(i64 %0) {
+; CHECK-LABEL: test_and_extended_shift_with_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0 def $x0
+; CHECK-NEXT:    ubfiz x0, x0, #7, #8
+; CHECK-NEXT:    ret
+  %2 = shl i64 %0, 7
+  %3 = and i64 %2, 32640 ; #0x7f80
+  ret i64 %3
+}