diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2580,17 +2580,54 @@
   SDValue AndOp0 = Op.getOperand(0);
 
   uint64_t ShlImm;
-  if (!isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm))
+  SDValue ShlOp0;
+  if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
+    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
+    ShlOp0 = AndOp0.getOperand(0);
+  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
+             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
+                                   ShlImm)) {
+    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
+
+    // ShlVal == shl(val, N), which is a left shift on a smaller type.
+    SDValue ShlVal = AndOp0.getOperand(0);
+
+    // Since this is after type legalization and ShlVal is extended to MVT::i64,
+    // expect VT to be MVT::i32.
+    assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
+
+    // Widens 'val' to MVT::i64 as the source of bit field positioning.
+    ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
+  } else
     return false;
 
-  // Bail out if the SHL has more than one use, since then we'll end up
-  // generating SHL+UBFIZ instead of just keeping SHL+AND.
+  // For !BiggerPattern, bail out if AndOp0 has more than one use, since
+  // then we'll end up generating AndOp0+UBFIZ instead of just keeping
+  // AndOp0+AND.
   if (!BiggerPattern && !AndOp0.hasOneUse())
     return false;
 
   DstLSB = countTrailingZeros(NonZeroBits);
   Width = countTrailingOnes(NonZeroBits >> DstLSB);
 
+  // Bail out on large Width. This happens when no proper combining / constant
+  // folding was performed.
+  if (Width >= (int)VT.getSizeInBits()) {
+    // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t,
+    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
+    // to "val".
+    // If VT is i32, what Width >= 32 means:
+    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
+    //   demands at least 'Width' bits (after dag-combiner). This, together
+    //   with the `any_extend` Op (undefined higher bits), indicates a missed
+    //   combination when lowering the 'and' IR instruction to machine IR.
+    LLVM_DEBUG(
+        dbgs()
+        << "Found large Width in bit-field-positioning -- this indicates no "
+           "proper combining / constant folding was performed\n");
+    return false;
+  }
+
   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
   // amount. BiggerPattern is true when this pattern is being matched for BFI,
@@ -2599,7 +2636,7 @@
   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
     return false;
 
-  Src = getLeftShift(CurDAG, AndOp0.getOperand(0), ShlImm - DstLSB);
+  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -580,9 +580,8 @@
 define i64 @test_truncated_shift(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_truncated_shift:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    lsl w8, w1, #25
-; CHECK-NEXT:    lsr x8, x8, #25
-; CHECK-NEXT:    bfi x0, x8, #25, #5
+; CHECK-NEXT:    // kill: def $w1 killed $w1 killed $x1 def $x1
+; CHECK-NEXT:    bfi x0, x1, #25, #5
 ; CHECK-NEXT:    ret
 entry:
   %and = and i64 %x, -1040187393
@@ -591,3 +590,14 @@
   %or = or i64 %and5, %and
   ret i64 %or
 }
+
+define i64 @test_and_extended_shift_with_imm(i64 %0) {
+; CHECK-LABEL: test_and_extended_shift_with_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0 def $x0
+; CHECK-NEXT:    ubfiz x0, x0, #7, #8
+; CHECK-NEXT:    ret
+  %2 = shl i64 %0, 7
+  %3 = and i64 %2, 32640 ; #0x7f80
+  ret i64 %3
+}
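Note (not part of the patch): a minimal standalone sketch of how the positioning parameters fall out of the shifted mask in the new test test_and_extended_shift_with_imm. std::countr_zero / std::countr_one from C++20 <bit> stand in for llvm::countTrailingZeros / llvm::countTrailingOnes; the constants (mask 0x7f80, shift amount 7) come from the test, everything else is illustrative.

// Standalone illustration (not LLVM code): derive the UBFIZ operands that the
// bit-field-positioning match computes for "and(shl(val, 7), 0x7f80)".
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t NonZeroBits = 0x7f80; // shifted mask from the new test
  const uint64_t ShlImm = 7;           // immediate of the inner shl

  int DstLSB = std::countr_zero(NonZeroBits);          // 7
  int Width = std::countr_one(NonZeroBits >> DstLSB);  // 8

  // The new guard in the patch bails out when Width covers the whole value
  // type; here 8 < 64, so matching proceeds.
  assert(Width < 64 && "mask must not cover the whole value type");

  // ShlImm == DstLSB, so the source needs no extra shift and the whole
  // pattern selects to: ubfiz x0, x0, #7, #8
  std::printf("ubfiz x0, x0, #%d, #%d (extra shift needed: %s)\n", DstLSB,
              Width, ShlImm == uint64_t(DstLSB) ? "no" : "yes");
  return 0;
}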