Index: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1507,9 +1507,31 @@
   ReplaceNode(N, St);
 }

+static bool isShiftedMask(uint64_t Mask, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return isShiftedMask_32(Mask);
+  return isShiftedMask_64(Mask);
+}
+
+static unsigned countTrailingZeros(uint64_t Val, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return countTrailingZeros<uint32_t>(Val);
+  return countTrailingZeros<uint64_t>(Val);
+}
+
+static unsigned countTrailingOnes(uint64_t Val, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return countTrailingOnes<uint32_t>(Val);
+  return countTrailingOnes<uint64_t>(Val);
+}
+
 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                        unsigned &Opc, SDValue &Opd0,
                                        unsigned &LSB, unsigned &MSB,
+                                       unsigned &DstLSB,
                                        unsigned NumberOfIgnoredLowBits,
                                        bool BiggerPattern) {
   assert(N->getOpcode() == ISD::AND &&
@@ -1541,8 +1563,8 @@
   // simplified. Try to undo that
   AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

-  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
-  if (AndImm & (AndImm + 1))
+  // The immediate should be a mask, potentially shifted.
+  if (!isShiftedMask(AndImm, VT))
     return false;

   bool ClampMSB = false;
@@ -1584,10 +1606,19 @@
     return false;
   }

-  LSB = SrlImm;
-  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
-                                 : countTrailingOnes<uint64_t>(AndImm)) -
-        1;
+  // If the mask is shifted left, then by how much?
+  DstLSB = countTrailingZeros(AndImm, VT);
+  // Pretend that the mask is not shifted so that cto works.
+  AndImm >>= DstLSB;
+
+  // How many bits are we extracting?
+  unsigned NBits = countTrailingOnes(AndImm, VT);
+
+  // Starting from which bit? (accounting for the shifted mask)
+  LSB = SrlImm + DstLSB;
+  // And the last bit to extract is?
+  MSB = LSB + NBits - 1;
+
   if (ClampMSB)
     // Since we're moving the extend before the right shift operation, we need
     // to clamp the MSB to make sure we don't shift in undefined bits instead of
@@ -1771,18 +1802,21 @@
 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
+                                unsigned &DstLSB,
                                 unsigned NumberOfIgnoredLowBits = 0,
                                 bool BiggerPattern = false) {
   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
     return false;

+  DstLSB = 0;
+
   switch (N->getOpcode()) {
   default:
     if (!N->isMachineOpcode())
       return false;
     break;

   case ISD::AND:
-    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
+    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, DstLSB,
                                       NumberOfIgnoredLowBits, BiggerPattern);
   case ISD::SRL:
   case ISD::SRA:
@@ -1810,31 +1844,40 @@
   return false;
 }

+static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount);
+
 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
-  unsigned Opc, Immr, Imms;
+  unsigned Opc, Immr, Imms, DstLSB;
   SDValue Opd0;
-  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
+  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms, DstLSB))
     return false;

   EVT VT = N->getValueType(0);
   SDLoc dl(N);

-  // If the bit extract operation is 64bit but the original type is 32bit, we
-  // need to add one EXTRACT_SUBREG.
-  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
-    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
-                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
+  // In what value type should we perform the extraction?
+  EVT BFMVT = VT;
+  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32)
+    BFMVT = MVT::i64;

-    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
-    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
-    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
-                                          MVT::i32, SDValue(BFM, 0), SubReg));
-    return true;
+  SDValue BFMOps[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, BFMVT),
+                      CurDAG->getTargetConstant(Imms, dl, BFMVT)};
+  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, BFMVT, BFMOps);
+
+  // If the bit extract operation is 64bit but the original type is 32bit,
+  // we will need to add one EXTRACT_SUBREG.
+  if (BFMVT != VT) {
+    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, VT);
+    SDValue ExtractOps[] = {SDValue(BFM, 0), SubReg};
+    BFM = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
+                                 ExtractOps);
   }

-  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
-                   CurDAG->getTargetConstant(Imms, dl, VT)};
-  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+  // The extracted bits may be shifted left afterwards.
+  if (DstLSB)
+    BFM = getLeftShift(CurDAG, SDValue(BFM, 0), DstLSB).getNode();
+
+  ReplaceNode(N, BFM);
   return true;
 }

@@ -2107,9 +2150,9 @@
 /// Does this tree qualify as an attempt to move a bitfield into position,
 /// essentially "(and (shl VAL, N), Mask)".
 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
-                                    bool BiggerPattern,
-                                    SDValue &Src, int &ShiftAmount,
-                                    int &MaskWidth) {
+                                    bool BiggerPattern, SDValue &Src,
+                                    unsigned &ShiftAmount,
+                                    unsigned &MaskWidth) {
   EVT VT = Op.getValueType();
   unsigned BitWidth = VT.getSizeInBits();
   (void)BitWidth;
@@ -2157,13 +2200,6 @@
   return true;
 }

-static bool isShiftedMask(uint64_t Mask, EVT VT) {
-  assert(VT == MVT::i32 || VT == MVT::i64);
-  if (VT == MVT::i32)
-    return isShiftedMask_32(Mask);
-  return isShiftedMask_64(Mask);
-}
-
 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
 // inserted only sets known zero bits.
 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
@@ -2299,9 +2335,11 @@
     SDNode *OrOpd1 = OrOpd1Val.getNode();

     unsigned BFXOpc;
-    int DstLSB, Width;
-    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
+    unsigned DstLSB, Width;
+    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, DstLSB,
                             NumberOfIgnoredLowBits, BiggerPattern)) {
+      if (DstLSB != 0)
+        continue; // BFXIL inserts starting with bit 0, always.
       // Check that the returned opcode is compatible with the pattern,
       // i.e., same type and zero extended (U and not S)
       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
@@ -2309,7 +2347,6 @@
         continue;

       // Compute the width of the bitfield insertion
-      DstLSB = 0;
       Width = ImmS - ImmR + 1;
       // FIXME: This constraint is to catch bitfield insertion we may
       // want to widen the pattern if we want to grab general bitfied
@@ -2319,9 +2356,8 @@
       // If the mask on the insertee is correct, we have a BFXIL operation. We
       // can share the ImmR and ImmS values from the already-computed UBFM.
-    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
-                                       BiggerPattern,
-                                       Src, DstLSB, Width)) {
+    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, BiggerPattern, Src,
+                                       DstLSB, Width)) {
       ImmR = (BitWidth - DstLSB) % BitWidth;
       ImmS = Width - 1;
     } else
@@ -2445,7 +2481,7 @@
     return false;

   SDValue Op0;
-  int DstLSB, Width;
+  unsigned DstLSB, Width;
   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
                                Op0, DstLSB, Width))
     return false;
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -372,10 +372,6 @@

   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

-  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
-  bool isDesirableToCommuteWithShift(const SDNode *N,
-                                     CombineLevel Level) const override;
-
   /// Returns true if it is beneficial to convert a load of a constant
   /// to just the constant itself.
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8840,24 +8840,6 @@
   return ScratchRegs;
 }

-bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
-                                                     CombineLevel Level) const {
-  N = N->getOperand(0).getNode();
-  EVT VT = N->getValueType(0);
-  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
-  // it with shift to let it be lowered to UBFX.
-  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
-      isa<ConstantSDNode>(N->getOperand(1))) {
-    uint64_t TruncMask = N->getConstantOperandVal(1);
-    if (isMask_64(TruncMask) &&
-        N->getOperand(0).getOpcode() == ISD::SRL &&
-        isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
-      return false;
-  }
-  return true;
-}
-
 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                               Type *Ty) const {
   assert(Ty->isIntegerTy());
Index: test/CodeGen/AArch64/arm64-bitfield-extract.ll
===================================================================
--- test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -969,10 +969,11 @@
 define i64 @fct21(i64 %x) {
 ; LLC-LABEL: fct21:
 ; LLC:       // %bb.0: // %entry
-; LLC-NEXT:    adrp x9, arr
 ; LLC-NEXT:    ubfx x8, x0, #4, #4
+; LLC-NEXT:    adrp x9, arr
+; LLC-NEXT:    lsl x8, x8, #3
 ; LLC-NEXT:    add x9, x9, :lo12:arr
-; LLC-NEXT:    ldr x0, [x9, x8, lsl #3]
+; LLC-NEXT:    ldr x0, [x9, x8]
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct21(
 ; OPT-NEXT:  entry:
Index: test/CodeGen/AArch64/bitfield-insert.ll
===================================================================
--- test/CodeGen/AArch64/bitfield-insert.ll
+++ test/CodeGen/AArch64/bitfield-insert.ll
@@ -221,7 +221,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    lsr w9, w9, #14
+; CHECK-NEXT:    lsl w9, w9, #12
+; CHECK-NEXT:    lsr w9, w9, #26
 ; CHECK-NEXT:    bfi w8, w9, #26, #5
 ; CHECK-NEXT:    str w8, [x0]
 ; CHECK-NEXT:    ret
Index: test/CodeGen/AArch64/extract-bits.ll
===================================================================
--- test/CodeGen/AArch64/extract-bits.ll
+++ test/CodeGen/AArch64/extract-bits.ll
@@ -1002,8 +1002,8 @@
 define i32 @c1_i32(i32 %arg) nounwind {
 ; CHECK-LABEL: c1_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #19
-; CHECK-NEXT:    and w0, w8, #0xffc
+; CHECK-NEXT:    ubfx w8, w0, #21, #10
+; CHECK-NEXT:    lsl w0, w8, #2
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 4092
@@ -1027,8 +1027,8 @@
 define i32 @c4_i32_bad(i32 %arg) nounwind {
 ; CHECK-LABEL: c4_i32_bad:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #19
-; CHECK-NEXT:    and w0, w8, #0x1ffe
+; CHECK-NEXT:    lsr w8, w0, #20
+; CHECK-NEXT:    lsl w0, w8, #1
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 16382
@@ -1052,8 +1052,8 @@
 define i64 @c1_i64(i64 %arg) nounwind {
 ; CHECK-LABEL: c1_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #51
-; CHECK-NEXT:    and x0, x8, #0xffc
+; CHECK-NEXT:    ubfx x8, x0, #53, #10
+; CHECK-NEXT:    lsl x0, x8, #2
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 4092
@@ -1077,8 +1077,8 @@
 define i64 @c4_i64_bad(i64 %arg) nounwind {
 ; CHECK-LABEL: c4_i64_bad:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #51
-; CHECK-NEXT:    and x0, x8, #0x1ffe
+; CHECK-NEXT:    lsr x8, x0, #52
+; CHECK-NEXT:    lsl x0, x8, #1
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 16382
Index: test/CodeGen/AArch64/pull-binop-through-shift.ll
===================================================================
--- test/CodeGen/AArch64/pull-binop-through-shift.ll
+++ test/CodeGen/AArch64/pull-binop-through-shift.ll
@@ -108,8 +108,8 @@
 define i32 @and_signbit_lshr(i32 %x, i32* %dst) {
 ; CHECK-LABEL: and_signbit_lshr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
-; CHECK-NEXT:    and w0, w8, #0xffff00
+; CHECK-NEXT:    lsr w8, w0, #16
+; CHECK-NEXT:    lsl w0, w8, #8
 ; CHECK-NEXT:    str w0, [x1]
 ; CHECK-NEXT:    ret
   %t0 = and i32 %x, 4294901760 ; 0xFFFF0000
@@ -120,8 +120,8 @@
 define i32 @and_nosignbit_lshr(i32 %x, i32* %dst) {
 ; CHECK-LABEL: and_nosignbit_lshr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
-; CHECK-NEXT:    and w0, w8, #0x7fff00
+; CHECK-NEXT:    ubfx w8, w0, #16, #15
+; CHECK-NEXT:    lsl w0, w8, #8
 ; CHECK-NEXT:    str w0, [x1]
 ; CHECK-NEXT:    ret
   %t0 = and i32 %x, 2147418112 ; 0x7FFF0000
@@ -223,8 +223,8 @@
 define i32 @and_nosignbit_ashr(i32 %x, i32* %dst) {
 ; CHECK-LABEL: and_nosignbit_ashr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
-; CHECK-NEXT:    and w0, w8, #0x7fff00
+; CHECK-NEXT:    ubfx w8, w0, #16, #15
+; CHECK-NEXT:    lsl w0, w8, #8
 ; CHECK-NEXT:    str w0, [x1]
 ; CHECK-NEXT:    ret
   %t0 = and i32 %x, 2147418112 ; 0x7FFF0000
Index: test/CodeGen/AArch64/rotate-extract.ll
===================================================================
--- test/CodeGen/AArch64/rotate-extract.ll
+++ test/CodeGen/AArch64/rotate-extract.ll
@@ -97,9 +97,10 @@
 define i32 @no_extract_shrl(i32 %i) nounwind {
 ; CHECK-LABEL: no_extract_shrl:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #3
-; CHECK-NEXT:    lsr w0, w0, #9
-; CHECK-NEXT:    bfi w0, w8, #28, #4
+; CHECK-NEXT:    lsl w8, w0, #25
+; CHECK-NEXT:    and w8, w8, #0xf0000000
+; CHECK-NEXT:    bfxil w8, w0, #9, #23
+; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
   %lhs_div = lshr i32 %i, 3
   %rhs_div = lshr i32 %i, 9
Index: test/CodeGen/AArch64/selectcc-to-shiftand.ll
===================================================================
--- test/CodeGen/AArch64/selectcc-to-shiftand.ll
+++ test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -19,8 +19,8 @@
 define i32 @neg_sel_special_constant(i32 %a) {
 ; CHECK-LABEL: neg_sel_special_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #22
-; CHECK-NEXT:    and w0, w8, #0x200
+; CHECK-NEXT:    lsr w8, w0, #31
+; CHECK-NEXT:    lsl w0, w8, #9
 ; CHECK-NEXT:    ret
   %tmp.1 = icmp slt i32 %a, 0
   %retval = select i1 %tmp.1, i32 512, i32 0
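
Note: what follows is a minimal standalone sketch, not an excerpt from the patch. It illustrates the arithmetic that the updated isBitfieldExtractOpFromAnd performs when the AND immediate is a shifted mask: derive the UBFX field start (LSB) and width, plus the residual left shift (DstLSB) that tryBitfieldExtractOp now materializes via getLeftShift. The helper name extractViaUbfxLsl and the GCC/Clang builtins are assumptions local to this sketch.

// Standalone sketch; assumes a GCC/Clang-style compiler for the builtins.
#include <cassert>
#include <cstdint>
#include <iostream>

// Emulates "(X >> SrlImm) & AndImm" by decomposing it into
// UBFX(X, LSB, NBits) << DstLSB, mirroring the patch's logic.
static uint64_t extractViaUbfxLsl(uint64_t X, unsigned SrlImm, uint64_t AndImm) {
  assert(AndImm != 0 && "expected a non-empty shifted mask");
  unsigned DstLSB = __builtin_ctzll(AndImm);    // how far the mask is shifted left
  uint64_t Mask = AndImm >> DstLSB;             // un-shift so trailing-ones counting works
  assert((Mask & (Mask + 1)) == 0 && "AndImm must be a shifted mask");
  unsigned NBits = __builtin_popcountll(Mask);  // contiguous ones => field width
  unsigned LSB = SrlImm + DstLSB;               // first extracted bit of X
  uint64_t FieldMask = (NBits == 64) ? ~0ULL : ((1ULL << NBits) - 1);
  uint64_t Field = (X >> LSB) & FieldMask;      // this is the UBFX
  return Field << DstLSB;                       // this is the extra LSL the patch emits
}

int main() {
  // Same shape as the c1_i32 test: (x >> 19) & 0xffc  ==>  ubfx #21, #10 ; lsl #2.
  uint64_t X = 0x12345678u;
  uint64_t Direct = (X >> 19) & 0xffc;
  uint64_t Decomposed = extractViaUbfxLsl(X, /*SrlImm=*/19, /*AndImm=*/0xffc);
  std::cout << std::hex << Direct << " == " << Decomposed << '\n';
  assert(Direct == Decomposed);
  return 0;
}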