diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -68,8 +68,6 @@ template <signed Min, signed Max, signed Scale, bool Shift> bool SelectRDVLImm(SDValue N, SDValue &Imm); - bool tryMLAV64LaneV128(SDNode *N); - bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); @@ -835,135 +833,6 @@ return AArch64_AM::InvalidShiftExtend; } -// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. -static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { - if (DL->getOpcode() != AArch64ISD::DUPLANE16 && - DL->getOpcode() != AArch64ISD::DUPLANE32) - return false; - - SDValue SV = DL->getOperand(0); - if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) - return false; - - SDValue EV = SV.getOperand(1); - if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return false; - - ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); - ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); - LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); - LaneOp = EV.getOperand(0); - - return true; -} - -// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a -// high lane extract. -static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, - SDValue &LaneOp, int &LaneIdx) { - - if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { - std::swap(Op0, Op1); - if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) - return false; - } - StdOp = Op1; - return true; -} - -/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand -/// is a lane in the upper half of a 128-bit vector. Recognize and select this -/// so that we don't emit unnecessary lane extracts. 
-bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { - SDLoc dl(N); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. - SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. - int LaneIdx = -1; // Will hold the lane index. - - if (Op1.getOpcode() != ISD::MUL || - !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, - LaneIdx)) { - std::swap(Op0, Op1); - if (Op1.getOpcode() != ISD::MUL || - !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, - LaneIdx)) - return false; - } - - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); - - SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; - - unsigned MLAOpc = ~0U; - - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized MLA."); - case MVT::v4i16: - MLAOpc = AArch64::MLAv4i16_indexed; - break; - case MVT::v8i16: - MLAOpc = AArch64::MLAv8i16_indexed; - break; - case MVT::v2i32: - MLAOpc = AArch64::MLAv2i32_indexed; - break; - case MVT::v4i32: - MLAOpc = AArch64::MLAv4i32_indexed; - break; - } - - ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); - return true; -} - -bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { - SDLoc dl(N); - SDValue SMULLOp0; - SDValue SMULLOp1; - int LaneIdx; - - if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, - LaneIdx)) - return false; - - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); - - SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; - - unsigned SMULLOpc = ~0U; - - if (IntNo == Intrinsic::aarch64_neon_smull) { - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized SMULL."); - case MVT::v4i32: - SMULLOpc = AArch64::SMULLv4i16_indexed; - break; - case MVT::v2i64: - SMULLOpc = AArch64::SMULLv2i32_indexed; - break; - } - } else if (IntNo == 
Intrinsic::aarch64_neon_umull) { - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized SMULL."); - case MVT::v4i32: - SMULLOpc = AArch64::UMULLv4i16_indexed; - break; - case MVT::v2i64: - SMULLOpc = AArch64::UMULLv2i32_indexed; - break; - } - } else - llvm_unreachable("Unrecognized intrinsic."); - - ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); - return true; -} - /// Instructions that accept extend modifiers like UXTW expect the register /// being extended to be a GPR32, but the incoming DAG might be acting on a /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if @@ -4307,11 +4176,6 @@ return; break; - case ISD::ADD: - if (tryMLAV64LaneV128(Node)) - return; - break; - case ISD::LOAD: { // Try to select as an indexed load. Fall through to normal processing // if we can't. @@ -4955,11 +4819,6 @@ : AArch64::TBXv16i8Four, true); return; - case Intrinsic::aarch64_neon_smull: - case Intrinsic::aarch64_neon_umull: - if (tryMULLV64LaneV128(IntNo, Node)) - return; - break; case Intrinsic::aarch64_sve_srshl_single_x2: if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( Node->getValueType(0), diff --git a/llvm/test/CodeGen/AArch64/sve-pr62151.ll b/llvm/test/CodeGen/AArch64/sve-pr62151.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-pr62151.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+sve < %s | FileCheck %s + + +define i32 @build_interpolation(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) { +; CHECK-LABEL: build_interpolation: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: mla v0.2s, v1.2s, v0.s[1] +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %3 = mul nsw <2 x i32> %1, %0 + %4 = sdiv <2 
x i32> %3, %2 + %shift = shufflevector <2 x i32> %4, <2 x i32> poison, <2 x i32> <i32 1, i32 poison> + %5 = mul nsw <2 x i32> %shift, %1 + %6 = add nsw <2 x i32> %5, %4 + %add = extractelement <2 x i32> %6, i64 0 + ret i32 %add +}