Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -163,6 +163,9 @@
       VMULLs, // ...signed
       VMULLu, // ...unsigned
 
+      // Reciprocal square root estimate.
+      VRSQRTE,
+
       UMLAL, // 64bit Unsigned Accumulate Multiply
       SMLAL, // 64bit Signed Accumulate Multiply
       UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
@@ -323,7 +326,9 @@
                                        APInt &KnownOne,
                                        const SelectionDAG &DAG,
                                        unsigned Depth) const override;
-
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps,
+                             bool &UseOneConstNR) const override;
 
     bool ExpandInlineAsm(CallInst *CI) const override;
 
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1387,6 +1387,7 @@
   case ARMISD::VTBL2: return "ARMISD::VTBL2";
   case ARMISD::VMULLs: return "ARMISD::VMULLs";
   case ARMISD::VMULLu: return "ARMISD::VMULLu";
+  case ARMISD::VRSQRTE: return "ARMISD::VRSQRTE";
   case ARMISD::UMAAL: return "ARMISD::UMAAL";
   case ARMISD::UMLAL: return "ARMISD::UMLAL";
   case ARMISD::SMLAL: return "ARMISD::SMLAL";
@@ -11884,6 +11885,41 @@
   }
 }
 
+/// getEstimate - Build the estimate node \p Opcode for \p Operand, or return
+/// an empty SDValue when no estimate may be used. Only the reciprocal square
+/// root is wired up so far, hence the fixed "sqrt" option keys. On success
+/// \p ExtraSteps receives the configured number of refinement steps.
+static SDValue getEstimate(const ARMSubtarget &ST,
+  const ARMTargetLowering::DAGCombinerInfo &DCI, unsigned Opcode,
+  const SDValue &Operand, unsigned &ExtraSteps) {
+  if (!ST.hasNEON())
+    return SDValue();
+
+  // Only single-precision types have a selection pattern for the estimate
+  // node; anything else (f64, not-yet-legalized vectors) must be rejected.
+  EVT VT = Operand.getValueType();
+  if (VT != MVT::f32 && VT != MVT::v2f32 && VT != MVT::v4f32)
+    return SDValue();
+
+  // Only f32 element types reach this point, so the "f" option keys apply.
+  const char *RecipOp = VT.isVector() ? "vec-sqrtf" : "sqrtf";
+
+  const TargetRecip &Recips = DCI.DAG.getTarget().Options.Reciprocals;
+  if (!Recips.isEnabled(RecipOp))
+    return SDValue();
+
+  ExtraSteps = Recips.getRefinementSteps(RecipOp);
+  return DCI.DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
+}
+
+/// Combiner hook: emits ARMISD::VRSQRTE as the initial rsqrt estimate and
+/// always sets \p UseOneConst (presumably one-constant Newton-Raphson).
+SDValue ARMTargetLowering::getRsqrtEstimate(SDValue Operand,
+  DAGCombinerInfo &DCI, unsigned &ExtraSteps, bool &UseOneConst) const {
+  UseOneConst = true;
+  return getEstimate(*Subtarget, DCI, ARMISD::VRSQRTE, Operand, ExtraSteps);
+}
+
 //===----------------------------------------------------------------------===//
 // ARM Inline Assembly Support
 //===----------------------------------------------------------------------===//
Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -503,6 +503,7 @@
 def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
 def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
 def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+def NEONfrsqrte : SDNode<"ARMISD::VRSQRTE", SDTFPUnaryOp>;
 
 // Types for vector shift by immediates. The "SHX" version is for long and
 // narrow operations where the source and destination vectors have different
@@ -5392,6 +5393,12 @@
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
 
+// Select ARMISD::VRSQRTE: scalars go through lane 0 of a D register.
+def : Pat<(NEONfrsqrte SPR:$Vm),
+          (EXTRACT_SUBREG (VRSQRTEfd (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Vm, ssub_0)), ssub_0)>;
+def : Pat<(v2f32 (NEONfrsqrte (v2f32 DPR:$Vm))), (VRSQRTEfd DPR:$Vm)>;
+def : Pat<(v4f32 (NEONfrsqrte (v4f32 QPR:$Vm))), (VRSQRTEfq QPR:$Vm)>;
+
 // VRSQRTS : Vector Reciprocal Square Root Step
 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
Index: lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- lib/Target/ARM/ARMTargetMachine.cpp
+++ lib/Target/ARM/ARMTargetMachine.cpp
@@ -198,6 +198,16 @@
   return *RM;
 }
 
+// Helper function to set up the defaults for reciprocals.
+static void initReciprocals(ARMTargetMachine &TM) {
+  // Every square-root estimate key is registered as enabled with zero
+  // refinement steps, for the scalar and the "vec-" variants alike.
+  static const char *const Keys[] = {"sqrtf", "sqrtd", "vec-sqrtf",
+                                     "vec-sqrtd"};
+  for (const char *Key : Keys)
+    TM.Options.Reciprocals.setDefaults(Key, true, 0);
+}
+
 /// Create an ARM architecture model.
 ///
 ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
@@ -281,6 +291,7 @@
                                    CodeModel::Model CM, CodeGenOpt::Level OL,
                                    bool isLittle)
     : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
+  initReciprocals(*this);
   initAsmInfo();
   if (!Subtarget.hasARMOps())
     report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "