Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -61,7 +61,6 @@ class MCSymbol; template class SmallVectorImpl; class DataLayout; - struct TargetRecip; class TargetRegisterClass; class TargetLibraryInfo; class TargetLoweringObjectFile; @@ -247,6 +246,37 @@ return false; } + /// Reciprocal estimate status values used by the functions below. + enum ReciprocalEstimate : int { + Unspecified = -1, + Disabled = 0, + Enabled = 1 + }; + + /// Return a ReciprocalEstimate enum value for a square root of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getSqrtEnabled(EVT VT, MachineFunction &MF) const; + + /// Return a ReciprocalEstimate enum value for a division of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getDivEnabled(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a square root of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a division of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; + /// Returns true if target has indicated at least one type should be bypassed. bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } @@ -539,12 +569,6 @@ } } - /// Return the reciprocal estimate code generation preferences for this target - /// after potentially overriding settings using the function's attributes. - /// FIXME: Like all unsafe-math target settings, this should really be an - /// instruction-level attribute/metadata/FMF. - TargetRecip getTargetRecipForFunc(MachineFunction &MF) const; - /// Vector types are broken down into some number of legal first class types. /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 @@ -2158,7 +2182,6 @@ /// sequence of memory operands that is recognized by PrologEpilogInserter. MachineBasicBlock *emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const; - TargetRecip ReciprocalEstimates; }; /// This class defines information used to lower LLVM code to legal SelectionDAG @@ -2971,31 +2994,35 @@ /// roots. /// Return a reciprocal square root estimate value for the input operand. - /// The RefinementSteps output is the number of Newton-Raphson refinement - /// iterations required to generate a sufficient (though not necessarily - /// IEEE-754 compliant) estimate for the value type. + /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or + /// 'Enabled' as set by a potential default override attribute. + /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson + /// refinement iterations required to generate a sufficient (though not + /// necessarily IEEE-754 compliant) estimate is returned in that parameter. /// The boolean UseOneConstNR output is used to select a Newton-Raphson /// algorithm implementation that uses one constant or two constants. /// A target may choose to implement its own refinement within this function. /// If that's true, then return '0' as the number of RefinementSteps to avoid /// any further refinement of the estimate. /// An empty SDValue return means no estimate sequence can be created. - virtual SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + virtual SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps, bool &UseOneConstNR) const { return SDValue(); } /// Return a reciprocal estimate value for the input operand. - /// The RefinementSteps output is the number of Newton-Raphson refinement - /// iterations required to generate a sufficient (though not necessarily - /// IEEE-754 compliant) estimate for the value type. + /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or + /// 'Enabled' as set by a potential default override attribute. + /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson + /// refinement iterations required to generate a sufficient (though not + /// necessarily IEEE-754 compliant) estimate is returned in that parameter. /// A target may choose to implement its own refinement within this function. /// If that's true, then return '0' as the number of RefinementSteps to avoid /// any further refinement of the estimate. /// An empty SDValue return means no estimate sequence can be created. - virtual SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { + virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps) const { return SDValue(); } Index: llvm/trunk/include/llvm/Target/TargetOptions.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetOptions.h +++ llvm/trunk/include/llvm/Target/TargetOptions.h @@ -15,7 +15,6 @@ #ifndef LLVM_TARGET_TARGETOPTIONS_H #define LLVM_TARGET_TARGETOPTIONS_H -#include "llvm/Target/TargetRecip.h" #include "llvm/MC/MCTargetOptions.h" namespace llvm { Index: llvm/trunk/include/llvm/Target/TargetRecip.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetRecip.h +++ llvm/trunk/include/llvm/Target/TargetRecip.h @@ -1,71 +0,0 @@ -//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class is used to customize machine-specific reciprocal estimate code -// generation in a target-independent way. -// If a target does not support operations in this specification, then code -// generation will default to using supported operations. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_TARGETRECIP_H -#define LLVM_TARGET_TARGETRECIP_H - -#include -#include -#include -#include - -namespace llvm { - -class StringRef; - -struct TargetRecip { -public: - TargetRecip(); - - /// Parse a comma-separated string of reciprocal settings to set values in - /// this struct. - void set(StringRef &Args); - - /// Set enablement and refinement steps for a particular reciprocal operation. - /// Use "all" to give all operations the same values. - void set(StringRef Key, bool Enable, unsigned RefSteps); - - /// Return true if the reciprocal operation has been enabled. - bool isEnabled(StringRef Key) const; - - /// Return the number of iterations necessary to refine the - /// the result of a machine instruction for the given reciprocal operation. - unsigned getRefinementSteps(StringRef Key) const; - - bool operator==(const TargetRecip &Other) const; - -private: - // TODO: We should be able to use special values (enums) to simplify this into - // just an int, but we have to be careful because the user is allowed to - // specify "default" as a setting and just change the refinement step count. - struct RecipParams { - bool Enabled; - int8_t RefinementSteps; - - RecipParams() : Enabled(false), RefinementSteps(0) {} - }; - - std::map RecipMap; - typedef std::map::iterator RecipIter; - typedef std::map::const_iterator ConstRecipIter; - - bool parseGlobalParams(const std::string &Arg); - void parseIndividualParams(const std::vector &Args); -}; - -} // end namespace llvm - -#endif // LLVM_TARGET_TARGETRECIP_H Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14870,11 +14870,21 @@ if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); - unsigned Iterations = 0; - if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getDivEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getDivRefinementSteps(VT, MF); + if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { if (Iterations) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal, we need to find the zero of the function: @@ -14889,7 +14899,7 @@ AddToWorklist(Est.getNode()); // Newton iterations: Est = Est + Est (1 - Arg * Est) - for (unsigned i = 0; i < Iterations; ++i) { + for (int i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); @@ -15011,11 +15021,24 @@ if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); - unsigned Iterations = 0; + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getSqrtEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getSqrtRefinementSteps(VT, MF); + bool UseOneConstNR = false; - if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + if (SDValue Est = + TLI.getRsqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR)) { AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -837,7 +838,6 @@ InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple()); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); - ReciprocalEstimates.set("all", false, 0); } void TargetLoweringBase::initActions() { @@ -1485,22 +1485,6 @@ return MVT::i32; // return the default value } -TargetRecip -TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const { - const Function *F = MF.getFunction(); - StringRef RecipAttrName = "reciprocal-estimates"; - if (!F->hasFnAttribute(RecipAttrName)) - return ReciprocalEstimates; - - // Make a copy of the target's default reciprocal codegen settings. - TargetRecip Recips = ReciprocalEstimates; - - // Override any settings that are customized for this function. - StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString(); - Recips.set(RecipString); - return Recips; -} - /// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. @@ -1891,3 +1875,187 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { MaximumJumpTableSize = Val; } + +//===----------------------------------------------------------------------===// +// Reciprocal Estimates +//===----------------------------------------------------------------------===// + +/// Get the reciprocal estimate attribute string for a function that will +/// override the target defaults. +static StringRef getRecipEstimateForFunc(MachineFunction &MF) { + const Function *F = MF.getFunction(); + StringRef RecipAttrName = "reciprocal-estimates"; + if (!F->hasFnAttribute(RecipAttrName)) + return StringRef(); + + return F->getFnAttribute(RecipAttrName).getValueAsString(); +} + +/// Construct a string for the given reciprocal operation of the given type. +/// This string should match the corresponding option to the front-end's +/// "-mrecip" flag assuming those strings have been passed through in an +/// attribute string. For example, "vec-divf" for a division of a vXf32. +static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { + std::string Name = VT.isVector() ? "vec-" : ""; + + Name += IsSqrt ? "sqrt" : "div"; + + // TODO: Handle "half" or other float types? + if (VT.getScalarType() == MVT::f64) { + Name += "d"; + } else { + assert(VT.getScalarType() == MVT::f32 && + "Unexpected FP type for reciprocal estimate"); + Name += "f"; + } + + return Name; +} + +/// Return the character position and value (a single numeric character) of a +/// customized refinement operation in the input string if it exists. Return +/// false if there is no customized refinement step count. +static bool parseRefinementStep(StringRef In, size_t &Position, + uint8_t &Value) { + const char RefStepToken = ':'; + Position = In.find(RefStepToken); + if (Position == StringRef::npos) + return false; + + StringRef RefStepString = In.substr(Position + 1); + // Allow exactly one numeric character for the additional refinement + // step parameter. + if (RefStepString.size() == 1) { + char RefStepChar = RefStepString[0]; + if (RefStepChar >= '0' && RefStepChar <= '9') { + Value = RefStepChar - '0'; + return true; + } + } + report_fatal_error("Invalid refinement step for -recip."); +} + +/// For the input attribute string, return one of the ReciprocalEstimate enum +/// status values (enabled, disabled, or not specified) for this operation on +/// the specified data type. +static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { + if (Override.empty()) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + SmallVector OverrideVector; + SplitString(Override, OverrideVector, ","); + unsigned NumArgs = OverrideVector.size(); + + // Check if "all", "none", or "default" was specified. + if (NumArgs == 1) { + // Look for an optional setting of the number of refinement steps needed + // for this type of reciprocal operation. + size_t RefPos; + uint8_t RefSteps; + if (parseRefinementStep(Override, RefPos, RefSteps)) { + // Split the string for further processing. + Override = Override.substr(0, RefPos); + } + + // All reciprocal types are enabled. + if (Override == "all") + return TargetLoweringBase::ReciprocalEstimate::Enabled; + + // All reciprocal types are disabled. + if (Override == "none") + return TargetLoweringBase::ReciprocalEstimate::Disabled; + + // Target defaults for enablement are used. + if (Override == "default") + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + } + + // The attribute string may omit the size suffix ('f'/'d'). + StringRef VTName = getReciprocalOpName(IsSqrt, VT); + StringRef VTNameNoSize = VTName.drop_back(); + static const char DisabledPrefix = '!'; + + for (StringRef RecipType : OverrideVector) { + size_t RefPos; + uint8_t RefSteps; + if (parseRefinementStep(RecipType, RefPos, RefSteps)) + RecipType = RecipType.substr(0, RefPos); + + // Ignore the disablement token for string matching. + bool IsDisabled = RecipType[0] == DisabledPrefix; + if (IsDisabled) + RecipType = RecipType.substr(1); + + if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled + : TargetLoweringBase::ReciprocalEstimate::Enabled; + } + + return TargetLoweringBase::ReciprocalEstimate::Unspecified; +} + +/// For the input attribute string, return the customized refinement step count +/// for this operation on the specified data type. If the step count does not +/// exist, return the ReciprocalEstimate enum value for unspecified. +static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { + if (Override.empty()) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + SmallVector OverrideVector; + SplitString(Override, OverrideVector, ","); + unsigned NumArgs = OverrideVector.size(); + + // Check if "all", "default", or "none" was specified. + if (NumArgs == 1) { + // Look for an optional setting of the number of refinement steps needed + // for this type of reciprocal operation. + size_t RefPos; + uint8_t RefSteps; + if (!parseRefinementStep(Override, RefPos, RefSteps)) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + // Split the string for further processing. + Override = Override.substr(0, RefPos); + assert(Override != "none" && + "Disabled reciprocals, but specifed refinement steps?"); + + // If this is a general override, return the specified number of steps. + if (Override == "all" || Override == "default") + return RefSteps; + } + + // The attribute string may omit the size suffix ('f'/'d'). + StringRef VTName = getReciprocalOpName(IsSqrt, VT); + StringRef VTNameNoSize = VTName.drop_back(); + + for (StringRef RecipType : OverrideVector) { + size_t RefPos; + uint8_t RefSteps; + if (!parseRefinementStep(RecipType, RefPos, RefSteps)) + continue; + + RecipType = RecipType.substr(0, RefPos); + if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + return RefSteps; + } + + return TargetLoweringBase::ReciprocalEstimate::Unspecified; +} + +int TargetLoweringBase::getSqrtEnabled(EVT VT, MachineFunction &MF) const { + return getOpEnabled(true, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getDivEnabled(EVT VT, MachineFunction &MF) const { + return getOpEnabled(false, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, + MachineFunction &MF) const { + return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getDivRefinementSteps(EVT VT, + MachineFunction &MF) const { + return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF)); +} Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -164,13 +164,11 @@ bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override { return true; } - SDValue getRsqrtEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const override; - SDValue getRecipEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const = 0; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2859,10 +2859,9 @@ } SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const { - SelectionDAG &DAG = DCI.DAG; EVT VT = Operand.getValueType(); if (VT == MVT::f32) { @@ -2877,9 +2876,8 @@ } SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { - SelectionDAG &DAG = DCI.DAG; + SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const { EVT VT = Operand.getValueType(); if (VT == MVT::f32) { Index: llvm/trunk/lib/Target/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Target/CMakeLists.txt +++ llvm/trunk/lib/Target/CMakeLists.txt @@ -6,7 +6,6 @@ TargetLoweringObjectFile.cpp TargetMachine.cpp TargetMachineC.cpp - TargetRecip.cpp TargetSubtargetInfo.cpp ADDITIONAL_HEADER_DIRS Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -967,11 +967,11 @@ SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const override; - SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -901,23 +901,6 @@ setTargetDAGCombine(ISD::FSQRT); } - // For the estimates, convergence is quadratic, so we essentially double the - // number of digits correct after every iteration. For both FRE and FRSQRTE, - // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), - // this is 2^-14. IEEE float has 23 digits and double has 52 digits. - unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3, - RefinementSteps64 = RefinementSteps + 1; - - ReciprocalEstimates.set("sqrtf", true, RefinementSteps); - ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps); - ReciprocalEstimates.set("divf", true, RefinementSteps); - ReciprocalEstimates.set("vec-divf", true, RefinementSteps); - - ReciprocalEstimates.set("sqrtd", true, RefinementSteps64); - ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64); - ReciprocalEstimates.set("divd", true, RefinementSteps64); - ReciprocalEstimates.set("vec-divd", true, RefinementSteps64); - // Darwin long double math library functions have $LDBL128 appended. if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -9639,22 +9622,19 @@ // Target Optimization Hooks //===----------------------------------------------------------------------===// -static std::string getRecipOp(const char *Base, EVT VT) { - std::string RecipOp(Base); +static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { + // For the estimates, convergence is quadratic, so we essentially double the + // number of digits correct after every iteration. For both FRE and FRSQRTE, + // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), + // this is 2^-14. IEEE float has 23 digits and double has 52 digits. + int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) - RecipOp += "d"; - else - RecipOp += "f"; - - if (VT.isVector()) - RecipOp = "vec-" + RecipOp; - - return RecipOp; + RefinementSteps++; + return RefinementSteps; } -SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, +SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || @@ -9663,21 +9643,18 @@ (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - std::string RecipOp = getRecipOp("sqrt", VT); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); - RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; - return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); + return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } -SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { +SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, + int &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || @@ -9685,13 +9662,9 @@ (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - std::string RecipOp = getRecipOp("div", VT); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); - - RefinementSteps = Recips.getRefinementSteps(RecipOp); - return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); + return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } Index: llvm/trunk/lib/Target/TargetRecip.cpp =================================================================== --- llvm/trunk/lib/Target/TargetRecip.cpp +++ llvm/trunk/lib/Target/TargetRecip.cpp @@ -1,210 +0,0 @@ -//===-------------------------- TargetRecip.cpp ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class is used to customize machine-specific reciprocal estimate code -// generation in a target-independent way. -// If a target does not support operations in this specification, then code -// generation will default to using supported operations. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetRecip.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -// These are the names of the individual reciprocal operations. These are -// the key strings for queries and command-line inputs. -// In addition, the command-line interface recognizes the global parameters -// "all", "none", and "default". -static const char *const RecipOps[] = { - "divd", - "divf", - "vec-divd", - "vec-divf", - "sqrtd", - "sqrtf", - "vec-sqrtd", - "vec-sqrtf", -}; - -/// All operations are disabled by default and refinement steps are set to zero. -TargetRecip::TargetRecip() { - unsigned NumStrings = llvm::array_lengthof(RecipOps); - for (unsigned i = 0; i < NumStrings; ++i) - RecipMap.insert(std::make_pair(RecipOps[i], RecipParams())); -} - -static bool parseRefinementStep(StringRef In, size_t &Position, - uint8_t &Value) { - const char RefStepToken = ':'; - Position = In.find(RefStepToken); - if (Position == StringRef::npos) - return false; - - StringRef RefStepString = In.substr(Position + 1); - // Allow exactly one numeric character for the additional refinement - // step parameter. - if (RefStepString.size() == 1) { - char RefStepChar = RefStepString[0]; - if (RefStepChar >= '0' && RefStepChar <= '9') { - Value = RefStepChar - '0'; - return true; - } - } - report_fatal_error("Invalid refinement step for -recip."); -} - -bool TargetRecip::parseGlobalParams(const std::string &Arg) { - StringRef ArgSub = Arg; - - // Look for an optional setting of the number of refinement steps needed - // for this type of reciprocal operation. - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(ArgSub, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = ArgSub.substr(RefPos + 1); - ArgSub = ArgSub.substr(0, RefPos); - } - bool Enable; - bool UseDefaults; - if (ArgSub == "all") { - UseDefaults = false; - Enable = true; - } else if (ArgSub == "none") { - UseDefaults = false; - Enable = false; - } else if (ArgSub == "default") { - UseDefaults = true; - } else { - // Any other string is invalid or an individual setting. - return false; - } - - // All enable values will be initialized to target defaults if 'default' was - // specified. - if (!UseDefaults) - for (auto &KV : RecipMap) - KV.second.Enabled = Enable; - - // Custom refinement count was specified with all, none, or default. - if (!RefStepString.empty()) - for (auto &KV : RecipMap) - KV.second.RefinementSteps = RefSteps; - - return true; -} - -void TargetRecip::parseIndividualParams(const std::vector &Args) { - static const char DisabledPrefix = '!'; - unsigned NumArgs = Args.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - StringRef Val = Args[i]; - - bool IsDisabled = Val[0] == DisabledPrefix; - // Ignore the disablement token for string matching. - if (IsDisabled) - Val = Val.substr(1); - - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(Val, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = Val.substr(RefPos + 1); - Val = Val.substr(0, RefPos); - } - - RecipIter Iter = RecipMap.find(Val); - if (Iter == RecipMap.end()) { - // Try again specifying float suffix. - Iter = RecipMap.find(Val.str() + 'f'); - if (Iter == RecipMap.end()) { - Iter = RecipMap.find(Val.str() + 'd'); - assert(Iter == RecipMap.end() && "Float entry missing from map"); - report_fatal_error("Invalid option for -recip."); - } - } - - // Mark the matched option as found. Do not allow duplicate specifiers. - Iter->second.Enabled = !IsDisabled; - if (!RefStepString.empty()) - Iter->second.RefinementSteps = RefSteps; - - // If the precision was not specified, the double entry is also initialized. - if (Val.back() != 'f' && Val.back() != 'd') { - RecipParams &Params = RecipMap[Val.str() + 'd']; - Params.Enabled = !IsDisabled; - if (!RefStepString.empty()) - Params.RefinementSteps = RefSteps; - } - } -} - -void TargetRecip::set(StringRef &RecipString) { - SmallVector RecipStringVector; - SplitString(RecipString, RecipStringVector, ","); - std::vector RecipVector; - for (unsigned i = 0; i < RecipStringVector.size(); ++i) - RecipVector.push_back(RecipStringVector[i].str()); - - unsigned NumArgs = RecipVector.size(); - - // Check if "all", "default", or "none" was specified. - if (NumArgs == 1 && parseGlobalParams(RecipVector[0])) - return; - - parseIndividualParams(RecipVector); -} - -bool TargetRecip::isEnabled(StringRef Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - return Iter->second.Enabled; -} - -unsigned TargetRecip::getRefinementSteps(StringRef Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - return Iter->second.RefinementSteps; -} - -void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) { - if (Key == "all") { - for (auto &KV : RecipMap) { - RecipParams &RP = KV.second; - RP.Enabled = Enable; - RP.RefinementSteps = RefSteps; - } - } else { - RecipParams &RP = RecipMap[Key]; - RP.Enabled = Enable; - RP.RefinementSteps = RefSteps; - } -} - -bool TargetRecip::operator==(const TargetRecip &Other) const { - for (const auto &KV : RecipMap) { - StringRef Op = KV.first; - const RecipParams &RP = KV.second; - const RecipParams &OtherRP = Other.RecipMap.find(Op)->second; - if (RP.RefinementSteps != OtherRP.RefinementSteps) - return false; - if (RP.Enabled != OtherRP.Enabled) - return false; - } - return true; -} Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -1266,13 +1266,13 @@ bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override; /// Use rsqrt* to speed up sqrt calculations. - SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const override; /// Use rcp* to speed up fdiv calculations. - SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. unsigned combineRepeatedFPDivisors() const override; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -53,7 +53,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRecip.h" #include "X86IntrinsicsInfo.h" #include #include @@ -85,15 +84,6 @@ // X86-SSE is even stranger. It uses -1 or 0 for vector masks. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - // By default (and when -ffast-math is on), enable estimate codegen with 1 - // refinement step for floats (not doubles) except scalar division. Scalar - // division estimates are disabled because they break too much real-world - // code. These defaults are intended to match GCC behavior. - ReciprocalEstimates.set("sqrtf", true, 1); - ReciprocalEstimates.set("divf", false, 1); - ReciprocalEstimates.set("vec-sqrtf", true, 1); - ReciprocalEstimates.set("vec-divf", true, 1); - // For 64-bit, since we have so many registers, use the ILP scheduler. // For 32-bit, use the register pressure specific scheduling. // For Atom, always use ILP scheduling. @@ -15251,11 +15241,10 @@ /// The minimum architected relative accuracy is 2^-12. We need one /// Newton-Raphson step to have a good float result (24 bits of precision). SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps, + SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Op.getValueType(); - const char *RecipOp; // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps. // TODO: Add support for AVX512 (v16f32). @@ -15264,30 +15253,24 @@ // instructions: convert to single, rsqrtss, convert back to double, refine // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if (VT == MVT::f32 && Subtarget.hasSSE1()) - RecipOp = "sqrtf"; - else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || - (VT == MVT::v8f32 && Subtarget.hasAVX())) - RecipOp = "vec-sqrtf"; - else - return SDValue(); + if ((VT == MVT::f32 && Subtarget.hasSSE1()) || + (VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) { + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = 1; - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); - - RefinementSteps = Recips.getRefinementSteps(RecipOp); - UseOneConstNR = false; - return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + UseOneConstNR = false; + return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + } + return SDValue(); } /// The minimum architected relative accuracy is 2^-12. We need one /// Newton-Raphson step to have a good float result (24 bits of precision). -SDValue X86TargetLowering::getRecipEstimate(SDValue Op, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { +SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG, + int Enabled, + int &RefinementSteps) const { EVT VT = Op.getValueType(); - const char *RecipOp; // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. // TODO: Add support for AVX512 (v16f32). @@ -15296,20 +15279,22 @@ // 15 instructions: convert to single, rcpss, convert back to double, refine // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if (VT == MVT::f32 && Subtarget.hasSSE1()) - RecipOp = "divf"; - else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || - (VT == MVT::v8f32 && Subtarget.hasAVX())) - RecipOp = "vec-divf"; - else - return SDValue(); - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); + if ((VT == MVT::f32 && Subtarget.hasSSE1()) || + (VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) { + // Enable estimate codegen with 1 refinement step for vector division. + // Scalar division estimates are disabled because they break too much + // real-world code. These defaults are intended to match GCC behavior. + if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified) + return SDValue(); - RefinementSteps = Recips.getRefinementSteps(RecipOp); - return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); + if (RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = 1; + + return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); + } + return SDValue(); } /// If we have at least two divisions that use the same divisor, convert to