Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -61,7 +61,6 @@ class MCSymbol; template class SmallVectorImpl; class DataLayout; - struct TargetRecip; class TargetRegisterClass; class TargetLibraryInfo; class TargetLoweringObjectFile; @@ -247,6 +246,37 @@ return false; } + /// Reciprocal estimate status values used by the functions below. + enum ReciprocalEstimate : int { + Unspecified = -1, + Disabled = 0, + Enabled = 1 + }; + + /// Return a ReciprocalEstimate enum value for a square root of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getSqrtEnabled(EVT VT, MachineFunction &MF) const; + + /// Return a ReciprocalEstimate enum value for a division of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getDivEnabled(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a square root of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a division of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. 
+ int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; + /// Returns true if target has indicated at least one type should be bypassed. bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } @@ -539,12 +569,6 @@ } } - /// Return the reciprocal estimate code generation preferences for this target - /// after potentially overriding settings using the function's attributes. - /// FIXME: Like all unsafe-math target settings, this should really be an - /// instruction-level attribute/metadata/FMF. - TargetRecip getTargetRecipForFunc(MachineFunction &MF) const; - /// Vector types are broken down into some number of legal first class types. /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 @@ -2154,7 +2178,6 @@ /// sequence of memory operands that is recognized by PrologEpilogInserter. MachineBasicBlock *emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const; - TargetRecip ReciprocalEstimates; }; /// This class defines information used to lower LLVM code to legal SelectionDAG Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -15,7 +15,6 @@ #ifndef LLVM_TARGET_TARGETOPTIONS_H #define LLVM_TARGET_TARGETOPTIONS_H -#include "llvm/Target/TargetRecip.h" #include "llvm/MC/MCTargetOptions.h" namespace llvm { Index: include/llvm/Target/TargetRecip.h =================================================================== --- include/llvm/Target/TargetRecip.h +++ include/llvm/Target/TargetRecip.h @@ -1,71 +0,0 @@ -//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This class is used to customize machine-specific reciprocal estimate code -// generation in a target-independent way. -// If a target does not support operations in this specification, then code -// generation will default to using supported operations. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_TARGETRECIP_H -#define LLVM_TARGET_TARGETRECIP_H - -#include -#include -#include -#include - -namespace llvm { - -class StringRef; - -struct TargetRecip { -public: - TargetRecip(); - - /// Parse a comma-separated string of reciprocal settings to set values in - /// this struct. - void set(StringRef &Args); - - /// Set enablement and refinement steps for a particular reciprocal operation. - /// Use "all" to give all operations the same values. - void set(StringRef Key, bool Enable, unsigned RefSteps); - - /// Return true if the reciprocal operation has been enabled. - bool isEnabled(StringRef Key) const; - - /// Return the number of iterations necessary to refine the - /// the result of a machine instruction for the given reciprocal operation. - unsigned getRefinementSteps(StringRef Key) const; - - bool operator==(const TargetRecip &Other) const; - -private: - // TODO: We should be able to use special values (enums) to simplify this into - // just an int, but we have to be careful because the user is allowed to - // specify "default" as a setting and just change the refinement step count. 
- struct RecipParams { - bool Enabled; - int8_t RefinementSteps; - - RecipParams() : Enabled(false), RefinementSteps(0) {} - }; - - std::map RecipMap; - typedef std::map::iterator RecipIter; - typedef std::map::const_iterator ConstRecipIter; - - bool parseGlobalParams(const std::string &Arg); - void parseIndividualParams(const std::vector &Args); -}; - -} // end namespace llvm - -#endif // LLVM_TARGET_TARGETRECIP_H Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -837,7 +838,6 @@ InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple()); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); - ReciprocalEstimates.set("all", false, 0); } void TargetLoweringBase::initActions() { @@ -1485,22 +1485,6 @@ return MVT::i32; // return the default value } -TargetRecip -TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const { - const Function *F = MF.getFunction(); - StringRef RecipAttrName = "reciprocal-estimates"; - if (!F->hasFnAttribute(RecipAttrName)) - return ReciprocalEstimates; - - // Make a copy of the target's default reciprocal codegen settings. - TargetRecip Recips = ReciprocalEstimates; - - // Override any settings that are customized for this function. - StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString(); - Recips.set(RecipString); - return Recips; -} - /// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. 
@@ -1859,3 +1843,194 @@
 void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
   MaximumJumpTableSize = Val;
 }
+
+//===----------------------------------------------------------------------===//
+// Reciprocal Estimates
+//===----------------------------------------------------------------------===//
+
+/// Get the reciprocal estimate attribute string for a function that will
+/// override the target defaults.
+static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  StringRef RecipAttrName = "reciprocal-estimates";
+  if (!F->hasFnAttribute(RecipAttrName))
+    return StringRef();
+
+  return F->getFnAttribute(RecipAttrName).getValueAsString();
+}
+
+/// Construct a string for the given reciprocal operation of the given type.
+/// This string should match the corresponding option to the front-end's
+/// "-mrecip" flag assuming those strings have been passed through in an
+/// attribute string. For example, "vec-divf" for a division of a vXf32.
+static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
+  std::string Name = VT.isVector() ? "vec-" : "";
+
+  Name += IsSqrt ? "sqrt" : "div";
+
+  // TODO: Handle "half" or other float types?
+  if (VT.getScalarType() == MVT::f64) {
+    Name += "d";
+  } else {
+    assert(VT.getScalarType() == MVT::f32 &&
+           "Unexpected FP type for reciprocal estimate");
+    Name += "f";
+  }
+
+  return Name;
+}
+
+/// Return the character position and value (a single numeric character) of a
+/// customized refinement operation in the input string if it exists. Return
+/// false if there is no customized refinement step count.
+/// A fatal error is reported if the ':' token is present but is not followed
+/// by exactly one decimal digit.
+static bool parseRefinementStep(StringRef In, size_t &Position,
+                                uint8_t &Value) {
+  const char RefStepToken = ':';
+  Position = In.find(RefStepToken);
+  if (Position == StringRef::npos)
+    return false;
+
+  StringRef RefStepString = In.substr(Position + 1);
+  // Allow exactly one numeric character for the additional refinement
+  // step parameter.
+  if (RefStepString.size() == 1) {
+    char RefStepChar = RefStepString[0];
+    if (RefStepChar >= '0' && RefStepChar <= '9') {
+      Value = RefStepChar - '0';
+      return true;
+    }
+  }
+  report_fatal_error("Invalid refinement step for -recip.");
+}
+
+/// For the input attribute string, return one of the ReciprocalEstimate enum
+/// status values (enabled, disabled, or not specified) for this operation on
+/// the specified data type.
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
+  if (Override.empty())
+    return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+  SmallVector<StringRef, 4> OverrideVector;
+  SplitString(Override, OverrideVector, ",");
+  unsigned NumArgs = OverrideVector.size();
+
+  // Check if "all", "none", or "default" was specified.
+  if (NumArgs == 1) {
+    // Look for an optional setting of the number of refinement steps needed
+    // for this type of reciprocal operation.
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (parseRefinementStep(Override, RefPos, RefSteps)) {
+      // Split the string for further processing.
+      Override = Override.substr(0, RefPos);
+    }
+
+    // All reciprocal types are enabled.
+    if (Override == "all")
+      return TargetLoweringBase::ReciprocalEstimate::Enabled;
+
+    // All reciprocal types are disabled.
+    if (Override == "none")
+      return TargetLoweringBase::ReciprocalEstimate::Disabled;
+
+    // Target defaults for enablement are used.
+    if (Override == "default")
+      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+  }
+
+  // The attribute string may omit the size suffix ('f'/'d').
+  // Keep the name in an owning std::string: a StringRef bound directly to the
+  // temporary returned by getReciprocalOpName() would dangle.
+  const std::string VTName = getReciprocalOpName(IsSqrt, VT);
+  const StringRef VTNameNoSize = StringRef(VTName).drop_back();
+  static const char DisabledPrefix = '!';
+
+  for (StringRef RecipType : OverrideVector) {
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (parseRefinementStep(RecipType, RefPos, RefSteps))
+      RecipType = RecipType.substr(0, RefPos);
+
+    // Ignore the disablement token for string matching. The token may be
+    // empty here (e.g. ":3" after stripping the step count), so check first.
+    bool IsDisabled = !RecipType.empty() && RecipType[0] == DisabledPrefix;
+    if (IsDisabled)
+      RecipType = RecipType.substr(1);
+
+    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+      return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
+                        : TargetLoweringBase::ReciprocalEstimate::Enabled;
+  }
+
+  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+/// For the input attribute string, return the customized refinement step count
+/// for this operation on the specified data type. If the step count does not
+/// exist, return the ReciprocalEstimate enum value for unspecified.
+static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
+  if (Override.empty())
+    return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+  SmallVector<StringRef, 4> OverrideVector;
+  SplitString(Override, OverrideVector, ",");
+  unsigned NumArgs = OverrideVector.size();
+
+  // Check if "all", "default", or "none" was specified.
+  if (NumArgs == 1) {
+    // Look for an optional setting of the number of refinement steps needed
+    // for this type of reciprocal operation.
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (!parseRefinementStep(Override, RefPos, RefSteps))
+      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+    // Split the string for further processing.
+    Override = Override.substr(0, RefPos);
+    assert(Override != "none" &&
+           "Disabled reciprocals, but specifed refinement steps?");
+
+    // If this is a general override, return the specified number of steps.
+    if (Override == "all" || Override == "default")
+      return RefSteps;
+  }
+
+  // The attribute string may omit the size suffix ('f'/'d').
+  // Keep the name in an owning std::string: a StringRef bound directly to the
+  // temporary returned by getReciprocalOpName() would dangle.
+  const std::string VTName = getReciprocalOpName(IsSqrt, VT);
+  const StringRef VTNameNoSize = StringRef(VTName).drop_back();
+
+  for (StringRef RecipType : OverrideVector) {
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (!parseRefinementStep(RecipType, RefPos, RefSteps))
+      continue;
+
+    RecipType = RecipType.substr(0, RefPos);
+    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+      return RefSteps;
+  }
+
+  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+int TargetLoweringBase::getSqrtEnabled(EVT VT, MachineFunction &MF) const {
+  return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getDivEnabled(EVT VT, MachineFunction &MF) const {
+  return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
+                                               MachineFunction &MF) const {
+  return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getDivRefinementSteps(EVT VT,
+                                              MachineFunction &MF) const {
+  return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
+}
Index: lib/Target/CMakeLists.txt
===================================================================
--- lib/Target/CMakeLists.txt
+++ lib/Target/CMakeLists.txt
@@ -6,7 +6,6 @@
   TargetLoweringObjectFile.cpp
   TargetMachine.cpp
   TargetMachineC.cpp
-  TargetRecip.cpp
   TargetSubtargetInfo.cpp
 
   ADDITIONAL_HEADER_DIRS
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -901,23 +901,6 @@
     setTargetDAGCombine(ISD::FSQRT);
   }
 
-  // For the estimates, convergence is quadratic, so we essentially double the
-  // number of digits correct after every iteration. For both FRE and FRSQRTE,
-  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
-  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
-  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3, - RefinementSteps64 = RefinementSteps + 1; - - ReciprocalEstimates.set("sqrtf", true, RefinementSteps); - ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps); - ReciprocalEstimates.set("divf", true, RefinementSteps); - ReciprocalEstimates.set("vec-divf", true, RefinementSteps); - - ReciprocalEstimates.set("sqrtd", true, RefinementSteps64); - ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64); - ReciprocalEstimates.set("divd", true, RefinementSteps64); - ReciprocalEstimates.set("vec-divd", true, RefinementSteps64); - // Darwin long double math library functions have $LDBL128 appended. if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -9639,19 +9622,6 @@ // Target Optimization Hooks //===----------------------------------------------------------------------===// -static std::string getRecipOp(const char *Base, EVT VT) { - std::string RecipOp(Base); - if (VT.getScalarType() == MVT::f64) - RecipOp += "d"; - else - RecipOp += "f"; - - if (VT.isVector()) - RecipOp = "vec-" + RecipOp; - - return RecipOp; -} - SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, @@ -9663,12 +9633,23 @@ (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - std::string RecipOp = getRecipOp("sqrt", VT); - if (!Recips.isEnabled(RecipOp)) + int Enabled = getSqrtEnabled(VT, DCI.DAG.getMachineFunction()); + if (Enabled == ReciprocalEstimate::Disabled) return SDValue(); - RefinementSteps = Recips.getRefinementSteps(RecipOp); + // Reciprocals may be explicitly enabled for this type with a custom + // number of refinement steps. 
+ RefinementSteps = getSqrtRefinementSteps(VT, DCI.DAG.getMachineFunction()); + if ((int)RefinementSteps == ReciprocalEstimate::Unspecified) { + // For the estimates, convergence is quadratic, so we essentially double + // the number of digits correct after every iteration. For both FRE and + // FRSQRTE, the minimum architected relative accuracy is 2^-5. When + // hasRecipPrec(), this is 2^-14. IEEE float has 23 digits and double has + // 52 digits. + RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; + if (VT.getScalarType() == MVT::f64) + RefinementSteps++; + } UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } @@ -9685,12 +9666,23 @@ (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - std::string RecipOp = getRecipOp("div", VT); - if (!Recips.isEnabled(RecipOp)) + int Enabled = getDivEnabled(VT, DCI.DAG.getMachineFunction()); + if (Enabled == ReciprocalEstimate::Disabled) return SDValue(); - RefinementSteps = Recips.getRefinementSteps(RecipOp); + // Reciprocals may be explicitly enabled for this type with a custom + // number of refinement steps. + RefinementSteps = getDivRefinementSteps(VT, DCI.DAG.getMachineFunction()); + if ((int)RefinementSteps == ReciprocalEstimate::Unspecified) { + // For the estimates, convergence is quadratic, so we essentially double + // the number of digits correct after every iteration. For both FRE and + // FRSQRTE, the minimum architected relative accuracy is 2^-5. When + // hasRecipPrec(), this is 2^-14. IEEE float has 23 digits and double has + // 52 digits. + RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3; + if (VT.getScalarType() == MVT::f64) + RefinementSteps++; + } return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); Index: lib/Target/TargetRecip.cpp =================================================================== --- lib/Target/TargetRecip.cpp +++ lib/Target/TargetRecip.cpp @@ -1,210 +0,0 @@ -//===-------------------------- TargetRecip.cpp ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class is used to customize machine-specific reciprocal estimate code -// generation in a target-independent way. -// If a target does not support operations in this specification, then code -// generation will default to using supported operations. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetRecip.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -// These are the names of the individual reciprocal operations. These are -// the key strings for queries and command-line inputs. -// In addition, the command-line interface recognizes the global parameters -// "all", "none", and "default". -static const char *const RecipOps[] = { - "divd", - "divf", - "vec-divd", - "vec-divf", - "sqrtd", - "sqrtf", - "vec-sqrtd", - "vec-sqrtf", -}; - -/// All operations are disabled by default and refinement steps are set to zero. 
-TargetRecip::TargetRecip() { - unsigned NumStrings = llvm::array_lengthof(RecipOps); - for (unsigned i = 0; i < NumStrings; ++i) - RecipMap.insert(std::make_pair(RecipOps[i], RecipParams())); -} - -static bool parseRefinementStep(StringRef In, size_t &Position, - uint8_t &Value) { - const char RefStepToken = ':'; - Position = In.find(RefStepToken); - if (Position == StringRef::npos) - return false; - - StringRef RefStepString = In.substr(Position + 1); - // Allow exactly one numeric character for the additional refinement - // step parameter. - if (RefStepString.size() == 1) { - char RefStepChar = RefStepString[0]; - if (RefStepChar >= '0' && RefStepChar <= '9') { - Value = RefStepChar - '0'; - return true; - } - } - report_fatal_error("Invalid refinement step for -recip."); -} - -bool TargetRecip::parseGlobalParams(const std::string &Arg) { - StringRef ArgSub = Arg; - - // Look for an optional setting of the number of refinement steps needed - // for this type of reciprocal operation. - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(ArgSub, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = ArgSub.substr(RefPos + 1); - ArgSub = ArgSub.substr(0, RefPos); - } - bool Enable; - bool UseDefaults; - if (ArgSub == "all") { - UseDefaults = false; - Enable = true; - } else if (ArgSub == "none") { - UseDefaults = false; - Enable = false; - } else if (ArgSub == "default") { - UseDefaults = true; - } else { - // Any other string is invalid or an individual setting. - return false; - } - - // All enable values will be initialized to target defaults if 'default' was - // specified. - if (!UseDefaults) - for (auto &KV : RecipMap) - KV.second.Enabled = Enable; - - // Custom refinement count was specified with all, none, or default. 
- if (!RefStepString.empty()) - for (auto &KV : RecipMap) - KV.second.RefinementSteps = RefSteps; - - return true; -} - -void TargetRecip::parseIndividualParams(const std::vector &Args) { - static const char DisabledPrefix = '!'; - unsigned NumArgs = Args.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - StringRef Val = Args[i]; - - bool IsDisabled = Val[0] == DisabledPrefix; - // Ignore the disablement token for string matching. - if (IsDisabled) - Val = Val.substr(1); - - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(Val, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = Val.substr(RefPos + 1); - Val = Val.substr(0, RefPos); - } - - RecipIter Iter = RecipMap.find(Val); - if (Iter == RecipMap.end()) { - // Try again specifying float suffix. - Iter = RecipMap.find(Val.str() + 'f'); - if (Iter == RecipMap.end()) { - Iter = RecipMap.find(Val.str() + 'd'); - assert(Iter == RecipMap.end() && "Float entry missing from map"); - report_fatal_error("Invalid option for -recip."); - } - } - - // Mark the matched option as found. Do not allow duplicate specifiers. - Iter->second.Enabled = !IsDisabled; - if (!RefStepString.empty()) - Iter->second.RefinementSteps = RefSteps; - - // If the precision was not specified, the double entry is also initialized. - if (Val.back() != 'f' && Val.back() != 'd') { - RecipParams &Params = RecipMap[Val.str() + 'd']; - Params.Enabled = !IsDisabled; - if (!RefStepString.empty()) - Params.RefinementSteps = RefSteps; - } - } -} - -void TargetRecip::set(StringRef &RecipString) { - SmallVector RecipStringVector; - SplitString(RecipString, RecipStringVector, ","); - std::vector RecipVector; - for (unsigned i = 0; i < RecipStringVector.size(); ++i) - RecipVector.push_back(RecipStringVector[i].str()); - - unsigned NumArgs = RecipVector.size(); - - // Check if "all", "default", or "none" was specified. 
- if (NumArgs == 1 && parseGlobalParams(RecipVector[0])) - return; - - parseIndividualParams(RecipVector); -} - -bool TargetRecip::isEnabled(StringRef Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - return Iter->second.Enabled; -} - -unsigned TargetRecip::getRefinementSteps(StringRef Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - return Iter->second.RefinementSteps; -} - -void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) { - if (Key == "all") { - for (auto &KV : RecipMap) { - RecipParams &RP = KV.second; - RP.Enabled = Enable; - RP.RefinementSteps = RefSteps; - } - } else { - RecipParams &RP = RecipMap[Key]; - RP.Enabled = Enable; - RP.RefinementSteps = RefSteps; - } -} - -bool TargetRecip::operator==(const TargetRecip &Other) const { - for (const auto &KV : RecipMap) { - StringRef Op = KV.first; - const RecipParams &RP = KV.second; - const RecipParams &OtherRP = Other.RecipMap.find(Op)->second; - if (RP.RefinementSteps != OtherRP.RefinementSteps) - return false; - if (RP.Enabled != OtherRP.Enabled) - return false; - } - return true; -} Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -53,7 +53,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRecip.h" #include "X86IntrinsicsInfo.h" #include #include @@ -85,15 +84,6 @@ // X86-SSE is even stranger. It uses -1 or 0 for vector masks. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - // By default (and when -ffast-math is on), enable estimate codegen with 1 - // refinement step for floats (not doubles) except scalar division. 
Scalar - // division estimates are disabled because they break too much real-world - // code. These defaults are intended to match GCC behavior. - ReciprocalEstimates.set("sqrtf", true, 1); - ReciprocalEstimates.set("divf", false, 1); - ReciprocalEstimates.set("vec-sqrtf", true, 1); - ReciprocalEstimates.set("vec-divf", true, 1); - // For 64-bit, since we have so many registers, use the ILP scheduler. // For 32-bit, use the register pressure specific scheduling. // For Atom, always use ILP scheduling. @@ -15199,7 +15189,6 @@ unsigned &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Op.getValueType(); - const char *RecipOp; // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps. // TODO: Add support for AVX512 (v16f32). @@ -15208,21 +15197,23 @@ // instructions: convert to single, rsqrtss, convert back to double, refine // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if (VT == MVT::f32 && Subtarget.hasSSE1()) - RecipOp = "sqrtf"; - else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || - (VT == MVT::v8f32 && Subtarget.hasAVX())) - RecipOp = "vec-sqrtf"; - else - return SDValue(); + if ((VT == MVT::f32 && Subtarget.hasSSE1()) || + (VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) { + int Enabled = getSqrtEnabled(VT, DCI.DAG.getMachineFunction()); + if (Enabled == ReciprocalEstimate::Disabled) + return SDValue(); - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - if (!Recips.isEnabled(RecipOp)) - return SDValue(); + // Reciprocals may be explicitly enabled for this type with a custom number + // of refinement steps. 
+ RefinementSteps = getSqrtRefinementSteps(VT, DCI.DAG.getMachineFunction()); + if ((int)RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = 1; - RefinementSteps = Recips.getRefinementSteps(RecipOp); - UseOneConstNR = false; - return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + UseOneConstNR = false; + return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + } + return SDValue(); } /// The minimum architected relative accuracy is 2^-12. We need one @@ -15231,28 +15222,35 @@ DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { EVT VT = Op.getValueType(); - const char *RecipOp; // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision - // reciprocal estimate with refinement on x86 prior to FMA requires - // 15 instructions: convert to single, rcpss, convert back to double, refine + // reciprocal estimate with refinement on x86 prior to FMA requires 15 + // instructions: convert to single, rcpss, convert back to double, refine // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if (VT == MVT::f32 && Subtarget.hasSSE1()) - RecipOp = "divf"; - else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || - (VT == MVT::v8f32 && Subtarget.hasAVX())) - RecipOp = "vec-divf"; - else - return SDValue(); - TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); - if (!Recips.isEnabled(RecipOp)) + // Enable estimate codegen with 1 refinement step for vector division. Scalar + // division estimates are disabled because they break too much real-world + // code. These defaults are intended to match GCC behavior. 
+ int Enabled = getDivEnabled(VT, DCI.DAG.getMachineFunction()); + if (VT == MVT::f32 && Subtarget.hasSSE1()) { + if (Enabled != ReciprocalEstimate::Enabled) + return SDValue(); + } else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) { + if (Enabled == ReciprocalEstimate::Disabled) + return SDValue(); + } else { return SDValue(); + } + // Reciprocals may be explicitly enabled for this type with a custom number + // of refinement steps. + RefinementSteps = getDivRefinementSteps(VT, DCI.DAG.getMachineFunction()); + if ((int)RefinementSteps == ReciprocalEstimate::Unspecified) + RefinementSteps = 1; - RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); }