Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -13187,7 +13187,6 @@
       declare <type>
             @llvm.experimental.constrained.frem(<type> <op1>, <type> <op2>,
-                                                metadata <rounding mode>,
                                                 metadata <exception behavior>)
 
 Overview:
 """""""""
@@ -13204,10 +13203,7 @@
 intrinsic must be :ref:`floating-point <t_floating>` or :ref:`vector
 <t_vector>` of floating-point values. Both arguments must have identical types.
 
-The third and fourth arguments specify the rounding mode and exception
-behavior as described above. The rounding mode argument has no effect, since
-the result of frem is never rounded, but the argument is included for
-consistency with the other constrained floating-point intrinsics.
+The third argument specifies the exception behavior as described above.
 
 Semantics:
 """"""""""
@@ -13252,6 +13248,141 @@
 operand computed with infinite precision, and then rounded to the target
 precision.
 
+'``llvm.experimental.constrained.fptoui``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.experimental.constrained.fptoui(<type> <value>,
+                                            metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fptoui``' intrinsic returns the result of
+converting a floating point operand to an unsigned integer.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.fptoui``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values.
+
+The second argument specifies the exception behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is an unsigned integer converted from the floating
+point operand. The value is truncated, so it is rounded towards zero.
+
+'``llvm.experimental.constrained.fptosi``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.experimental.constrained.fptosi(<type> <value>,
+                                            metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fptosi``' intrinsic returns the result of
+converting a floating point operand to a signed integer.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.fptosi``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values.
+
+The second argument specifies the exception behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is a signed integer converted from the floating
+point operand. The value is truncated, so it is rounded towards zero.
+
+'``llvm.experimental.constrained.fptrunc``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.experimental.constrained.fptrunc(<type> <value>,
+                                             metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fptrunc``' intrinsic returns the result of
+truncating a floating point operand to a smaller floating point type.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.fptrunc``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. This argument must be larger in size
+than the result.
+
+The second argument specifies the exception behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is a floating point value truncated to be smaller in size
+than the operand.
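For illustration, using the overload suffixes from the tests added later in
this patch (the final name mangling may differ), calls to the three-operand
form of ``llvm.experimental.constrained.frem`` and to the conversion
intrinsics documented above look roughly like this::

      %rem = call double @llvm.experimental.constrained.frem.f64(double %a, double %b,
                                                  metadata !"fpexcept.strict")
      %ui  = call i32 @llvm.experimental.constrained.fptoui.f64(double %a,
                                                  metadata !"fpexcept.strict")
      %tr  = call float @llvm.experimental.constrained.fptrunc.f32(double %a,
                                                  metadata !"fpexcept.strict")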
+
+'``llvm.experimental.constrained.fpext``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.experimental.constrained.fpext(<type> <value>,
+                                           metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fpext``' intrinsic returns the result of
+extending a floating point operand to a larger floating point type.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.fpext``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. This argument must be smaller in size
+than the result.
+
+The second argument specifies the exception behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is a floating point value extended to be larger in size
+than the operand. All restrictions that apply to the fpext instruction also
+apply to this intrinsic.
+
 Constrained libm-equivalent Intrinsics
 --------------------------------------
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -531,6 +531,11 @@
     /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
     FP_EXTEND,
 
+    STRICT_FP_TO_SINT,
+    STRICT_FP_TO_UINT,
+    STRICT_FP_ROUND,
+    STRICT_FP_EXTEND,
+
     /// BITCAST - This operator converts between integer, vector and FP
     /// values, as if the value was stored to memory with one type and loaded
     /// from the same address with the other type (or equivalently for vector
Index: include/llvm/CodeGen/Passes.h
===================================================================
--- include/llvm/CodeGen/Passes.h
+++ include/llvm/CodeGen/Passes.h
@@ -437,6 +437,8 @@
   /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
   FunctionPass *createCFIInstrInserter();
 
+  /// Creates the experimental pass with IR transforms needed for strict FP.
+  FunctionPass *createStrictFPPass(TargetMachine *);
 } // End llvm namespace
 
 #endif
Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h
+++ include/llvm/CodeGen/SelectionDAGNodes.h
@@ -647,6 +647,10 @@
     case ISD::STRICT_FLOG2:
     case ISD::STRICT_FRINT:
     case ISD::STRICT_FNEARBYINT:
+    case ISD::STRICT_FP_TO_SINT:
+    case ISD::STRICT_FP_TO_UINT:
+    case ISD::STRICT_FP_ROUND:
+    case ISD::STRICT_FP_EXTEND:
       return true;
     }
   }
Index: include/llvm/IR/IntrinsicInst.h
===================================================================
--- include/llvm/IR/IntrinsicInst.h
+++ include/llvm/IR/IntrinsicInst.h
@@ -192,6 +192,10 @@
     case Intrinsic::experimental_constrained_fdiv:
     case Intrinsic::experimental_constrained_frem:
     case Intrinsic::experimental_constrained_fma:
+    case Intrinsic::experimental_constrained_fptosi:
+    case Intrinsic::experimental_constrained_fptoui:
+    case Intrinsic::experimental_constrained_fptrunc:
+    case Intrinsic::experimental_constrained_fpext:
     case Intrinsic::experimental_constrained_sqrt:
     case Intrinsic::experimental_constrained_pow:
     case Intrinsic::experimental_constrained_powi:
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -493,7 +493,6 @@
   def int_experimental_constrained_frem : Intrinsic<[ llvm_anyfloat_ty ],
                                                     [ LLVMMatchType<0>,
                                                       LLVMMatchType<0>,
-                                                      llvm_metadata_ty,
                                                       llvm_metadata_ty ]>;
 
   def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
@@ -503,6 +502,22 @@
llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + // These intrinsics are sensitive to the rounding mode so we need constrained // versions of each of them. When strict rounding and exception control are // not required the non-constrained versions of these intrinsics should be Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -363,6 +363,7 @@ void initializeStackProtectorPass(PassRegistry&); void initializeStackSlotColoringPass(PassRegistry&); void initializeStraightLineStrengthReducePass(PassRegistry&); +void initializeStrictFPPassPass(PassRegistry&); void initializeStripDeadDebugInfoPass(PassRegistry&); void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&); void initializeStripDebugDeclarePass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -143,6 +143,7 @@ StackMaps.cpp StackProtector.cpp StackSlotColoring.cpp + StrictFP.cpp TailDuplication.cpp TailDuplicator.cpp TargetFrameLoweringImpl.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -94,6 +94,7 @@ initializeStackMapLivenessPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); + initializeStrictFPPassPass(Registry); initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -965,6 +965,10 @@ case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; + case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break; + case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } auto Action = TLI.getOperationAction(EqOpc, VT); @@ -1135,6 +1139,10 @@ case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: // These pseudo-ops get legalized as if they were their non-strict // equivalent. 
For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -3009,12 +3017,14 @@ break; } case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getOperand(0).getValueType(), Node->getValueType(0), dl); @@ -3072,6 +3082,7 @@ Results.push_back(Tmp1); break; case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; @@ -3098,6 +3109,9 @@ Results.push_back(Tmp1); break; } + case ISD::STRICT_FP_TO_UINT: + llvm_unreachable("Expansion of STRICT_FP_TO_UINT missed in earlier pass!"); + break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7086,16 +7086,37 @@ NewOpc = ISD::FNEARBYINT; IsUnary = true; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; + case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; IsUnary = true; break; + case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; IsUnary = true; break; } // We're taking this node out of the chain, so we need to re-link things. - SDValue InputChain = Node->getOperand(0); - SDValue OutputChain = SDValue(Node, 1); - ReplaceAllUsesOfValueWith(OutputChain, InputChain); + if (OrigOpc != ISD::STRICT_FP_TO_SINT) { + SDValue InputChain = Node->getOperand(0); + SDValue OutputChain = SDValue(Node, 1); + ReplaceAllUsesOfValueWith(OutputChain, InputChain); + } - SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); + SDVTList VTs; SDNode *Res = nullptr; - if (IsUnary) + + switch (OrigOpc) { + default: + VTs = getVTList(Node->getOperand(1).getValueType()); + break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: + VTs = getVTList(Node->ValueList[0]); + break; + } + + if (OrigOpc == ISD::STRICT_FP_TO_SINT) + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(0) }); + else if (IsUnary) Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); else if (IsTernary) Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5508,6 +5508,10 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -6139,6 +6143,18 @@ case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case 
Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; + case Intrinsic::experimental_constrained_fptrunc: + Opcode = ISD::STRICT_FP_ROUND; + break; + case Intrinsic::experimental_constrained_fpext: + Opcode = ISD::STRICT_FP_EXTEND; + break; case Intrinsic::experimental_constrained_sqrt: Opcode = ISD::STRICT_FSQRT; break; @@ -6180,12 +6196,16 @@ SDValue Chain = getRoot(); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); - ValueVTs.push_back(MVT::Other); // Out chain + if (Opcode != ISD::STRICT_FP_TO_SINT) + ValueVTs.push_back(MVT::Other); // Out chain SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result; - if (FPI.isUnaryOp()) + if (Opcode == ISD::STRICT_FP_TO_SINT) Result = DAG.getNode(Opcode, sdl, VTs, + { getValue(FPI.getArgOperand(0)) }); + else if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)) }); else if (FPI.isTernaryOp()) Result = DAG.getNode(Opcode, sdl, VTs, @@ -6197,9 +6217,11 @@ { Chain, getValue(FPI.getArgOperand(0)), getValue(FPI.getArgOperand(1)) }); - assert(Result.getNode()->getNumValues() == 2); - SDValue OutChain = Result.getValue(1); - DAG.setRoot(OutChain); + if (Opcode != ISD::STRICT_FP_TO_SINT) { + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + } SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); } Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -290,14 +290,18 @@ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; + case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; + case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; Index: lib/CodeGen/StrictFP.cpp =================================================================== --- lib/CodeGen/StrictFP.cpp +++ lib/CodeGen/StrictFP.cpp @@ -0,0 +1,206 @@ +//===----- StrictFP.cpp - Required transforms for strict FP ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains transforms necessary for strict floating point +/// operations. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "constrained-fp-transforms"
+
+STATISTIC(NumStrictFPOps, "Number of strict floating point ops transformed");
+
+namespace {
+
+class StrictFPPass : public FunctionPass {
+public:
+  static char ID;
+
+  std::vector<IntrinsicInst *> IntrinsicWorkList;
+  const DataLayout *DL;
+  TargetMachine *TM = nullptr;
+
+  StrictFPPass() : FunctionPass(ID) {
+    initializeStrictFPPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StrictFPPass(TargetMachine *TM) : FunctionPass(ID), TM(TM) {
+    initializeStrictFPPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &) override;
+
+private:
+  void inspectIntrinsicCall(IntrinsicInst *, const TargetLowering *);
+
+  bool processIntrinsicCall(LLVMContext &, IntrinsicInst *);
+
+  void replaceConstrainedFPToUI(LLVMContext &, IntrinsicInst *);
+};
+
+bool StrictFPPass::runOnFunction(Function &F) {
+  bool Changed = false;
+  DL = &F.getParent()->getDataLayout();
+
+  LLVMContext &Context = F.getParent()->getContext();
+
+  auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (auto *Call = dyn_cast<IntrinsicInst>(&I))
+        inspectIntrinsicCall(Call, TLI);
+    }
+  }
+
+  for (auto *I : IntrinsicWorkList) {
+    Changed |= processIntrinsicCall(Context, I);
+  }
+
+  IntrinsicWorkList.clear();
+
+  return Changed;
+}
+
+void StrictFPPass::inspectIntrinsicCall(IntrinsicInst *I,
+                                        const TargetLowering *TLI) {
+
+  switch (I->getIntrinsicID()) {
+  default:
+    return;
+  case Intrinsic::experimental_constrained_fptoui:
+    Value *IntDst = cast<Value>(I);
+    Type *IntDstType = IntDst->getType();
+    EVT VT = EVT::getEVT(IntDstType, true);
+
+    auto Action = TLI->getOperationAction(ISD::FP_TO_UINT, VT);
+
+    // We don't currently handle Custom or Promote for strict FP pseudo-ops.
+    // For now, we just expand for those cases.
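+    // For the calls queued here, replaceConstrainedFPToUI() below emits a
+    // compare of the source against 2^(N-1) (N = destination width): values
+    // below that bound are converted directly with constrained fptosi,
+    // larger values are biased down by 2^(N-1) with constrained fsub,
+    // converted, and have the sign bit restored with an xor; a phi merges
+    // the two results.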
+    if (Action != TargetLowering::Legal)
+      Action = TargetLowering::Expand;
+
+    if (Action == TargetLowering::Expand)
+      IntrinsicWorkList.push_back(I);
+
+    break;
+  }
+  return;
+}
+
+bool StrictFPPass::processIntrinsicCall(LLVMContext &Context,
+                                        IntrinsicInst *Call) {
+  switch (Call->getIntrinsicID()) {
+  default:
+    return false;
+  case Intrinsic::experimental_constrained_fptoui:
+    replaceConstrainedFPToUI(Context, Call);
+    break;
+  }
+  return true;
+}
+
+void StrictFPPass::replaceConstrainedFPToUI(LLVMContext &Context,
+                                            IntrinsicInst *I) {
+
+  // Four blocks:
+  //   #1 Gets the compare instruction, is the original block
+  //   #2 Gets conversion instructions when in signed range
+  //   #3 Conversion instructions when out of signed range
+  //   #4 Gets the PHI plus the remainder of the original block
+  //
+  // The original call gets replaced with the PHI.
+
+  SmallVector<Value *, 4> Operands(I->arg_operands());
+  Value *IntDst = cast<Value>(I);
+  Value *FPSrc = Operands[0];
+  Value *ExBehavior = Operands[1];
+
+  auto *t = cast<IntegerType>(I->getType());
+  APInt IntMaxAP(DL->getTypeStoreSize(t) * 8, t->getSignBit());
+  APFloat FPMaxAP((double)0);
+  FPMaxAP.convertFromAPInt(IntMaxAP, false, APFloat::rmNearestTiesToEven);
+  Constant *FPMaxSIntV = ConstantFP::get(FPSrc->getType(), FPMaxAP);
+  Constant *IntMaxSIntV = ConstantInt::get(IntDst->getType(), IntMaxAP);
+
+  /* TODO: should this be FCMP_OLT? Ordered? */
+  /* TODO: strict version of compare? */
+  FCmpInst *FPCompare = new FCmpInst(I, FCmpInst::FCMP_ULT, FPSrc, FPMaxSIntV,
+                                     "not_too_high_sint");
+
+  TerminatorInst *ThenTerm, *ElseTerm;
+  SplitBlockAndInsertIfThenElse(FPCompare, I, &ThenTerm, &ElseTerm);
+
+  BasicBlock *StraightConvBB = ThenTerm->getParent();
+  BasicBlock *NotInRangeBB = ElseTerm->getParent();
+  BasicBlock *ExitBB = I->getParent();
+  Function *F2SI = Intrinsic::getDeclaration(
+      ExitBB->getModule(), Intrinsic::experimental_constrained_fptosi,
+      {I->getType(), FPSrc->getType()});
+  Function *FSUB = Intrinsic::getDeclaration(
+      ExitBB->getModule(), Intrinsic::experimental_constrained_fsub,
+      FPSrc->getType());
+
+  MDString *SubRoundingMDS = MDString::get(Context, "round.dynamic");
+  Value *SubRounding = MetadataAsValue::get(Context, SubRoundingMDS);
+
+  CallInst *ThenSIntCall;
+  CallInst *ElseSIntCall;
+  Instruction *BiasedFPSrc;
+  Instruction *ElseSIntResult;
+
+  ThenSIntCall = CallInst::Create(F2SI, {FPSrc, ExBehavior}, "", ThenTerm);
+  BranchInst::Create(ExitBB, StraightConvBB);
+  ThenTerm->eraseFromParent();
+
+  BiasedFPSrc = CallInst::Create(
+      FSUB, {FPSrc, FPMaxSIntV, SubRounding, ExBehavior}, "", ElseTerm);
+  ElseSIntCall =
+      CallInst::Create(F2SI, {BiasedFPSrc, ExBehavior}, "", ElseTerm);
+  ElseSIntResult = BinaryOperator::Create(BinaryOperator::Xor, ElseSIntCall,
+                                          IntMaxSIntV, "", ElseTerm);
+  BranchInst::Create(ExitBB, NotInRangeBB);
+  ElseTerm->eraseFromParent();
+
+  PHINode *PN = PHINode::Create(ElseSIntResult->getType(), 2, "", I);
+  PN->addIncoming(ThenSIntCall, ThenSIntCall->getParent());
+  PN->addIncoming(ElseSIntResult, ElseSIntResult->getParent());
+  I->replaceAllUsesWith(PN);
+  I->eraseFromParent();
+
+  ++NumStrictFPOps;
+}
+
+} // End anonymous namespace
+
+char StrictFPPass::ID = 0;
+INITIALIZE_PASS(StrictFPPass, DEBUG_TYPE, "Force constrained floating point",
+                false, false)
+
+FunctionPass *llvm::createStrictFPPass(TargetMachine *TM) {
+  return new StrictFPPass(TM);
+}
Index: lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- lib/CodeGen/TargetPassConfig.cpp
+++
lib/CodeGen/TargetPassConfig.cpp @@ -162,6 +162,10 @@ clEnumValN(CFLAAType::Both, "both", "Enable both variants of CFL-AA"))); +static cl::opt + StrictFP("strict-fp-transforms", cl::init(true), cl::Hidden, + cl::desc("Enable transformations needed for strict FP.")); + /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. @@ -557,6 +561,10 @@ /// Add common target configurable passes that perform LLVM IR to IR transforms /// following machine independent optimization. void TargetPassConfig::addIRPasses() { + // Experimental pass with transforms needed for strict FP. + if (StrictFP) + addPass(createStrictFPPass(TM)); + switch (UseCFLAA) { case CFLAAType::Steensgaard: addPass(createCFLSteensAAWrapperPass()); Index: lib/IR/IntrinsicInst.cpp =================================================================== --- lib/IR/IntrinsicInst.cpp +++ lib/IR/IntrinsicInst.cpp @@ -134,6 +134,10 @@ switch (getIntrinsicID()) { default: return false; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_sin: case Intrinsic::experimental_constrained_cos: Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -4039,6 +4039,10 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -4442,17 +4446,89 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { unsigned NumOperands = FPI.getNumArgOperands(); - Assert(((NumOperands == 5 && FPI.isTernaryOp()) || - (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)), + bool HasExceptionMD = false; + bool HasRoundingMD = false; + switch (FPI.getIntrinsicID()) + { + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + Assert(((NumOperands == 5 && FPI.isTernaryOp()) || + (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + HasRoundingMD = true; + break; + + case Intrinsic::experimental_constrained_frem: + Assert((NumOperands == 3), 
"invalid arguments for constrained FP intrinsic", &FPI); - Assert(isa(FPI.getArgOperand(NumOperands-1)), - "invalid exception behavior argument", &FPI); - Assert(isa(FPI.getArgOperand(NumOperands-2)), - "invalid rounding mode argument", &FPI); - Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid, - "invalid rounding mode argument", &FPI); - Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid, - "invalid exception behavior argument", &FPI); + HasExceptionMD = true; + break; + + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + Value *Operand = FPI.getArgOperand(0); + Assert(Operand->getType()->isFloatingPointTy(), + "Constrained FP intrinsic first argument must be floating point", + &FPI); + Operand = &FPI; + Assert(Operand->getType()->isIntegerTy(), + "Constrained FP intrinsic result must be an integer", + &FPI); + } + break; + + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + Value *Operand = FPI.getArgOperand(0); + Assert(Operand->getType()->isFloatingPointTy(), + "Constrained FP intrinsic first argument must be floating point", + &FPI); + Operand = &FPI; + Assert(Operand->getType()->isFloatingPointTy(), + "Constrained FP intrinsic result must be floating point", + &FPI); + } + break; + + default: + break; + } + + if (HasExceptionMD) { + Assert(isa(FPI.getArgOperand(NumOperands-1)), + "invalid exception behavior argument", &FPI); + Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid, + "invalid exception behavior argument", &FPI); + } + if (HasRoundingMD) { + int RoundingOffset = (HasExceptionMD ? 
2 : 1);
+    Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-RoundingOffset)),
+           "invalid rounding mode argument", &FPI);
+    Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid,
+           "invalid rounding mode argument", &FPI);
+  }
 }
 
 void Verifier::visitDbgIntrinsic(StringRef Kind, DbgInfoIntrinsic &DII) {
Index: test/CodeGen/AArch64/O0-pipeline.ll
===================================================================
--- test/CodeGen/AArch64/O0-pipeline.ll
+++ test/CodeGen/AArch64/O0-pipeline.ll
@@ -16,6 +16,7 @@
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Expand Atomic instructions
+; CHECK-NEXT:     Force constrained floating point
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:     Module Verifier
Index: test/CodeGen/AArch64/O3-pipeline.ll
===================================================================
--- test/CodeGen/AArch64/O3-pipeline.ll
+++ test/CodeGen/AArch64/O3-pipeline.ll
@@ -17,6 +17,7 @@
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Expand Atomic instructions
+; CHECK-NEXT:     Force constrained floating point
 ; CHECK-NEXT:     Simplify the CFG
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Natural Loop Information
Index: test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- test/CodeGen/X86/O0-pipeline.ll
+++ test/CodeGen/X86/O0-pipeline.ll
@@ -16,6 +16,7 @@
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Expand Atomic instructions
+; CHECK-NEXT:     Force constrained floating point
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:     Module Verifier
Index: test/CodeGen/X86/O3-pipeline.ll
===================================================================
--- test/CodeGen/X86/O3-pipeline.ll
+++ test/CodeGen/X86/O3-pipeline.ll
@@ -17,6 +17,7 @@
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Expand Atomic instructions
+; CHECK-NEXT:     Force constrained floating point
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:     Module Verifier
Index: test/CodeGen/X86/fp-intrinsics.ll
===================================================================
--- test/CodeGen/X86/fp-intrinsics.ll
+++ test/CodeGen/X86/fp-intrinsics.ll
@@ -274,6 +274,56 @@
   ret double %result
 }
 
+; Verify that fptoui(42.1) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+; CHECK-LABEL: @f19
+; COMMON: movsd
+; COMMON: subsd
+define zeroext i32 @f19() {
+entry:
+  %result = call zeroext i32 @llvm.experimental.constrained.fptoui.f64(
+                               double 42.1,
+                               metadata !"fpexcept.strict")
+  ret i32 %result
+}
+
+; Verify that fptosi(42.1) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+; CHECK-LABEL: @f20
+; COMMON: cvttsd2si
+define i32 @f20() {
+entry:
+  %result = call i32 @llvm.experimental.constrained.fptosi.f64(double 42.1,
+                               metadata !"fpexcept.strict")
+  ret i32 %result
+}
+
+; Verify that fptrunc(42.1) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+; CHECK-LABEL: @f21 +; COMMON: cvtsd2ss +define float @f21() { +entry: + %result = call float @llvm.experimental.constrained.fptrunc.f32(double 42.1, + metadata !"fpexcept.strict") + ret float %result +} + +; Verify that fpext(42.1) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. +; CHECK-LABEL: @f22 +; COMMON: cvtss2sd +define double @f22(float %x) { +entry: + %result = call double @llvm.experimental.constrained.fpext.f32(float %x, + metadata !"fpexcept.strict") + ret double %result +} + @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) @@ -293,3 +343,8 @@ declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare zeroext i32 @llvm.experimental.constrained.fptoui.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptosi.f64(double, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32(double, metadata) +declare double @llvm.experimental.constrained.fpext.f32(float, metadata) + Index: test/Feature/fp-intrinsics.ll =================================================================== --- test/Feature/fp-intrinsics.ll +++ test/Feature/fp-intrinsics.ll @@ -242,6 +242,51 @@ ret double %result } +; Verify that fptoui(42.1) isn't simplified when the rounding mode is +; unknown. +; CHECK-LABEL: f18 +; CHECK: call zeroext i32 @llvm.experimental.constrained.fptoui +define zeroext i32 @f18() { +entry: + %result = call zeroext i32 @llvm.experimental.constrained.fptoui.f64( + double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + +; Verify that fptosi(42.1) isn't simplified when the rounding mode is +; unknown. +; CHECK-LABEL: f19 +; CHECK: call i32 @llvm.experimental.constrained.fptosi +define i32 @f19() { +entry: + %result = call i32 @llvm.experimental.constrained.fptosi.f64(double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + +; Verify that fptrunc(42.1) isn't simplified when the rounding mode is +; unknown. +; CHECK-LABEL: f20 +; CHECK: call float @llvm.experimental.constrained.fptrunc +define float @f20() { +entry: + %result = call float @llvm.experimental.constrained.fptrunc.f32(double 42.1, + metadata !"fpexcept.strict") + ret float %result +} + +; Verify that fpext(42.1) isn't simplified when the rounding mode is +; unknown. 
+; CHECK-LABEL: f21
+; CHECK: call double @llvm.experimental.constrained.fpext
+define double @f21(float %x) {
+entry:
+  %result = call double @llvm.experimental.constrained.fpext.f32(float %x,
+                               metadata !"fpexcept.strict")
+  ret double %result
+}
+
 @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@@ -260,3 +305,7 @@
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare zeroext i32 @llvm.experimental.constrained.fptoui.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.f64(double, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32(double, metadata)
+declare double @llvm.experimental.constrained.fpext.f32(float, metadata)
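For reference, a sketch of the control flow that replaceConstrainedFPToUI() in
StrictFP.cpp produces for an i32/double fptoui the target cannot handle
legally; block and value names are illustrative, and the intrinsic overload
suffixes follow the test declarations above:

  entry:
    ; 0x41E0000000000000 is 2^31 as a double
    %in.signed.range = fcmp ult double %x, 0x41E0000000000000
    br i1 %in.signed.range, label %direct, label %biased

  direct:                                  ; value fits in the signed range
    %si = call i32 @llvm.experimental.constrained.fptosi.f64(double %x,
                     metadata !"fpexcept.strict")
    br label %join

  biased:                                  ; subtract 2^31, convert, restore the sign bit
    %lowered = call double @llvm.experimental.constrained.fsub.f64(double %x,
                     double 0x41E0000000000000, metadata !"round.dynamic",
                     metadata !"fpexcept.strict")
    %si.biased = call i32 @llvm.experimental.constrained.fptosi.f64(double %lowered,
                     metadata !"fpexcept.strict")
    %fixed = xor i32 %si.biased, -2147483648
    br label %join

  join:                                    ; phi replaces the original fptoui call
    %res = phi i32 [ %si, %direct ], [ %fixed, %biased ]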