diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -46,6 +46,8 @@
   DebugLoc DbgLoc;                         // 'dbg' Metadata cache.
 
   enum {
+    /// Indicates that this instruction depends on floating point environment.
+    DependsOnFPEnvironment = 1 << 14,
     /// This is a bit stored in the SubClassData field which indicates whether
     /// this instruction has metadata attached to it or not.
     HasMetadataBit = 1 << 15
@@ -139,6 +141,11 @@
     return isIndirectTerminator(getOpcode());
   }
 
+  /// Returns true if the instruction depends on floating point environment.
+  bool dependsOnFPEnvironment() const {
+    return (getSubclassDataFromValue() & DependsOnFPEnvironment) != 0;
+  }
+
   static const char* getOpcodeName(unsigned OpCode);
 
   static inline bool isTerminator(unsigned OpCode) {
@@ -754,20 +761,30 @@
                          (V ? HasMetadataBit : 0));
   }
 
+  /// Sets or clears the DependsOnFPEnvironment bit, keeping all other bits.
+  void setDependsOnFPEnvironment(bool V) {
+    setValueSubclassData(
+        (getSubclassDataFromValue() & ~DependsOnFPEnvironment) |
+        (V ? DependsOnFPEnvironment : 0));
+  }
+
   void setParent(BasicBlock *P);
 
 protected:
-  // Instruction subclasses can stick up to 15 bits of stuff into the
+  // Instruction subclasses can stick up to 14 bits of stuff into the
   // SubclassData field of instruction with these members.
 
-  // Verify that only the low 15 bits are used.
+  // Verify that only the low 14 bits are used.
   void setInstructionSubclassData(unsigned short D) {
-    assert((D & HasMetadataBit) == 0 && "Out of range value put into field");
-    setValueSubclassData((getSubclassDataFromValue() & HasMetadataBit) | D);
+    assert((D & (DependsOnFPEnvironment | HasMetadataBit)) == 0 &&
+           "Out of range value put into field");
+    setValueSubclassData((getSubclassDataFromValue() &
+                          (DependsOnFPEnvironment | HasMetadataBit)) | D);
   }
 
   unsigned getSubclassDataFromInstruction() const {
-    return getSubclassDataFromValue() & ~HasMetadataBit;
+    return getSubclassDataFromValue() &
+           ~(DependsOnFPEnvironment | HasMetadataBit);
   }
 
   Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -14,6 +14,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/FloatingPoint.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Operator.h"
@@ -24,6 +25,11 @@
                          Instruction *InsertBefore)
   : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) {
 
+  // Cache in the DependsOnFPEnvironment bit whether getConstrainedIntrinsic
+  // reports a constrained intrinsic for this instruction.
+  if (getConstrainedIntrinsic(*this) != Intrinsic::not_intrinsic)
+    setDependsOnFPEnvironment(true);
+
   // If requested, insert this instruction into a basic block...
   if (InsertBefore) {
     BasicBlock *BB = InsertBefore->getParent();
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -279,6 +279,11 @@
     bool ModuleLevelChanges;
     const char *NameSuffix;
     ClonedCodeInfo *CodeInfo;
+    /// True if the function being cloned into has the strictfp attribute.
+    bool HostFuncIsStrictFP;
+    bool InlinedFuncHasFPOps = false;
+
+    Instruction *cloneInstruction(BasicBlock::const_iterator II);
 
   public:
     PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
@@ -286,7 +291,8 @@
                           const char *nameSuffix, ClonedCodeInfo *codeInfo)
         : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
           ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
-          CodeInfo(codeInfo) {}
+          CodeInfo(codeInfo),
+          HostFuncIsStrictFP(newFunc->hasFnAttribute(Attribute::StrictFP)) {}
 
     /// The specified block is found to be reachable, clone it and
     /// anything that it can reach.
@@ -296,6 +302,45 @@
   };
 }
 
+/// Create the counterpart of the instruction at \p II for the function being
+/// cloned into.
+///
+/// When that function is strictfp and the instruction depends on the floating
+/// point environment, a call to the corresponding constrained intrinsic is
+/// created instead of a plain clone.
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+  const Instruction &OldInst = *II;
+  if (!HostFuncIsStrictFP || !OldInst.dependsOnFPEnvironment())
+    return II->clone();
+
+  // Instead of cloning the instruction, a call to constrained intrinsic
+  // should be created.
+  Intrinsic::ID CIID = getConstrainedIntrinsic(OldInst);
+  assert(CIID != Intrinsic::not_intrinsic &&
+         "No constrained intrinsic for FP-environment-dependent instruction");
+  Function *IFn = Intrinsic::getDeclaration(NewFunc->getParent(), CIID,
+                                            OldInst.getType());
+
+  // Assume that the first arguments of constrained intrinsics are the same as
+  // the operands of original instruction.
+  SmallVector<Value *, 4> Args;
+  for (auto &Op : OldInst.operands())
+    Args.push_back(Op);
+
+  // The last two arguments of a constrained intrinsic are metadata that
+  // represent rounding mode and exception behavior. The inlined function
+  // uses default settings.
+  LLVMContext &Ctx = NewFunc->getContext();
+  Value *RoundingMode =
+      MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest"));
+  Value *ExBehavior =
+      MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore"));
+  Args.push_back(RoundingMode);
+  Args.push_back(ExBehavior);
+
+  return CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+}
+
 /// The specified block is found to be reachable, clone it and
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
@@ -333,7 +378,7 @@
   for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
        II != IE; ++II) {
 
-    Instruction *NewInst = II->clone();
+    Instruction *NewInst = cloneInstruction(II);
 
     // Eagerly remap operands to the newly cloned instruction, except for PHI
     // nodes for which we defer processing until we update the CFG.
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1577,6 +1577,12 @@
   BasicBlock *OrigBB = TheCall->getParent();
   Function *Caller = OrigBB->getParent();
 
+  // Do not inline strictfp function into non-strictfp for now. It would require
+  // conversion of all FP operations in host function to constrained intrinsics.
+  if (CalledFunc->hasFnAttribute(Attribute::StrictFP) &&
+      !Caller->hasFnAttribute(Attribute::StrictFP))
+    return "incompatible strictfp attributes";
+
   // GC poses two hazards to inlining, which only occur when the callee has GC:
   // 1. If the caller has no GC, then the callee's GC must be propagated to the
   //    caller.
diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
--- a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
+++ b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
@@ -73,10 +73,10 @@
 attributes #0 = {
   inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind
   nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory
-  sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar"
+  sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar"
   "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
 
-; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
+; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
 
 ; attributes to drop
 attributes #1 = {
diff --git a/llvm/test/Transforms/Inline/inline_strictfp.ll b/llvm/test/Transforms/Inline/inline_strictfp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline_strictfp.ll
@@ -0,0 +1,63 @@
+; RUN: opt -inline %s -S | FileCheck %s
+
+
+; Ordinary function is inlined into strictfp function.
+
+define float @func_01(float %a) {
+entry:
+  %add = fadd float %a, %a
+  ret float %add
+}
+
+define float @func_02(float %a) strictfp {
+entry:
+  %0 = tail call float @func_01(float %a)
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+; CHECK-LABEL: @func_02
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict")
+}
+
+
+; strictfp function is inlined into another strictfp function.
+
+define float @func_03(float %a) strictfp {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap")
+  ret float %add
+}
+
+define float @func_04(float %a) strictfp {
+entry:
+  %0 = tail call float @func_03(float %a)
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+; CHECK-LABEL: @func_04
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.downward", metadata !"fpexcept.maytrap")
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict")
+}
+
+
+; strictfp function is NOT inlined into ordinary function.
+
+define float @func_05(float %a) strictfp {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap")
+  ret float %add
+}
+
+define float @func_06(float %a) {
+entry:
+  %0 = tail call float @func_05(float %a)
+  %add = fadd float %0, 2.000000e+00
+  ret float %add
+; CHECK-LABEL: @func_06
+; CHECK: call float @func_05(float %a)
+; CHECK: fadd float %0, 2.000000e+00
+}
+
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1
+
+