diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -279,6 +279,10 @@
     bool ModuleLevelChanges;
     const char *NameSuffix;
     ClonedCodeInfo *CodeInfo;
+    bool HostFuncIsStrictFP;
+    bool InlinedFuncHasFPOps = false;
+
+    Instruction *cloneInstruction(BasicBlock::const_iterator II);
 
   public:
     PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
@@ -286,7 +290,10 @@
                           const char *nameSuffix, ClonedCodeInfo *codeInfo)
         : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
           ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
-          CodeInfo(codeInfo) {}
+          CodeInfo(codeInfo) {
+      HostFuncIsStrictFP = newFunc->getAttributes().hasAttribute(
+          AttributeList::FunctionIndex, Attribute::StrictFP);
+    }
 
     /// The specified block is found to be reachable, clone it and
     /// anything that it can reach.
@@ -296,6 +303,64 @@
 };
 }
 
+static bool hasRoundingModeOperand(Intrinsic::ID CIID) {
+  switch (CIID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
+  case Intrinsic::INTRINSIC:                                                   \
+    return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+  default:
+    llvm_unreachable("Unexpected constrained intrinsic id");
+  }
+}
+
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+  const Instruction &OldInst = *II;
+  Instruction *NewInst = nullptr;
+  if (HostFuncIsStrictFP) {
+    Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+    if (CIID != Intrinsic::not_intrinsic) {
+      // Instead of cloning the instruction, a call to a constrained intrinsic
+      // should be created.
+      SmallVector<Type *, 4> TParams;
+      TParams.push_back(OldInst.getType());
+      // Unlike other constrained intrinsics, functions corresponding to FPExt
+      // and FPTrunc have two template parameters.
+      if (CIID == Intrinsic::experimental_constrained_fpext ||
+          CIID == Intrinsic::experimental_constrained_fptrunc) {
+        TParams.push_back(OldInst.getOperand(0)->getType());
+      }
+      Function *IFn =
+          Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+
+      // Assume that the first arguments of constrained intrinsics are the
+      // same as the operands of the original instruction.
+      SmallVector<Value *, 8> Args;
+      for (auto &Op : OldInst.operands())
+        Args.push_back(Op);
+
+      // The last arguments of a constrained intrinsic are metadata that
+      // represent rounding mode (absent in some intrinsics) and exception
+      // behavior. The inlined function uses default settings.
+      LLVMContext &Ctx = NewFunc->getContext();
+      if (hasRoundingModeOperand(CIID)) {
+        Value *RoundingMode =
+            MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest"));
+        Args.push_back(RoundingMode);
+      }
+      Value *ExBehavior =
+          MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore"));
+      Args.push_back(ExBehavior);
+
+      NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+    }
+  }
+  if (!NewInst)
+    NewInst = II->clone();
+  return NewInst;
+}
+
 /// The specified block is found to be reachable, clone it and
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
@@ -333,7 +398,15 @@
 
   for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
        II != IE; ++II) {
-    Instruction *NewInst = II->clone();
+    Instruction *NewInst = cloneInstruction(II);
+
+    if (HostFuncIsStrictFP) {
+      // All function calls in the inlined function must get 'strictfp'
+      // attribute to prevent undesirable optimizations.
+      if (auto *Call = dyn_cast<CallBase>(NewInst)) {
+        Call->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
+      }
+    }
 
     // Eagerly remap operands to the newly cloned instruction, except for PHI
     // nodes for which we defer processing until we update the CFG.
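The rewrite performed by cloneInstruction and CloneBlock above can be illustrated with a small IR sketch. This is illustrative only and not part of the patch; the callee body and value names are hypothetical:

; Hypothetical callee instruction before inlining into a strictfp host:
;   %mul = fmul float %a, %b
;
; Instruction emitted into the clone instead of a verbatim copy: the matching
; constrained intrinsic with the default rounding mode and exception behavior,
; named after the original value plus the ".strict" suffix, and carrying the
; strictfp call attribute (where attributes #0 = { strictfp }):
;   %mul.strict = call float @llvm.experimental.constrained.fmul.f32(float %a, float %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0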
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1571,6 +1571,15 @@
   BasicBlock *OrigBB = TheCall->getParent();
   Function *Caller = OrigBB->getParent();
 
+  // Do not inline a strictfp function into a non-strictfp one for now. It
+  // would require conversion of all FP operations in the host function to
+  // constrained intrinsics.
+  if (CalledFunc->getAttributes().hasAttribute(
+          AttributeList::FunctionIndex, Attribute::StrictFP) &&
+      !Caller->getAttributes().hasAttribute(
+          AttributeList::FunctionIndex, Attribute::StrictFP)) {
+    return InlineResult::failure("incompatible strictfp attributes");
+  }
+
   // GC poses two hazards to inlining, which only occur when the callee has GC:
   //  1. If the caller has no GC, then the callee's GC must be propagated to the
   //     caller.
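A minimal sketch of the direction this check rejects. This is illustrative only; the function names are hypothetical, and the func_05/func_06 test below exercises the same pattern:

declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)

define float @strict_callee(float %a) strictfp {
entry:
  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %add
}

define float @plain_caller(float %a) {
entry:
  ; The call below stays as-is: inlining would require rewriting every FP
  ; operation in this host to constrained intrinsics, so the inliner returns
  ; InlineResult::failure("incompatible strictfp attributes") instead.
  %r = call float @strict_callee(float %a)
  ret float %r
}

attributes #0 = { strictfp }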
diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
--- a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
+++ b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
@@ -73,10 +73,10 @@
 attributes #0 = {
   inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind
   nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory
-  sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar"
+  sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar"
   "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard"
   "stack-probe-size"="4096" }
 
-; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
+; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
 
 ; attributes to drop
 attributes #1 = {
diff --git a/llvm/test/Transforms/Inline/inline_strictfp.ll b/llvm/test/Transforms/Inline/inline_strictfp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline_strictfp.ll
@@ -0,0 +1,107 @@
+; RUN: opt -inline %s -S | FileCheck %s
+
+
+; Ordinary function is inlined into strictfp function.
+
+define float @func_01(float %a) {
+entry:
+  %add = fadd float %a, %a
+  ret float %add
+}
+
+define float @func_02(float %a) #0 {
+entry:
+  %0 = tail call float @func_01(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @func_02
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; strictfp function is inlined into another strictfp function.
+
+define float @func_03(float %a) #0 {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+  ret float %add
+}
+
+define float @func_04(float %a) #0 {
+entry:
+  %0 = tail call float @func_03(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @func_04
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; strictfp function is NOT inlined into ordinary function.
+
+define float @func_05(float %a) strictfp {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+  ret float %add
+}
+
+define float @func_06(float %a) {
+entry:
+  %0 = tail call float @func_05(float %a)
+  %add = fadd float %0, 2.000000e+00
+  ret float %add
+; CHECK-LABEL: @func_06
+; CHECK: call float @func_05(float %a)
+; CHECK: fadd float %0, 2.000000e+00
+}
+
+
+; Calls in inlined function must get strictfp attribute.
+
+declare float @func_ext(float)
+
+define float @func_07(float %a) {
+entry:
+  %0 = call float @func_ext(float %a)
+  %add = fadd float %0, %a
+
+  ret float %add
+}
+
+define float @func_08(float %a) #0 {
+entry:
+  %0 = tail call float @func_07(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @func_08
+; CHECK: call float @func_ext(float {{.*}}) #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; Some constrained intrinsics do not have rounding mode operand.
+
+define double @func_09(float %a) {
+entry:
+  %t = fpext float %a to double
+  ret double %t
+}
+
+define double @func_10(float %a) #0 {
+entry:
+  %0 = tail call double @func_09(float %a) #0
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %0, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %add
+; CHECK-LABEL: @func_10
+; CHECK: call double @llvm.experimental.constrained.fpext.f64.f32(float {{.*}}, metadata !"fpexcept.ignore") #0
+; CHECK: call double @llvm.experimental.constrained.fadd.f64(double {{.*}}, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+attributes #0 = { strictfp }
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
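For reference, a sketch of what @func_10 should look like after inlining under this patch. This is illustrative only; the value names are approximate, and the CHECK lines above pin down only the calls. Note that constrained.fpext takes no rounding-mode operand, which is exactly what hasRoundingModeOperand gates in the cloner:

define double @func_10(float %a) #0 {
entry:
  %t.strict.i = call double @llvm.experimental.constrained.fpext.f64.f32(float %a, metadata !"fpexcept.ignore") #0
  %add = call double @llvm.experimental.constrained.fadd.f64(double %t.strict.i, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %add
}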