diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -322,6 +322,9 @@
   bool ModuleLevelChanges;
   const char *NameSuffix;
   ClonedCodeInfo *CodeInfo;
+  bool HostFuncIsStrictFP;
+
+  Instruction *cloneInstruction(BasicBlock::const_iterator II);

 public:
   PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
@@ -329,7 +332,10 @@
                         const char *nameSuffix, ClonedCodeInfo *codeInfo)
       : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
         ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
-        CodeInfo(codeInfo) {}
+        CodeInfo(codeInfo) {
+    HostFuncIsStrictFP =
+        newFunc->getAttributes().hasFnAttr(Attribute::StrictFP);
+  }

   /// The specified block is found to be reachable, clone it and
   /// anything that it can reach.
@@ -338,6 +344,89 @@
 };
 } // namespace

+static bool hasRoundingModeOperand(Intrinsic::ID CIID) {
+  switch (CIID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
+  case Intrinsic::INTRINSIC:                                                   \
+    return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+  default:
+    llvm_unreachable("Unexpected constrained intrinsic id");
+  }
+}
+
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+  const Instruction &OldInst = *II;
+  Instruction *NewInst = nullptr;
+  if (HostFuncIsStrictFP) {
+    Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+    if (CIID != Intrinsic::not_intrinsic) {
+      // Instead of cloning the instruction, a call to the corresponding
+      // constrained intrinsic should be created.
+      // Assume the first arguments of constrained intrinsics are the same as
+      // the operands of the original instruction.
+
+      // Determine overloaded types of the intrinsic.
+      SmallVector<Type *, 2> TParams;
+      SmallVector<Intrinsic::IITDescriptor, 8> Descriptor;
+      getIntrinsicInfoTableEntries(CIID, Descriptor);
+      for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) {
+        Intrinsic::IITDescriptor Operand = Descriptor[I];
+        switch (Operand.Kind) {
+        case Intrinsic::IITDescriptor::Argument:
+          if (Operand.getArgumentKind() !=
+              Intrinsic::IITDescriptor::AK_MatchType) {
+            if (I == 0)
+              TParams.push_back(OldInst.getType());
+            else
+              TParams.push_back(OldInst.getOperand(I - 1)->getType());
+          }
+          break;
+        case Intrinsic::IITDescriptor::SameVecWidthArgument:
+          ++I;
+          break;
+        default:
+          break;
+        }
+      }
+
+      // Create intrinsic call.
+      LLVMContext &Ctx = NewFunc->getContext();
+      Function *IFn =
+          Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+      SmallVector<Value *, 4> Args;
+      unsigned NumOperands = OldInst.getNumOperands();
+      if (isa<CallInst>(OldInst))
+        --NumOperands;
+      for (unsigned I = 0; I < NumOperands; ++I) {
+        Value *Op = OldInst.getOperand(I);
+        Args.push_back(Op);
+      }
+      if (const auto *CmpI = dyn_cast<FCmpInst>(&OldInst)) {
+        FCmpInst::Predicate Pred = CmpI->getPredicate();
+        StringRef PredName = FCmpInst::getPredicateName(Pred);
+        Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName)));
+      }
+
+      // The last arguments of a constrained intrinsic are metadata that
+      // represent rounding mode (absent in some intrinsics) and exception
+      // behavior. The inlined function uses default settings.
+      if (hasRoundingModeOperand(CIID))
+        Args.push_back(
+            MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest")));
+      Args.push_back(
+          MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore")));
+
+      NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+    }
+  }
+  if (!NewInst)
+    NewInst = II->clone();
+  return NewInst;
+}
+
 /// The specified block is found to be reachable, clone it and
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(
@@ -377,7 +466,14 @@

   for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
        II != IE; ++II) {
-    Instruction *NewInst = II->clone();
+    Instruction *NewInst = cloneInstruction(II);
+
+    if (HostFuncIsStrictFP) {
+      // All function calls in the inlined function must get the 'strictfp'
+      // attribute to prevent undesirable optimizations.
+      if (auto *Call = dyn_cast<CallBase>(NewInst))
+        Call->addFnAttr(Attribute::StrictFP);
+    }

     // Eagerly remap operands to the newly cloned instruction, except for PHI
     // nodes for which we defer processing until we update the CFG.
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1788,6 +1788,13 @@
   BasicBlock *OrigBB = CB.getParent();
   Function *Caller = OrigBB->getParent();

+  // Do not inline a strictfp function into a non-strictfp one; it would
+  // require converting all FP operations in the caller to constrained ones.
+  if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) &&
+      !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) {
+    return InlineResult::failure("incompatible strictfp attributes");
+  }
+
   // GC poses two hazards to inlining, which only occur when the callee has GC:
   // 1. If the caller has no GC, then the callee's GC must be propagated to the
   //    caller.
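A minimal before/after sketch of the rewrite cloneInstruction performs when a non-strictfp callee is inlined into a strictfp host. This is illustration only, not part of the patch; value names and the attribute group number are made up, and the inliner may rename values further:

  ; In the callee, before inlining:
  %add = fadd float %a, %a

  ; What the cloner emits in the strictfp host instead of a plain clone:
  ; default environment metadata is used, and the call gets strictfp (#0).
  %add.strict = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0

  attributes #0 = { strictfp }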
diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
--- a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
+++ b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
@@ -73,11 +73,11 @@
 attributes #0 = { inlinehint minsize noduplicate noimplicitfloat norecurse
   noredzone nounwind nonlazybind optsize safestack sanitize_address
   sanitize_hwaddress sanitize_memory
-  sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar"
+  sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar"
   "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard"
   "stack-probe-size"="4096" }

 ; CHECK: attributes [[FN_ATTRS0]] = { ssp
-; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
+; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }

 ; attributes to drop
 attributes #1 = {
diff --git a/llvm/test/Transforms/Inline/inline-strictfp.ll b/llvm/test/Transforms/Inline/inline-strictfp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-strictfp.ll
@@ -0,0 +1,145 @@
+; RUN: opt -inline %s -S | FileCheck %s
+
+
+; An ordinary function is inlined into a strictfp function.
+
+define float @inlined_01(float %a) {
+entry:
+  %add = fadd float %a, %a
+  ret float %add
+}
+
+define float @host_02(float %a) #0 {
+entry:
+  %0 = call float @inlined_01(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_02
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; A strictfp function is inlined into another strictfp function.
+
+define float @inlined_03(float %a) #0 {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+  ret float %add
+}
+
+define float @host_04(float %a) #0 {
+entry:
+  %0 = call float @inlined_03(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_04
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; A strictfp function is NOT inlined into an ordinary function.
+
+define float @inlined_05(float %a) strictfp {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+  ret float %add
+}
+
+define float @host_06(float %a) {
+entry:
+  %0 = call float @inlined_05(float %a)
+  %add = fadd float %0, 2.000000e+00
+  ret float %add
+; CHECK-LABEL: @host_06
+; CHECK: call float @inlined_05(float %a)
+; CHECK: fadd float %0, 2.000000e+00
+}
+
+
+; Calls in the inlined function must get the strictfp attribute.
+
+declare float @func_ext(float)
+
+define float @inlined_07(float %a) {
+entry:
+  %0 = call float @func_ext(float %a)
+  %add = fadd float %0, %a
+
+  ret float %add
+}
+
+define float @host_08(float %a) #0 {
+entry:
+  %0 = call float @inlined_07(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_08
+; CHECK: call float @func_ext(float {{.*}}) #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; Cloning of particular instructions.
+
+; fpext has two overloaded types.
+define double @inlined_09(float %a) {
+entry:
+  %t = fpext float %a to double
+  ret double %t
+}
+
+define double @host_10(float %a) #0 {
+entry:
+  %0 = call double @inlined_09(float %a) #0
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %0, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %add
+; CHECK-LABEL: @host_10
+; CHECK: call double @llvm.experimental.constrained.fpext.f64.f32(float {{.*}}, metadata !"fpexcept.ignore") #0
+; CHECK: call double @llvm.experimental.constrained.fadd.f64(double {{.*}}, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+; fcmp does not depend on rounding mode and takes a metadata argument.
+define i1 @inlined_11(float %a, float %b) {
+entry:
+  %t = fcmp oeq float %a, %b
+  ret i1 %t
+}
+
+define i1 @host_12(float %a, float %b) #0 {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %cmp = call i1 @inlined_11(float %a, float %b) #0
+  ret i1 %cmp
+; CHECK-LABEL: @host_12
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+; CHECK: call i1 @llvm.experimental.constrained.fcmp.f32(float {{.*}}, metadata !"oeq", metadata !"fpexcept.ignore") #0
+}
+
+; The intrinsic 'ceil' has a constrained variant.
+define float @inlined_13(float %a) {
+entry:
+  %t = call float @llvm.ceil.f32(float %a)
+  ret float %t
+}
+
+define float @host_14(float %a) #0 {
+entry:
+  %0 = call float @inlined_13(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_14
+; CHECK: call float @llvm.experimental.constrained.ceil.f32(float %a, metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+attributes #0 = { strictfp }
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
+declare float @llvm.ceil.f32(float)
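For orientation, a hand-written sketch of the expected body of @host_02 above after inlining, assuming the default metadata the cloner inserts; the actual output may differ in value names (e.g. an .i suffix added by the inliner):

  define float @host_02(float %a) #0 {
  entry:
    ; the callee's fadd, rewritten to the constrained form with defaults
    %add.strict = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
    ; the host's original constrained call, with its metadata untouched
    %add = call float @llvm.experimental.constrained.fadd.f32(float %add.strict, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
    ret float %add
  }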