Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -23,6 +23,7 @@
 class Pass;
 class Target;
 class TargetMachine;
+class TargetOptions;
 class PassRegistry;
 class Module;
 
@@ -52,7 +53,7 @@
 FunctionPass *createSIInsertWaitsPass();
 FunctionPass *createSIInsertWaitcntsPass();
 FunctionPass *createSIFixWWMLivenessPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass();
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
 FunctionPass *createAMDGPUUseNativeCallsPass();
 FunctionPass *createAMDGPUCodeGenPreparePass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
Index: lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 #include <vector>
 #include <cmath>
 
@@ -168,10 +169,13 @@
 
   AMDGPULibCalls Simplifier;
 
+  const TargetOptions Options;
+
   public:
     static char ID; // Pass identification
 
-    AMDGPUSimplifyLibCalls() : FunctionPass(ID) {
+    AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
+      : FunctionPass(ID), Options(Opt) {
       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
     }
 
@@ -1680,14 +1684,39 @@
 }
 
 // Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass() {
-  return new AMDGPUSimplifyLibCalls();
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
+  return new AMDGPUSimplifyLibCalls(Opt);
 }
 
 FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
   return new AMDGPUUseNativeCalls();
 }
 
+/// Mirror the fast-math settings of \p Options as string attributes on \p F:
+/// "no-infs-fp-math", "no-nans-fp-math" and, for UnsafeFPMath only,
+/// "less-precise-fpmad" and "unsafe-fp-math". UnsafeFPMath also implies the
+/// no-infs and no-nans attributes.
+/// \returns false if no option is set (F is left untouched), true otherwise.
+static bool setFastFlags(Function &F, const TargetOptions &Options) {
+  AttrBuilder B;
+
+  if (Options.UnsafeFPMath || Options.NoInfsFPMath)
+    B.addAttribute("no-infs-fp-math", "true");
+  if (Options.UnsafeFPMath || Options.NoNaNsFPMath)
+    B.addAttribute("no-nans-fp-math", "true");
+  if (Options.UnsafeFPMath) {
+    B.addAttribute("less-precise-fpmad", "true");
+    B.addAttribute("unsafe-fp-math", "true");
+  }
+
+  if (!B.hasAttributes())
+    return false;
+
+  F.addAttributes(AttributeList::FunctionIndex, B);
+
+  return true;
+}
+
 bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
@@ -1699,6 +1728,9 @@
         F.printAsOperand(dbgs(), false, F.getParent());
         dbgs() << '\n';);
 
+  if (!EnablePreLink)
+    Changed |= setFastFlags(F, Options);
+
   for (auto &BB : F) {
     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
       // Ignore non-calls.
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -370,17 +370,18 @@
         PM.add(createAMDGPUAlwaysInlinePass(false));
     });
 
+  const auto &Opt = Options;
   Builder.addExtension(
     PassManagerBuilder::EP_EarlyAsPossible,
-    [AMDGPUAA, LibCallSimplify](const PassManagerBuilder &,
-                                legacy::PassManagerBase &PM) {
+    [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
+                                      legacy::PassManagerBase &PM) {
       if (AMDGPUAA) {
         PM.add(createAMDGPUAAWrapperPass());
         PM.add(createAMDGPUExternalAAWrapperPass());
       }
       PM.add(llvm::createAMDGPUUseNativeCallsPass());
       if (LibCallSimplify)
-        PM.add(llvm::createAMDGPUSimplifyLibCallsPass());
+        PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
   });
 
   Builder.addExtension(
Index: test/CodeGen/AMDGPU/inline-attr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/inline-attr.ll
@@ -0,0 +1,37 @@
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
+
+; Verifies the fast-math function attributes that opt -O3 prints for @foo and
+; @caller under each of the three floating-point options used above, and that
+; an fmul named %mul.i is emitted after the definition of @caller.
+
+; GCN: define float @foo(float %x) local_unnamed_addr #0 {
+; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
+; GCN: %mul.i = fmul float %load, 1.500000e+01
+
+; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #1 = { norecurse nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+
+; NOINFS: attributes #0 = { norecurse nounwind readnone "no-infs-fp-math"="true" }
+; NOINFS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
+
+; NONANS: attributes #0 = { norecurse nounwind readnone "no-nans-fp-math"="true" }
+; NONANS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
+
+define float @foo(float %x) #0 {
+entry:
+  %mul = fmul float %x, 1.500000e+01
+  ret float %mul
+}
+
+define amdgpu_kernel void @caller(float addrspace(1)* %p) #1 {
+entry:
+  %load = load float, float addrspace(1)* %p, align 4
+  %call = call fast float @foo(float %load) #0
+  store float %call, float addrspace(1)* %p, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }