Index: clang/include/clang/CodeGen/CodeGenAction.h =================================================================== --- clang/include/clang/CodeGen/CodeGenAction.h +++ clang/include/clang/CodeGen/CodeGenAction.h @@ -23,11 +23,28 @@ class CodeGenAction : public ASTFrontendAction { private: + // Let BackendConsumer access LinkModule. + friend class BackendConsumer; + + /// Info about module to link into a module we're generating. + struct LinkModule { + /// The module to link in. + std::unique_ptr Module; + + /// If true, we set attributes on Module's functions according to our + /// CodeGenOptions and LangOptions, as though we were generating the + /// function ourselves. + bool PropagateAttrs; + + /// Bitwise combination of llvm::LinkerFlags used when we link the module. + unsigned LinkFlags; + }; + unsigned Act; std::unique_ptr TheModule; - // Vector of {Linker::Flags, Module*} pairs to specify bitcode - // modules to link in using corresponding linker flags. - SmallVector, 4> LinkModules; + + /// Bitcode modules to link in to our module. + SmallVector LinkModules; llvm::LLVMContext *VMContext; bool OwnsVMContext; @@ -49,13 +66,6 @@ public: ~CodeGenAction() override; - /// setLinkModule - Set the link module to be used by this action. If a link - /// module is not provided, and CodeGenOptions::LinkBitcodeFile is non-empty, - /// the action will load it from the specified file. - void addLinkModule(llvm::Module *Mod, unsigned LinkFlags) { - LinkModules.push_back(std::make_pair(LinkFlags, Mod)); - } - /// Take the generated LLVM module, for use after the action has been run. /// The result may be null on failure. std::unique_ptr takeModule(); Index: clang/include/clang/Frontend/CodeGenOptions.h =================================================================== --- clang/include/clang/Frontend/CodeGenOptions.h +++ clang/include/clang/Frontend/CodeGenOptions.h @@ -130,8 +130,19 @@ /// The float precision limit to use, if non-empty. std::string LimitFloatPrecision; - /// The name of the bitcode file to link before optzns. - std::vector> LinkBitcodeFiles; + struct BitcodeFileToLink { + /// The filename of the bitcode file to link in. + std::string Filename; + /// If true, we set attributes functions in the bitcode library according to + /// our CodeGenOptions, much as we set attrs on functions that we generate + /// ourselves. + bool PropagateAttrs = false; + /// Bitwise combination of llvm::Linker::Flags, passed to the LLVM linker. + unsigned LinkFlags = 0; + }; + + /// The files specified here are linked in to the module before optimizations. + std::vector LinkBitcodeFiles; /// The user provided name for the "main file", if non-empty. This is useful /// in situations where the input file name does not match the original input Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -1620,15 +1620,113 @@ FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } +void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. + if (!HasOptnone) { + if (CodeGenOpts.OptimizeSize) + FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize); + if (CodeGenOpts.OptimizeSize == 2) + FuncAttrs.addAttribute(llvm::Attribute::MinSize); + } + + if (CodeGenOpts.DisableRedZone) + FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); + if (CodeGenOpts.NoImplicitFloat) + FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); + + if (AttrOnCallSite) { + // Attributes that should go on the call site only. + if (!CodeGenOpts.SimplifyLibCalls || + CodeGenOpts.isNoBuiltinFunc(Name.data())) + FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin); + if (!CodeGenOpts.TrapFuncName.empty()) + FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); + } else { + // Attributes that should go on the function, but not the call site. + if (!CodeGenOpts.DisableFPElim) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + } else if (CodeGenOpts.OmitLeafFramePointer) { + FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + } else { + FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); + FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + } + + FuncAttrs.addAttribute("less-precise-fpmad", + llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); + + if (!CodeGenOpts.FPDenormalMode.empty()) + FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode); + + FuncAttrs.addAttribute("no-trapping-math", + llvm::toStringRef(CodeGenOpts.NoTrappingMath)); + + // TODO: Are these all needed? + // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. + FuncAttrs.addAttribute("no-infs-fp-math", + llvm::toStringRef(CodeGenOpts.NoInfsFPMath)); + FuncAttrs.addAttribute("no-nans-fp-math", + llvm::toStringRef(CodeGenOpts.NoNaNsFPMath)); + FuncAttrs.addAttribute("unsafe-fp-math", + llvm::toStringRef(CodeGenOpts.UnsafeFPMath)); + FuncAttrs.addAttribute("use-soft-float", + llvm::toStringRef(CodeGenOpts.SoftFloat)); + FuncAttrs.addAttribute("stack-protector-buffer-size", + llvm::utostr(CodeGenOpts.SSPBufferSize)); + FuncAttrs.addAttribute("no-signed-zeros-fp-math", + llvm::toStringRef(CodeGenOpts.NoSignedZeros)); + FuncAttrs.addAttribute( + "correctly-rounded-divide-sqrt-fp-math", + llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); + + // TODO: Reciprocal estimate codegen options should apply to instructions? + std::vector &Recips = getTarget().getTargetOpts().Reciprocals; + if (!Recips.empty()) + FuncAttrs.addAttribute("reciprocal-estimates", + llvm::join(Recips.begin(), Recips.end(), ",")); + + if (CodeGenOpts.StackRealignment) + FuncAttrs.addAttribute("stackrealign"); + if (CodeGenOpts.Backchain) + FuncAttrs.addAttribute("backchain"); + } + + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + + // Exceptions aren't supported in CUDA device code. + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + + // Respect -fcuda-flush-denormals-to-zero. + if (getLangOpts().CUDADeviceFlushDenormalsToZero) + FuncAttrs.addAttribute("nvptx-f32ftz", "true"); + } +} + +void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { + llvm::AttrBuilder FuncAttrs; + ConstructDefaultFnAttrList(F.getName(), + F.hasFnAttribute(llvm::Attribute::OptimizeNone), + /* AttrOnCallsite = */ false, FuncAttrs); + llvm::AttributeSet AS = llvm::AttributeSet::get( + getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); + F.addAttributes(llvm::AttributeSet::FunctionIndex, AS); +} + void CodeGenModule::ConstructAttributeList( StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo, AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) { llvm::AttrBuilder FuncAttrs; llvm::AttrBuilder RetAttrs; - bool HasOptnone = false; CallingConv = FI.getEffectiveCallingConvention(); - if (FI.isNoReturn()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); @@ -1639,7 +1737,7 @@ const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); - bool HasAnyX86InterruptAttr = false; + bool HasOptnone = false; // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr()) @@ -1679,7 +1777,6 @@ if (TargetDecl->hasAttr()) RetAttrs.addAttribute(llvm::Attribute::NonNull); - HasAnyX86InterruptAttr = TargetDecl->hasAttr(); HasOptnone = TargetDecl->hasAttr(); if (auto *AllocSize = TargetDecl->getAttr()) { Optional NumElemsParam; @@ -1691,86 +1788,19 @@ } } - // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. - if (!HasOptnone) { - if (CodeGenOpts.OptimizeSize) - FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize); - if (CodeGenOpts.OptimizeSize == 2) - FuncAttrs.addAttribute(llvm::Attribute::MinSize); - } + ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs); - if (CodeGenOpts.DisableRedZone) - FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); - if (CodeGenOpts.NoImplicitFloat) - FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); if (CodeGenOpts.EnableSegmentedStacks && !(TargetDecl && TargetDecl->hasAttr())) FuncAttrs.addAttribute("split-stack"); - if (AttrOnCallSite) { - // Attributes that should go on the call site only. - if (!CodeGenOpts.SimplifyLibCalls || - CodeGenOpts.isNoBuiltinFunc(Name.data())) - FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin); - if (!CodeGenOpts.TrapFuncName.empty()) - FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); - } else { - // Attributes that should go on the function, but not the call site. - if (!CodeGenOpts.DisableFPElim) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - } else if (CodeGenOpts.OmitLeafFramePointer) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } else { - FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } - + if (!AttrOnCallSite) { bool DisableTailCalls = - CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr || - (TargetDecl && TargetDecl->hasAttr()); - FuncAttrs.addAttribute( - "disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); - - FuncAttrs.addAttribute("less-precise-fpmad", - llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); - - if (!CodeGenOpts.FPDenormalMode.empty()) - FuncAttrs.addAttribute("denormal-fp-math", - CodeGenOpts.FPDenormalMode); - - FuncAttrs.addAttribute("no-trapping-math", - llvm::toStringRef(CodeGenOpts.NoTrappingMath)); - - // TODO: Are these all needed? - // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. - FuncAttrs.addAttribute("no-infs-fp-math", - llvm::toStringRef(CodeGenOpts.NoInfsFPMath)); - FuncAttrs.addAttribute("no-nans-fp-math", - llvm::toStringRef(CodeGenOpts.NoNaNsFPMath)); - FuncAttrs.addAttribute("unsafe-fp-math", - llvm::toStringRef(CodeGenOpts.UnsafeFPMath)); - FuncAttrs.addAttribute("use-soft-float", - llvm::toStringRef(CodeGenOpts.SoftFloat)); - FuncAttrs.addAttribute("stack-protector-buffer-size", - llvm::utostr(CodeGenOpts.SSPBufferSize)); - FuncAttrs.addAttribute("no-signed-zeros-fp-math", - llvm::toStringRef(CodeGenOpts.NoSignedZeros)); - FuncAttrs.addAttribute( - "correctly-rounded-divide-sqrt-fp-math", - llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); - - // TODO: Reciprocal estimate codegen options should apply to instructions? - std::vector &Recips = getTarget().getTargetOpts().Reciprocals; - if (!Recips.empty()) - FuncAttrs.addAttribute("reciprocal-estimates", - llvm::join(Recips.begin(), Recips.end(), ",")); - - if (CodeGenOpts.StackRealignment) - FuncAttrs.addAttribute("stackrealign"); - if (CodeGenOpts.Backchain) - FuncAttrs.addAttribute("backchain"); + CodeGenOpts.DisableTailCalls || + (TargetDecl && (TargetDecl->hasAttr() || + TargetDecl->hasAttr())); + FuncAttrs.addAttribute("disable-tail-calls", + llvm::toStringRef(DisableTailCalls)); // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then @@ -1819,21 +1849,6 @@ } } - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { - // Conservatively, mark all functions and calls in CUDA as convergent - // (meaning, they may call an intrinsically convergent op, such as - // __syncthreads(), and so can't have certain optimizations applied around - // them). LLVM will remove this attribute where it safely can. - FuncAttrs.addAttribute(llvm::Attribute::Convergent); - - // Exceptions aren't supported in CUDA device code. - FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); - - // Respect -fcuda-flush-denormals-to-zero. - if (getLangOpts().CUDADeviceFlushDenormalsToZero) - FuncAttrs.addAttribute("nvptx-f32ftz", "true"); - } - ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); Index: clang/lib/CodeGen/CodeGenAction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenAction.cpp +++ clang/lib/CodeGen/CodeGenAction.cpp @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "clang/CodeGen/CodeGenAction.h" +#include "CodeGenModule.h" #include "CoverageMappingGen.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" @@ -16,7 +18,6 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" -#include "clang/CodeGen/CodeGenAction.h" #include "clang/CodeGen/ModuleBuilder.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -41,6 +42,8 @@ namespace clang { class BackendConsumer : public ASTConsumer { + using LinkModule = CodeGenAction::LinkModule; + virtual void anchor(); DiagnosticsEngine &Diags; BackendAction Action; @@ -61,43 +64,37 @@ std::unique_ptr Gen; - SmallVector>, 4> - LinkModules; + SmallVector LinkModules; // This is here so that the diagnostic printer knows the module a diagnostic // refers to. llvm::Module *CurLinkModule = nullptr; public: - BackendConsumer( - BackendAction Action, DiagnosticsEngine &Diags, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, const LangOptions &LangOpts, - bool TimePasses, const std::string &InFile, - const SmallVectorImpl> &LinkModules, - std::unique_ptr OS, LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr) + BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, bool TimePasses, + const std::string &InFile, + SmallVector LinkModules, + std::unique_ptr OS, LLVMContext &C, + CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(std::move(OS)), Context(nullptr), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, - CodeGenOpts, C, CoverageInfo)) { + CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)) { llvm::TimePassesIsEnabled = TimePasses; - for (auto &I : LinkModules) - this->LinkModules.push_back( - std::make_pair(I.first, std::unique_ptr(I.second))); } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr takeModule() { return std::unique_ptr(Gen->ReleaseModule()); } - void releaseLinkModules() { - for (auto &I : LinkModules) - I.second.release(); - } void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { Gen->HandleCXXStaticMemberVarInstantiation(VD); @@ -159,6 +156,21 @@ HandleTopLevelDecl(D); } + // Links each entry in LinkModules into our module. Returns true on error. + bool LinkInModules() { + for (auto &LM : LinkModules) { + if (LM.PropagateAttrs) + for (Function &F : *LM.Module) + Gen->CGM().AddDefaultFnAttrs(F); + + CurLinkModule = LM.Module.get(); + if (Linker::linkModules(*getModule(), std::move(LM.Module), + LM.LinkFlags)) + return true; + } + return false; // success + } + void HandleTranslationUnit(ASTContext &C) override { { PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); @@ -216,13 +228,9 @@ Ctx.setDiagnosticHotnessRequested(true); } - // Link LinkModule into this module if present, preserving its validity. - for (auto &I : LinkModules) { - unsigned LinkFlags = I.first; - CurLinkModule = I.second.get(); - if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags)) - return; - } + // Link each LinkModule into our module. + if (LinkInModules()) + return; EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); @@ -729,10 +737,6 @@ if (!getCompilerInstance().hasASTConsumer()) return; - // Take back ownership of link modules we passed to consumer. - if (!LinkModules.empty()) - BEConsumer->releaseLinkModules(); - // Steal the module from the consumer. TheModule = BEConsumer->takeModule(); } @@ -775,13 +779,12 @@ // Load bitcode modules to link with, if we need to. if (LinkModules.empty()) - for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) { - const std::string &LinkBCFile = I.second; - - auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile); + for (const CodeGenOptions::BitcodeFileToLink &F : + CI.getCodeGenOpts().LinkBitcodeFiles) { + auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); if (!BCBuf) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << BCBuf.getError().message(); + << F.Filename << BCBuf.getError().message(); LinkModules.clear(); return nullptr; } @@ -791,12 +794,13 @@ if (!ModuleOrErr) { handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << EIB.message(); + << F.Filename << EIB.message(); }); LinkModules.clear(); return nullptr; } - addLinkModule(ModuleOrErr.get().release(), I.first); + LinkModules.push_back( + {std::move(ModuleOrErr.get()), F.PropagateAttrs, F.LinkFlags}); } CoverageSourceInfo *CoverageInfo = nullptr; @@ -810,8 +814,8 @@ std::unique_ptr Result(new BackendConsumer( BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), - CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules, - std::move(OS), *VMContext, CoverageInfo)); + CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, + std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo)); BEConsumer = Result.get(); return std::move(Result); } Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -1022,6 +1022,14 @@ CGCalleeInfo CalleeInfo, AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite); + /// Adds attributes to F according to our CodeGenOptions and LangOptions, as + /// though we had emitted it ourselves. We remove any attributes on F that + /// conflict with the attributes we add here. + /// + /// This is useful for adding attrs to bitcode modules that you want to link + /// with but don't control, such as CUDA's libdevice. + void AddDefaultFnAttrs(llvm::Function &F); + // Fills in the supplied string map with the set of target features for the // passed in function. void getFunctionFeatureMap(llvm::StringMap &FeatureMap, @@ -1303,6 +1311,12 @@ /// Check whether we can use a "simpler", more core exceptions personality /// function. void SimplifyPersonality(); + + /// Helper function for ConstructAttributeList and AddDefaultFnAttrs. + /// Constructs an AttrList for a function with the given properties. + void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs); }; } // end namespace CodeGen } // end namespace clang Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -722,11 +722,16 @@ Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); for (auto A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_cuda_bitcode)) { - unsigned LinkFlags = llvm::Linker::Flags::None; - if (A->getOption().matches(OPT_mlink_cuda_bitcode)) - LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded | - llvm::Linker::Flags::InternalizeLinkedSymbols; - Opts.LinkBitcodeFiles.push_back(std::make_pair(LinkFlags, A->getValue())); + CodeGenOptions::BitcodeFileToLink F; + F.Filename = A->getValue(); + if (A->getOption().matches(OPT_mlink_cuda_bitcode)) { + F.LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded | + llvm::Linker::Flags::InternalizeLinkedSymbols; + // When linking CUDA bitcode, propagate function attributes so that + // e.g. libdevice gets fast-math attrs if we're building with fast-math. + F.PropagateAttrs = true; + } + Opts.LinkBitcodeFiles.push_back(F); } Opts.SanitizeCoverageType = getLastArgIntValue(Args, OPT_fsanitize_coverage_type, 0, Diags); Index: clang/test/CodeGenCUDA/propagate-metadata.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/propagate-metadata.cu @@ -0,0 +1,62 @@ +// Check that when we link a bitcode module into a file using +// -mlink-cuda-bitcode, we apply the same attributes to the functions in that +// bitcode module as we apply to functions we generate. +// +// In particular, we check that ftz and unsafe-math are propagated into the +// bitcode library as appropriate. +// +// In addition, we set -ftrapping-math on the bitcode library, but then set +// -fno-trapping-math on the main compilations, and ensure that the latter flag +// overrides the flag on the bitcode library. + +// Build the bitcode library. This is not built in CUDA mode, otherwise it +// might have incompatible attributes. This mirrors how libdevice is built. +// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \ +// RUN: %s -o %t.bc -triple nvptx-unknown-unknown + +// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc -o - \ +// RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \ +// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST + +// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \ +// RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \ +// RUN: -fcuda-is-device -triple nvptx-unknown-unknown \ +// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ \ +// RUN: --check-prefix=NOFAST + +// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \ +// RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \ +// RUN: -fcuda-is-device -menable-unsafe-fp-math -triple nvptx-unknown-unknown \ +// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST + +// Wrap everything in extern "C" so we don't ahve to worry about name mangling +// in the IR. +extern "C" { +#ifdef LIB + +// This function is defined in the library and only declared in the main +// compilation. +void lib_fn() {} + +#else + +#include "Inputs/cuda.h" +__device__ void lib_fn(); +__global__ void kernel() { lib_fn(); } + +#endif +} + +// The kernel and lib function should have the same attributes. +// CHECK: define void @kernel() [[attr:#[0-9]+]] +// CHECK: define internal void @lib_fn() [[attr]] + +// Check the attribute list. +// CHECK: attributes [[attr]] = { +// CHECK: "no-trapping-math"="true" + +// FTZ-SAME: "nvptx-f32ftz"="true" +// NOFTZ-NOT: "nvptx-f32ftz"="true" + +// FAST-SAME: "unsafe-fp-math"="true" +// NOFAST-NOT: "unsafe-fp-math"="true"