diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -936,6 +936,7 @@ if (getCodeGenOpts().SkipRaxSetup) getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); + getTargetCodeGenInfo().emitTargetGlobals(*this); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -76,6 +76,9 @@ CodeGen::CodeGenModule &CGM, const llvm::MapVector &MangledDeclNames) const {} + /// Provides a convenient hook to handle extra target-specific globals. + virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {} + /// Any further codegen related checks that need to be done on a function call /// in a target specific manner. virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc, diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/IntrinsicsS390.h" #include "llvm/IR/Type.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" #include @@ -9288,6 +9289,8 @@ void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, CodeGenModule &CGM) const; + void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override; + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -9403,6 +9406,79 @@ } } +/// Emits control constants used to change per-architecture behaviour in the +/// AMDGPU ROCm device libraries. +void AMDGPUTargetCodeGenInfo::emitTargetGlobals( + CodeGen::CodeGenModule &CGM) const { + if (!CGM.getTriple().isAMDGCN()) + return; + StringRef CPU = CGM.getTarget().getTargetOpts().CPU; + llvm::AMDGPU::GPUKind Kind = llvm::AMDGPU::parseArchAMDGCN(CPU); + unsigned Features = llvm::AMDGPU::getArchAttrAMDGCN(Kind); + if (Kind == llvm::AMDGPU::GK_NONE) + return; + + unsigned Minor; + unsigned Major; + StringRef Identifier = CPU.drop_while([](char C) { return !isDigit(C); }); + if (Identifier.take_back(2).getAsInteger(16, Minor) || + Identifier.drop_back(2).getAsInteger(10, Major)) + return; + + auto AddGlobal = [&](StringRef Name, unsigned Value, unsigned Size, + llvm::GlobalValue::LinkageTypes Linkage = + llvm::GlobalValue::LinkOnceAnyLinkage) { + if (CGM.getModule().getNamedGlobal(Name)) + return; + + auto *Type = + llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), Size); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), Type, true, + Linkage, + llvm::ConstantInt::get(Type, Value), Name, nullptr, + llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility); + GV->setAlignment(CGM.getDataLayout().getABITypeAlign(Type)); + }; + + // The wavefront size is 64 if defined by the target or explicitly specified + // by the user. + bool Wavefront64 = + !(Features & llvm::AMDGPU::FEATURE_WAVE32) || + llvm::is_contained(CGM.getTarget().getTargetOpts().FeaturesAsWritten, + "+wavefrontsize64"); + + // Different math flags set by the current floating point contract. + bool RelaxedMath = CGM.getLangOpts().FastMath; + bool UnsafeMath = CGM.getLangOpts().UnsafeFPMath; + bool DenormAreZero = CGM.getCodeGenOpts().FP32DenormalMode == + llvm::DenormalMode::getPreserveSign(); + bool FiniteOnly = + CGM.getLangOpts().NoHonorInfs && CGM.getLangOpts().NoHonorNaNs; + + // Set correct square root rounding depending on the target lanauge. + bool CorrectSqrt = CGM.getLangOpts().OpenCL + ? CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt + : CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt; + + // Control constants for math operations. + AddGlobal("__oclc_wavefrontsize64", Wavefront64, /*Size=*/8); + AddGlobal("__oclc_daz_opt", DenormAreZero, /*Size=*/8); + AddGlobal("__oclc_finite_only_opt", FiniteOnly || RelaxedMath, /*Size=*/8); + AddGlobal("__oclc_unsafe_math_opt", UnsafeMath || RelaxedMath, /*Size=*/8); + AddGlobal("__oclc_correctly_rounded_sqrt32", CorrectSqrt, /*Size=*/8); + + // Control constants for the system. + AddGlobal("__oclc_ISA_version", Minor + Major * 1000, /*Size=*/32, + llvm::GlobalValue::LinkOnceODRLinkage); + AddGlobal("__oclc_ABI_version", + CGM.getTarget().getTargetOpts().CodeObjectVersion, /*Size=*/32, + llvm::GlobalValue::LinkOnceODRLinkage); +} + void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (requiresAMDGPUProtectedVisibility(D, GV)) { diff --git a/clang/test/CodeGen/amdgcn-control-constants.c b/clang/test/CodeGen/amdgcn-control-constants.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/amdgcn-control-constants.c @@ -0,0 +1,49 @@ +// Check that we generate all the expected default features for the target. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck %s --check-prefix=GFX90A +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1030 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=GFX1030 + +// GFX90A: @__oclc_wavefrontsize64 = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1 +// GFX90A: @__oclc_daz_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// GFX90A: @__oclc_finite_only_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// GFX90A: @__oclc_unsafe_math_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// GFX90A: @__oclc_correctly_rounded_sqrt32 = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1 +// GFX90A: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 9010 +// GFX90A: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400 + +// GFX1030: @__oclc_wavefrontsize64 = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// GFX1030: @__oclc_daz_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// GFX1030: @__oclc_finite_only_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// GFX1030: @__oclc_unsafe_math_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// GFX1030: @__oclc_correctly_rounded_sqrt32 = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1 +// GFX1030: @__oclc_ISA_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 10048 +// GFX1030: @__oclc_ABI_version = linkonce_odr hidden local_unnamed_addr addrspace(4) constant i32 400 + +// Check that we can override the wavefront features. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1030 -target-feature +wavefrontsize64 \ +// RUN: -S -emit-llvm -o - %s | FileCheck %s --check-prefix=WAVEFRONT +// WAVEFRONT: @__oclc_wavefrontsize64 = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1 + +// Check that we can enable denormalization at zero. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -fdenormal-fp-math-f32=preserve-sign,preserve-sign \ +// RUN: -S -emit-llvm -o - %s | FileCheck %s --check-prefix=DENORM-AT-ZERO +// DENORM-AT-ZERO: @__oclc_daz_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1 + +// Check that we can enable finite math. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -ffinite-math-only \ +// RUN: -S -emit-llvm -o - %s | FileCheck %s --check-prefix=FINITE-MATH +// FINITE-MATH: @__oclc_finite_only_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1 +// FINITE-MATH: @__oclc_unsafe_math_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 + +// Check that we can enable unsafe math. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -menable-unsafe-fp-math \ +// RUN: -S -emit-llvm -o - %s | FileCheck %s --check-prefix=UNSAFE-MATH +// UNSAFE-MATH: @__oclc_finite_only_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// UNSAFE-MATH: @__oclc_unsafe_math_opt = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1 + +// Check that we can disable/enable correctly rounded square roots. +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx90a -fno-hip-fp32-correctly-rounded-divide-sqrt \ +// RUN: -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CORRECT-SQRT +// CORRECT-SQRT: @__oclc_correctly_rounded_sqrt32 = linkonce hidden local_unnamed_addr addrspace(4) constant i8 0 +// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -target-cpu gfx90a -cl-fp32-correctly-rounded-divide-sqrt \ +// RUN: -disable-llvm-optzns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CL-CORRECT-SQRT +// CL-CORRECT-SQRT: @__oclc_correctly_rounded_sqrt32 = linkonce hidden local_unnamed_addr addrspace(4) constant i8 1