diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -395,10 +395,17 @@ bool isExternallyDestructed() const { return IsExternallyDestructed; } }; -/// Helper to add attributes to \p F according to the CodeGenOptions and +/// If \p F "target-features" are incompatible with the \p TargetOpts features, +/// it is correct to drop the function. \return true if \p F is dropped +bool dropFunctionWithIncompatibleAttributes(llvm::Function &F, + const TargetOptions &TargetOpts); + /// LangOptions without requiring a CodeGenModule to be constructed. +/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as +/// though we had emitted it ourselves. We remove any attributes on F that +/// conflict with the attributes we add here. void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, - const CodeGenOptions CodeGenOpts, + const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, const TargetOptions &TargetOpts, bool WillInternalize); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2002,10 +2002,41 @@ } } -/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as -/// though we had emitted it ourselves. We remove any attributes on F that -/// conflict with the attributes we add here. -static void mergeDefaultFunctionDefinitionAttributes( +static void +overrideFunctionFeaturesWithTargetFeatures(llvm::AttrBuilder &FuncAttr, + const llvm::Function &F, + const TargetOptions &TargetOpts) { + auto FFeatures = F.getFnAttribute("target-features"); + + SmallVector MergedFeatures(TargetOpts.Features.begin(), + TargetOpts.Features.end()); + + if (FFeatures.isValid()) { + const auto &TFeatures = TargetOpts.FeatureMap; + for (StringRef Feature : llvm::split(FFeatures.getValueAsString(), ',')) { + bool EnabledForFunc = Feature[0] == '+'; + StringRef Name = Feature.substr(1); + auto TEntry = TFeatures.find(Name); + + // if the feature is not set for the target-opts, it must be preserved + if (TEntry == TFeatures.end()) { + MergedFeatures.push_back(Feature); + continue; + } + + // if the feature is enabled for one and disabled for the other, they are + // not compatible + bool EnabledForTarget = TEntry->second; + if (EnabledForTarget != EnabledForFunc) + return; + } + } + + if (!MergedFeatures.empty()) + FuncAttr.addAttribute("target-features", llvm::join(MergedFeatures, ",")); +} + +void CodeGen::mergeDefaultFunctionDefinitionAttributes( llvm::Function &F, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, const TargetOptions &TargetOpts, bool WillInternalize) { @@ -2062,16 +2093,30 @@ F.removeFnAttrs(AttrsToRemove); addDenormalModeAttrs(Merged, MergedF32, FuncAttrs); + + overrideFunctionFeaturesWithTargetFeatures(FuncAttrs, F, TargetOpts); + F.addFnAttrs(FuncAttrs); } -void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes( - llvm::Function &F, const CodeGenOptions CodeGenOpts, - const LangOptions &LangOpts, const TargetOptions &TargetOpts, - bool WillInternalize) { +bool CodeGen::dropFunctionWithIncompatibleAttributes( + llvm::Function &F, const TargetOptions &TargetOpts) { + auto FFeatures = F.getFnAttribute("target-features"); + if (!FFeatures.isValid()) + return false; - ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts, - TargetOpts, WillInternalize); + const auto &TFeatures = TargetOpts.FeatureMap; + for (StringRef Feature : llvm::split(FFeatures.getValueAsString(), ',')) { + bool EnabledForFunc = Feature[0] == '+'; + StringRef Name = Feature.substr(1); + auto TEntry = TFeatures.find(Name); + if (TEntry != TFeatures.end() && TEntry->second != EnabledForFunc) { + F.replaceAllUsesWith(llvm::ConstantPointerNull::get(F.getType())); + F.eraseFromParent(); + return true; + } + } + return false; } void CodeGenModule::getTrivialDefaultFunctionAttributes( diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -266,15 +266,20 @@ bool LinkInModules(llvm::Module *M) { for (auto &LM : LinkModules) { assert(LM.Module && "LinkModule does not actually have a module"); - if (LM.PropagateAttrs) - for (Function &F : *LM.Module) { + if (LM.PropagateAttrs) { + for (Function &F : llvm::make_early_inc_range(*LM.Module)) { // Skip intrinsics. Keep consistent with how intrinsics are created // in LLVM IR. if (F.isIntrinsic()) continue; + + if (CodeGen::dropFunctionWithIncompatibleAttributes(F, TargetOpts)) + continue; + CodeGen::mergeDefaultFunctionDefinitionAttributes( F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize); } + } CurLinkModule = LM.Module.get(); diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -1,42 +1,47 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2 +// Build two version of the bitcode library, one with a target-cpu set and one without // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s +// RUN: %clang_cc1 -triple amdgcn-- -DBITCODE -emit-llvm-bc -o %t-lib.no-cpu.bc %s + // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \ // RUN: -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \ +// RUN: -mlink-builtin-bitcode %t-lib.no-cpu.bc -o - %t.bc | FileCheck %s + #ifdef BITCODE -int foo(void) { return 42; } +int no_attr(void) { return 42; } +int __attribute__((target("gfx8-insts"))) attr_in_target(void) { return 42; } +int __attribute__((target("extended-image-insts"))) attr_not_in_target(void) { return 42; } +int __attribute__((target("no-gfx9-insts"))) attr_incompatible(void) { return 42; } int x = 12; #endif -extern int foo(void); +extern int no_attr(void); +extern int attr_in_target(void); +extern int attr_not_in_target(void); +extern int attr_incompatible(void); extern int x; -int bar() { return foo() + x; } -//. +int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_incompatible() + x; } + // CHECK: @x = internal addrspace(1) global i32 12, align 4 -//. -// CHECK: Function Attrs: noinline nounwind optnone + // CHECK-LABEL: define dso_local i32 @bar -// CHECK-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[CALL:%.*]] = call i32 @foo() -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]] -// CHECK-NEXT: ret i32 [[ADD]] +// CHECK-SAME: () #[[ATTR_BAR:[0-9]+]] { // -// -// CHECK: Function Attrs: convergent noinline nounwind optnone -// CHECK-LABEL: define internal i32 @foo -// CHECK-SAME: () #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: ret i32 42 -// -//. -// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -//. +// CHECK-LABEL: define internal i32 @no_attr +// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] { + +// CHECK-LABEL: define internal i32 @attr_in_target +// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] { + +// CHECK-LABEL: define internal i32 @attr_not_in_target +// CHECK-SAME: () #[[ATTR_EXTEND:[0-9]+]] { + +// CHECK-NOT: @attr_incompatible + +// CHECK: attributes #[[ATTR_BAR]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_COMPATIBLE]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_EXTEND]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+extended-image-insts" } diff --git a/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu b/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu --- a/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu +++ b/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu @@ -31,7 +31,7 @@ // CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]] -// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="+gfx11-insts" +// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="{{.*}}+gfx11-insts{{.*}}" // NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts" #define __device__ __attribute__((device))