Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1790,12 +1790,53 @@
   }
 }
 
+/// Returns true if the comma-separated \p FeatureList contains an entry equal
+/// to \p Feature. Entries are compared verbatim, including their leading '+'
+/// or '-', so \p Feature must be passed in its signed form (e.g. "+sm_35").
+static bool hasTargetFeature(llvm::StringRef FeatureList,
+                             llvm::StringRef Feature) {
+  llvm::StringRef Rest = FeatureList;
+  while (!Rest.empty()) {
+    auto Split = Rest.split(',');
+    if (Split.first == Feature)
+      return true;
+    Rest = Split.second;
+  }
+
+  return false;
+}
+
 void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
   llvm::AttrBuilder FuncAttrs;
   ConstructDefaultFnAttrList(F.getName(),
                              F.hasFnAttribute(llvm::Attribute::OptimizeNone),
                              /* AttrOnCallsite = */ false, FuncAttrs);
   F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
+
+  if (getTriple().isNVPTX()) {
+    // Revision 329829 added the architecture as a "target-feature". Duplicate
+    // this information from "target-cpu" to maintain the ability to inline
+    // functions from bitcode files compiled with older versions of LLVM/Clang.
+    auto TargetCpu = F.getFnAttribute("target-cpu");
+    if (TargetCpu.isStringAttribute() &&
+        !TargetCpu.getValueAsString().empty()) {
+      // Entries in "target-features" carry a sign, so build and search for
+      // the "+cpu" form; comparing against the bare CPU name would never
+      // match and would append a duplicate to bitcode that already has it.
+      std::string CpuFeature = ("+" + TargetCpu.getValueAsString()).str();
+
+      auto TargetFeatures = F.getFnAttribute("target-features");
+      llvm::StringRef FeatureList;
+      if (TargetFeatures.isStringAttribute())
+        FeatureList = TargetFeatures.getValueAsString();
+
+      // Avoid a leading ',' when there are no features to extend.
+      if (FeatureList.empty())
+        F.addFnAttr("target-features", CpuFeature);
+      else if (!hasTargetFeature(FeatureList, CpuFeature))
+        F.addFnAttr("target-features", (FeatureList + "," + CpuFeature).str());
+    }
+  }
 }
 
 void CodeGenModule::ConstructAttributeList(
Index: test/CodeGenCUDA/Inputs/device-code-2.ll
===================================================================
--- test/CodeGenCUDA/Inputs/device-code-2.ll
+++ test/CodeGenCUDA/Inputs/device-code-2.ll
@@ -2,11 +2,11 @@
 
 target triple = "nvptx-unknown-cuda"
 
-define double @__nv_sin(double %a) {
+define double @__nv_sin(double %a) #0 {
   ret double 1.0
 }
 
-define double @__nv_exp(double %a) {
+define double @__nv_exp(double %a) #0 {
   ret double 3.0
 }
 
@@ -14,3 +14,4 @@
   ret double 2.0
 }
 
+attributes #0 = { "target-cpu"="sm_35" }
Index: test/CodeGenCUDA/Inputs/device-code.ll
===================================================================
--- test/CodeGenCUDA/Inputs/device-code.ll
+++ test/CodeGenCUDA/Inputs/device-code.ll
@@ -16,7 +16,7 @@
   ret void
 }
 
-define float @_Z17device_mul_or_addff(float %a, float %b) {
+define float @_Z17device_mul_or_addff(float %a, float %b) #0 {
   %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
   %cmp = icmp ne i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
@@ -36,3 +36,5 @@
   ret float %ret
 }
 
+
+attributes #0 = { "target-cpu"="sm_35" "target-features"="+ptx42" }
Index: test/CodeGenCUDA/link-device-bitcode.cu
===================================================================
--- test/CodeGenCUDA/link-device-bitcode.cu
+++ test/CodeGenCUDA/link-device-bitcode.cu
@@ -56,6 +56,7 @@
 // Make sure device_mul_or_add() is present in IR, is internal and
 // calls __nvvm_reflect().
 // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
+// CHECK-IR-SAME: [[MUL_OR_ADD:#[0-9]+]] {
 // CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
 // CHECK-IR: call i32 @__nvvm_reflect
 // CHECK-IR: ret float
@@ -63,8 +64,16 @@
 // Make sure we've linked in and internalized only needed functions
 // from the second bitcode file.
 // CHECK-IR-2-LABEL: define internal double @__nv_sin
+// CHECK-IR-2-SAME: [[IR2ATTR:#[0-9]+]] {
 // CHECK-IR-2-LABEL: define internal double @__nv_exp
+// CHECK-IR-2-SAME: [[IR2ATTR]] {
 // CHECK-IR-2-NOT: double @__unused
 
+// CHECK-IR: attributes [[MUL_OR_ADD]] = {
+// CHECK-IR-SAME: "target-features"="+ptx42,+sm_35"
+
+// CHECK-IR-2: attributes [[IR2ATTR]] = {
+// CHECK-IR-2-SAME: "target-features"="+sm_35"
+
 // Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
 // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1