diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -527,6 +527,34 @@
                               1);
   }
 
+  if (!CodeGenOpts.SimplifyLibCalls) {
+    // Indicate that all built in functions are disabled.
+    getModule().addModuleFlag(llvm::Module::Max, "DisableAllBuiltins", 1);
+  }
+
+  // Record the individually disabled builtins as a list of function names.
+  if (!CodeGenOpts.getNoBuiltinFuncs().empty()) {
+    std::vector<llvm::Metadata *> Nodes;
+    for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs())
+      Nodes.push_back(llvm::MDString::get(VMContext, FuncName));
+    getModule().addModuleFlag(llvm::Module::AppendUnique, "NoBuiltins",
+                              llvm::MDNode::get(VMContext, Nodes));
+  }
+
+  // Record the selected vector library; no flag is emitted for other values.
+  switch (CodeGenOpts.getVecLib()) {
+  case CodeGenOptions::Accelerate:
+    getModule().addModuleFlag(llvm::Module::Error, "VectorLibrary",
+                              llvm::MDString::get(VMContext, "Accelerate"));
+    break;
+  case CodeGenOptions::SVML:
+    getModule().addModuleFlag(llvm::Module::Error, "VectorLibrary",
+                              llvm::MDString::get(VMContext, "SVML"));
+    break;
+  default:
+    break;
+  }
+
   if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) {
     // Indicate whether __nvvm_reflect should be configured to flush denormal
     // floating point values to 0.
(This corresponds to its "__CUDA_FTZ" diff --git a/clang/test/CodeGen/nobuiltins.c b/clang/test/CodeGen/nobuiltins.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/nobuiltins.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -emit-llvm -ffreestanding %s -o - | FileCheck %s -check-prefix=DISABLEALL +// RUN: %clang_cc1 -emit-llvm -fno-builtin %s -o - | FileCheck %s -check-prefix=DISABLEALL +// RUN: %clang_cc1 -emit-llvm -fno-builtin-memset -fno-builtin-memcpy %s -o - | FileCheck %s -check-prefix=DISABLESOME +// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NONE + +// ACCELERATE-DAG: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"Accelerate"} +// DISABLEALL-DAG: !{{[0-9]+}} = !{i32 7, !"DisableAllBuiltins", i32 1} +// DISABLESOME-DAG: !{{[0-9]+}} = !{i32 6, !"NoBuiltins", ![[LIST:[0-9]+]] +// DISABLESOME-DAG: ![[LIST]] = !{!"memset", !"memcpy"} +// DISABLEMEMSET-DAG: !{{[0-9]+}} = !{i32 6, !"NoBuiltins", ![[LIST:[0-9]+]] +// DISABLEMEMSET-DAG: ![[LIST]] = !{!"memset"} +// NONE-NOT: Builtins + +// Now ensure merging gets the expected behavior +// RUN: %clang -c -flto %s -o %t0.o +// RUN: %clang -c -flto -fno-builtin %s -o %t1.o +// RUN: %clang -c -flto -fno-builtin-memset %s -o %t2.o +// RUN: %clang -c -flto -fno-builtin-memcpy %s -o %t3.o +// Merge none with -fno-builtin -> DisableAllBuiltins set +// RUN: llvm-lto %t0.o %t1.o -o %t4.o -save-merged-module +// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLEALL +// Merge -fno-builtin -fno-builtin-memset -> Both DisableAllBuiltins set and +// NoBuiltins=memset. +// RUN: llvm-lto %t1.o %t2.o -o %t4.o -save-merged-module +// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLEALL --check-prefix=DISABLEMEMSET +// Merge -fno-builtin-memset -fno-builtin-memcpy -> NoBuiltins=memset,memcpy. 
+// RUN: llvm-lto %t2.o %t3.o -o %t4.o -save-merged-module +// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLESOME diff --git a/clang/test/CodeGen/svml-calls.ll b/clang/test/CodeGen/svml-calls.ll new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/svml-calls.ll @@ -0,0 +1,40 @@ +; Test to ensure that -fveclib=SVML module flag is handled properly in +; the ThinLTO distributed backend. + +; RUN: opt -module-summary -o %t.o %s +; RUN: llvm-lto -thinlto -o %t %t.o +; RUN: %clang -target x86_64-unknown-linux-gnu -O3 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj -mllvm -force-vector-width=4 -mllvm -force-vector-interleave=1 -mavx +; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s + +; RUN opt -vector-library=SVML -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare double @sin(double) #0 + +define void @sin_f64(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"VectorLibrary", !"SVML"} diff --git a/clang/test/CodeGen/thinlto_backend_nobuiltin.ll b/clang/test/CodeGen/thinlto_backend_nobuiltin.ll new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/thinlto_backend_nobuiltin.ll @@ -0,0 +1,23 @@ +; Make sure that 
-fno-builtin/-ffreestanding module flag is handled properly in +; the ThinLTO distributed backend. +; REQUIRES: x86-registered-target + +; RUN: opt -module-summary -o %t.o %s +; RUN: llvm-lto -thinlto -o %t %t.o + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj +; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s --check-prefix=NOBUILTIN + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i8* @memset(i8*, i8, i64) + +define i8* @test(i8* %mem, i8 %val, i64 %size) { + ; NOBUILTIN: call i8* @memset + %ret = call i8* @memset(i8* %mem, i8 %val, i64 %size) + ret i8* %ret +} + +!llvm.module.flags = !{!1} +!1 = !{i32 7, !"DisableAllBuiltins", i32 1} diff --git a/clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll b/clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll @@ -0,0 +1,25 @@ +; Make sure that -fno-builtin-memset module flag is handled properly in +; the ThinLTO distributed backend. 
+; REQUIRES: x86-registered-target + +; RUN: opt -module-summary -o %t.o %s +; RUN: llvm-lto -thinlto -o %t %t.o + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj +; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s --check-prefix=NOBUILTIN + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i8* @memset(i8*, i8, i64) + +define i8* @test(i8* %mem, i8 %val, i64 %size) { + ; NOBUILTIN: call i8* @memset + %ret = call i8* @memset(i8* %mem, i8 %val, i64 %size) + ret i8* %ret +} + +!llvm.module.flags = !{!1} +!1 = !{i32 6, !"NoBuiltins", !2} +!2 = !{!"memset"} + diff --git a/clang/test/CodeGen/veclib-calls.ll b/clang/test/CodeGen/veclib-calls.ll new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/veclib-calls.ll @@ -0,0 +1,38 @@ +; Test to ensure that -fveclib=Accelerate module flag is handled properly in +; the ThinLTO distributed backend. + +; RUN: opt -module-summary -o %t.o %s +; RUN: llvm-lto -thinlto -o %t %t.o +; RUN: %clang -target x86_64-unknown-linux-gnu -O3 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj +; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @sqrt_f32( +;CHECK: vsqrtf{{.*}}<4 x float> +;CHECK: ret void +declare float @sqrtf(float) nounwind readnone +define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @sqrtf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 
%indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"VectorLibrary", !"Accelerate"} diff --git a/clang/test/CodeGen/veclib.c b/clang/test/CodeGen/veclib.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/veclib.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -emit-llvm -fveclib=Accelerate %s -o - | FileCheck %s -check-prefix=ACCELERATE +// RUN: %clang_cc1 -emit-llvm -fveclib=SVML %s -o - | FileCheck %s -check-prefix=SVML +// RUN: %clang_cc1 -emit-llvm -fveclib=none %s -o - | FileCheck %s -check-prefix=NONE +// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NONE + +// ACCELERATE: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"Accelerate"} +// SVML: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"SVML"} +// NONE-NOT: VectorLibrary + +// Now ensure merging gets the expected behavior +// RUN: %clang -c -flto %s -o %t0.o +// RUN: %clang -c -flto -fveclib=Accelerate %s -o %t1.o +// RUN: %clang -c -flto -fveclib=SVML %s -o %t2.o +// Merge none with -fveclib=Accelerate -> VectorLibrary=Accelerate +// RUN: llvm-lto %t0.o %t1.o -o %t3.o -save-merged-module +// RUN: llvm-dis %t3.o.merged.bc -o - | FileCheck %s --check-prefix=ACCELERATE +// Merge none with -fveclib=SVML -> VectorLibrary=SVML +// RUN: llvm-lto %t0.o %t2.o -o %t3.o -save-merged-module +// RUN: llvm-dis %t3.o.merged.bc -o - | FileCheck %s --check-prefix=SVML +// Merge -fveclib=Accelerate with -fveclib=SVML -> Error +// RUN: not llvm-lto %t1.o %t2.o -o %t3.o -save-merged-module diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -218,6 +218,34 @@ // FIXME (davide): verify the output. 
}
+/// Build the TargetLibraryInfoImpl for \p TM's triple from \p Mod's builtin
+/// and vector-library module flags. The caller owns the returned object.
+static TargetLibraryInfoImpl *createTLII(Module &Mod, TargetMachine *TM) {
+  auto *TLII = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+  auto *DisableAll = mdconst::extract_or_null<ConstantInt>(
+      Mod.getModuleFlag("DisableAllBuiltins"));
+  if (DisableAll && DisableAll->getZExtValue()) {
+    TLII->disableAllFunctions();
+  } else if (Metadata *Val = Mod.getModuleFlag("NoBuiltins")) {
+    // Disable individual libc/libm calls in TargetLibraryInfo.
+    LibFunc F;
+    for (const MDOperand &FuncName : cast<MDNode>(Val)->operands())
+      if (TLII->getLibFunc(cast<MDString>(*FuncName).getString(), F))
+        TLII->setUnavailable(F);
+  }
+  if (auto *VL =
+          dyn_cast_or_null<MDString>(Mod.getModuleFlag("VectorLibrary"))) {
+    if (VL->getString() == "Accelerate")
+      TLII->addVectorizableFunctionsFromVecLib(
+          TargetLibraryInfoImpl::Accelerate);
+    else if (VL->getString() == "SVML")
+      TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
+    else // The flag comes from the input module: fail in release builds too.
+      report_fatal_error("Invalid vector library module flag");
+  }
+  return TLII;
+}
+
 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
                                  std::string PipelineDesc,
                                  std::string AAPipelineDesc,
@@ -239,6 +267,10 @@
   // Register the AA manager first so that our version is the one used.
   FAM.registerPass([&] { return std::move(AA); });
 
+  std::unique_ptr<TargetLibraryInfoImpl> TLII(createTLII(Mod, TM));
+  FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+  MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
   // Register all the basic analyses with the managers.
   PB.registerModuleAnalyses(MAM);
   PB.registerCGSCCAnalyses(CGAM);
@@ -268,7 +300,7 @@
   passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
 
   PassManagerBuilder PMB;
-  PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+  PMB.LibraryInfo = createTLII(Mod, TM);
   PMB.Inliner = createFunctionInliningPass();
   PMB.ExportSummary = ExportSummary;
   PMB.ImportSummary = ImportSummary;