diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -110,6 +110,7 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/AbstractCallSite.h" @@ -1076,6 +1077,10 @@ /// NOTE: The mechanics of adding a new "concrete" abstract attribute are /// described in the file comment. struct Attributor { + + using OptimizationRemarkGetter = + function_ref; + /// Constructor /// /// \param Functions The set of functions we are deriving attributes for. @@ -1091,7 +1096,29 @@ bool RewriteSignatures = true) : Allocator(InfoCache.Allocator), Functions(Functions), InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), - DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures) {} + DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), + OREGetter(None), PassName("") {} + + /// Constructor + /// + /// \param Functions The set of functions we are deriving attributes for. + /// \param InfoCache Cache to hold various information accessible for + /// the abstract attributes. + /// \param CGUpdater Helper to update an underlying call graph. + /// \param Allowed If not null, a set limiting the attribute opportunities. + /// \param DeleteFns Whether to delete functions + /// \param OREGetter A callback function that returns an ORE object from a + /// Function pointer. + /// \param PassName The name of the pass emitting remarks. + Attributor(SetVector &Functions, InformationCache &InfoCache, + CallGraphUpdater &CGUpdater, DenseSet *Allowed, + bool DeleteFns, bool RewriteSignatures, + OptimizationRemarkGetter OREGetter, const char *PassName) + : Allocator(InfoCache.Allocator), Functions(Functions), + InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), + DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), + OREGetter(Optional(OREGetter)), + PassName(PassName) {} ~Attributor(); @@ -1494,6 +1521,41 @@ const AbstractAttribute &QueryingAA, const Value &V, DepClassTy LivenessDepClass = DepClassTy::OPTIONAL); + /// Emit a remark generically. + /// + /// This template function can be used to generically emit a remark. The + /// RemarkKind should be one of the following: + /// - OptimizationRemark to indicate a successful optimization attempt + /// - OptimizationRemarkMissed to report a failed optimization attempt + /// - OptimizationRemarkAnalysis to provide additional information about an + /// optimization attempt + /// + /// The remark is built using a callback function \p RemarkCB that takes a + /// RemarkKind as input and returns a RemarkKind. + template + void emitRemark(Instruction *I, StringRef RemarkName, + RemarkCallBack &&RemarkCB) const { + if (!OREGetter) + return; + + Function *F = I->getFunction(); + auto &ORE = OREGetter.getValue()(F); + + ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); }); + } + + /// Emit a remark on a function. + template + void emitRemark(Function *F, StringRef RemarkName, + RemarkCallBack &&RemarkCB) const { + if (!OREGetter) + return; + + auto &ORE = OREGetter.getValue()(F); + + ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); }); + } + /// Helper struct used in the communication between an abstract attribute (AA) /// that wants to change the signature of a function and the Attributor which /// applies the changes. The struct is partially initialized with the @@ -1830,6 +1892,12 @@ SmallDenseSet ToBeDeletedInsts; ///} + /// Callback to get an OptimizationRemarkEmitter from a Function *. + Optional OREGetter; + + /// The name of the pass to emit remarks for. + const char *PassName = ""; + friend AADepGraph; friend AttributorCallGraph; }; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5048,6 +5048,17 @@ LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall << "\n"); + auto Remark = [&](OptimizationRemark OR) { + LibFunc IsAllocShared; + if (auto *CB = dyn_cast(MallocCall)) { + TLI->getLibFunc(*CB, IsAllocShared); + if (IsAllocShared == LibFunc___kmpc_alloc_shared) + return OR << "Moving globalized variable to the stack."; + } + return OR << "Moving memory allocation from the heap to the stack."; + }; + A.emitRemark(MallocCall, "HeapToStack", Remark); + Align Alignment; Value *Size; if (isCallocLikeFn(MallocCall, TLI)) { @@ -5194,6 +5205,23 @@ if (!NoCaptureAA.isAssumedNoCapture() || !ArgNoFreeAA.isAssumedNoFree()) { + + // Emit a missed remark if this is missed OpenMP globalization. + auto Remark = [&](OptimizationRemarkMissed ORM) { + return ORM << "Could not move globalized variable to the stack. " + << "Variable is potentially " + << ((!NoCaptureAA.isAssumedNoCapture()) ? "captured." + : "freed."); + }; + + LibFunc IsAllocShared; + if (auto *AllocShared = dyn_cast(&I)) { + TLI->getLibFunc(*AllocShared, IsAllocShared); + if (IsAllocShared == LibFunc___kmpc_alloc_shared) + A.emitRemark( + AllocShared, "HeapToStackFailed", Remark); + } + LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); ValidUsesOnly = false; } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2503,6 +2503,15 @@ auto *NewBuffer = ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); + auto Remark = [&](OptimizationRemark OR) { + return OR << "Replaced globalized variable with " + << ore::NV("SharedMemory", AllocSize->getZExtValue()) + << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") + << "of shared memory"; + }; + A.emitRemark(CB, "OpenMPReplaceGlobalization", + Remark); + SharedMem->setAlignment(MaybeAlign(32)); A.changeValueAfterManifest(*CB, *NewBuffer); @@ -2657,7 +2666,8 @@ OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false); + Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, OREGetter, + DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); @@ -2712,7 +2722,8 @@ OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false); + Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, OREGetter, + DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(false); @@ -2788,7 +2799,8 @@ *(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false); + Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, + OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); return OMPOpt.run(false); diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s +; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" +; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured. +; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. + @S = external local_unnamed_addr global i8* define void @kernel() { @@ -26,7 +30,7 @@ ; CHECK-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4) + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !9 call void @use(i8* %0) call void @__kmpc_free_shared(i8* %0) ret void @@ -42,7 +46,7 @@ ; CHECK-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4) + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10 call void @share(i8* %0) call void @__kmpc_free_shared(i8* %0) ret void @@ -81,3 +85,8 @@ !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{void ()* @kernel, !"kernel", i32 1} +!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 2, scope: !6) +!10 = !DILocation(line: 4, column: 2, scope: !7) diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -1,9 +1,12 @@ ; RUN: opt -S -passes='openmp-opt' < %s | FileCheck %s +; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" @S = external local_unnamed_addr global i8* +; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory +; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory ; CHECK: [[SHARED_X:@.+]] = internal addrspace(3) global [16 x i8] undef ; CHECK: [[SHARED_Y:@.+]] = internal addrspace(3) global [4 x i8] undef @@ -68,7 +71,6 @@ define void @use(i8* %x) { entry: - %addr = alloca i8* store i8* %x, i8** @S ret void }