diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -109,6 +109,7 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/AbstractCallSite.h" @@ -1075,6 +1076,10 @@ /// NOTE: The mechanics of adding a new "concrete" abstract attribute are /// described in the file comment. struct Attributor { + + using OptimizationRemarkGetter = + function_ref; + /// Constructor /// /// \param Functions The set of functions we are deriving attributes for. @@ -1090,7 +1095,29 @@ bool RewriteSignatures = true) : Allocator(InfoCache.Allocator), Functions(Functions), InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), - DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures) {} + DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), + OREGetter(None), PassName("") {} + + /// Constructor + /// + /// \param Functions The set of functions we are deriving attributes for. + /// \param InfoCache Cache to hold various information accessible for + /// the abstract attributes. + /// \param CGUpdater Helper to update an underlying call graph. + /// \param Allowed If not null, a set limiting the attribute opportunities. + /// \param DeleteFns Whether to delete functions + /// \param OREGetter A callback function that returns an ORE object from a + /// Function pointer. + /// \param PassName The name of the pass emitting remarks. + Attributor(SetVector &Functions, InformationCache &InfoCache, + CallGraphUpdater &CGUpdater, DenseSet *Allowed, + bool DeleteFns, bool RewriteSignatures, + OptimizationRemarkGetter OREGetter, const char *PassName) + : Allocator(InfoCache.Allocator), Functions(Functions), + InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), + DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), + OREGetter(Optional(OREGetter)), + PassName(PassName) {} ~Attributor(); @@ -1457,6 +1484,41 @@ const AbstractAttribute &QueryingAA, const Value &V, DepClassTy LivenessDepClass = DepClassTy::OPTIONAL); + /// Emit a remark generically. + /// + /// This template function can be used to generically emit a remark. The + /// RemarkKind should be one of the following: + /// - OptimizationRemark to indicate a successful optimization attempt + /// - OptimizationRemarkMissed to report a failed optimization attempt + /// - OptimizationRemarkAnalysis to provide additional information about an + /// optimization attempt + /// + /// The remark is built using a callback function \p RemarkCB that takes a + /// RemarkKind as input and returns a RemarkKind. + template + void emitRemark(Instruction *I, StringRef RemarkName, + RemarkCallBack &&RemarkCB) const { + if (!OREGetter) + return; + + Function *F = I->getFunction(); + auto &ORE = OREGetter.getValue()(F); + + ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); }); + } + + /// Emit a remark on a function. + template + void emitRemark(Function *F, StringRef RemarkName, + RemarkCallBack &&RemarkCB) const { + if (!OREGetter) + return; + + auto &ORE = OREGetter.getValue()(F); + + ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); }); + } + /// Helper struct used in the communication between an abstract attribute (AA) /// that wants to change the signature of a function and the Attributor which /// applies the changes. The struct is partially initialized with the @@ -1793,6 +1855,12 @@ SmallDenseSet ToBeDeletedInsts; ///} + /// Callback to get an OptimizationRemarkEmitter from a Function *. + Optional OREGetter; + + /// The name of the pass to emit remarks for. + const char *PassName = ""; + friend AADepGraph; }; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5016,8 +5016,7 @@ : AAHeapToStack(IRP, A) {} const std::string getAsStr() const override { - return "[H2S] Mallocs: " + std::to_string(MallocCalls.size()) + "/" + - std::to_string(BadMallocCalls.size()); + return "[H2S] Mallocs: " + std::to_string(MallocCalls.size()); } bool isAssumedHeapToStack(CallBase &CB) const override { @@ -5030,16 +5029,6 @@ !BadMallocCalls.count(&CB); } - bool isAssumedHeapToStack(CallBase &CB) const override { - return getAssumed() && MallocCalls.contains(&CB) && - !BadMallocCalls.count(&CB); - } - - bool isKnownHeapToStack(CallBase &CB) const override { - return getKnown() && MallocCalls.contains(&CB) && - !BadMallocCalls.count(&CB); - } - ChangeStatus manifest(Attributor &A) override { assert(getState().isValidState() && "Attempted to manifest an invalid state!"); @@ -5062,6 +5051,11 @@ LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall << "\n"); + auto Remark = [&](OptimizationRemark OR) { + return OR << "Moving memory allocation from the heap to the stack."; + }; + A.emitRemark(MallocCall, "HeapToStack", Remark); + Align Alignment; Value *Size; if (isCallocLikeFn(MallocCall, TLI)) { @@ -5136,29 +5130,10 @@ MustBeExecutedContextExplorer &Explorer = A.getInfoCache().getMustBeExecutedContextExplorer(); - bool StackIsAccessibleByOtherThreads = - A.getInfoCache().stackIsAccessibleByOtherThreads(); - auto FreeCheck = [&](Instruction &I) { - // If the stack is not accessible by other threads, the "must-free" logic - // doesn't apply as the pointer could be shared and needs to be places in - // "sharable" memory. - if (!StackIsAccessibleByOtherThreads) { - auto &NoSyncAA = - A.getAAFor(*this, getIRPosition(), DepClassTy::OPTIONAL); - if (!NoSyncAA.isAssumedNoSync()) { - LLVM_DEBUG( - dbgs() << "[H2S] found an escaping use, stack is not accessible by " - "other threads and function is not nosync:\n"); - return false; - } - } const auto &Frees = FreesForMalloc.lookup(&I); - if (Frees.size() != 1) { - LLVM_DEBUG(dbgs() << "[H2S] did not find one free call but " - << Frees.size() << "\n"); + if (Frees.size() != 1) return false; - } Instruction *UniqueFree = *Frees.begin(); return Explorer.findInContextOf(UniqueFree, I.getNextNode()); }; @@ -5199,12 +5174,12 @@ const auto &NoCaptureAA = A.getAAFor( *this, IRPosition::callsite_argument(*CB, ArgNo), - DepClassTy::OPTIONAL); + DepClassTy::REQUIRED); // If a callsite argument use is nofree, we are fine. const auto &ArgNoFreeAA = A.getAAFor( *this, IRPosition::callsite_argument(*CB, ArgNo), - DepClassTy::OPTIONAL); + DepClassTy::REQUIRED); if (!NoCaptureAA.isAssumedNoCapture() || !ArgNoFreeAA.isAssumedNoFree()) { diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2533,6 +2533,15 @@ auto *NewBuffer = ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); + auto Remark = [&](OptimizationRemark OR) { + return OR << "Replaced globalized variable with " + << ore::NV("SharedMemory", AllocSize->getZExtValue()) + << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") + << "of shared memory"; + }; + A.emitRemark(CB, "OpenMPReplaceGlobalization", + Remark); + SharedMem->setAlignment(MaybeAlign(8)); A.changeValueAfterManifest(*CB, *NewBuffer); @@ -2687,7 +2696,8 @@ OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false); + Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, OREGetter, + DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); @@ -2742,7 +2752,8 @@ OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false); + Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, OREGetter, + DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(false); @@ -2818,7 +2829,8 @@ *(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false); + Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, + OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); return OMPOpt.run(false); diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -1,8 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s +; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" +; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving memory allocation from the heap to the stack. + @S = external local_unnamed_addr global i8* define void @kernel() { @@ -26,7 +29,7 @@ ; CHECK-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4) + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !8 call void @use(i8* %0) call void @__kmpc_free_shared(i8* %0) ret void @@ -81,3 +84,6 @@ !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{void ()* @kernel, !"kernel", i32 1} +!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 2, column: 2, scope: !6) diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -1,9 +1,12 @@ ; RUN: opt -S -passes='openmp-opt' < %s | FileCheck %s +; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" @S = external local_unnamed_addr global i8* +; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory +; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory ; CHECK: [[SHARED_X:@.+]] = internal addrspace(3) global [16 x i8] undef ; CHECK: [[SHARED_Y:@.+]] = internal addrspace(3) global [4 x i8] undef