diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -523,6 +523,11 @@
                                    EnumAttr(NoCapture))
                     : AttributeSet())
 
+__OMP_ATTRS_SET(NoCaptureAttrs,
+                OptimisticAttributes
+                    ? AttributeSet(EnumAttr(NoCapture))
+                    : AttributeSet(EnumAttr(NoCapture)))
+
 #if 0
 __OMP_ATTRS_SET(WriteOnlyPtrAttrs,
                 OptimisticAttributes
@@ -835,6 +840,10 @@
 __OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(),
                 ParamAttrs(ReadOnlyPtrAttrs))
 
+__OMP_RTL_ATTRS(__kmpc_alloc_shared, DefaultAttrs, ReturnPtrAttrs, {})
+__OMP_RTL_ATTRS(__kmpc_free_shared, AllocAttrs, AttributeSet(),
+                ParamAttrs(NoCaptureAttrs))
+
 __OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, {})
 __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {})
 
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -4478,6 +4478,14 @@
   AANoCaptureCallSiteReturned(const IRPosition &IRP, Attributor &A)
       : AANoCaptureImpl(IRP, A) {}
 
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // Narrow the initial state to what the associated function is actually
+    // able to capture.
+    const Function *AnchorFn = getAnchorScope();
+    determineFunctionCaptureCapabilities(getIRPosition(), *AnchorFn, *this);
+  }
+
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {
     STATS_DECLTRACK_CSRET_ATTR(nocapture)
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -517,6 +517,7 @@
     // Recollect uses, in case Attributor deleted any.
OMPInfoCache.recollectUses(); + Changed |= removeGlobalization(); Changed |= deleteParallelRegions(); if (HideMemoryTransferLatency) Changed |= hideMemTransfersLatency(); @@ -981,6 +982,44 @@ return Changed; } + bool removeGlobalization() { + auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; + + auto removeAllocCalls = [&](Use &U, Function &F) { + auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; + CallBase *CB = + dyn_cast(OpenMPOpt::getCallIfRegularCall(U, &RFI)); + if (!CB) + return false; + + IRPosition AllocPos = IRPosition::callsite_returned(*CB); + if (A.lookupAAFor(AllocPos)->isKnownNoCapture()) { + Constant *AllocSize = dyn_cast(CB->getArgOperand(0)); + if (!AllocSize) + return false; + + CallBase *FC = dyn_cast(CB->user_back()); + if (!FC || FC->getCalledFunction() != FreeCall.Declaration) + return false; + + const DataLayout &DL = M.getDataLayout(); + Type *Int8Ty = Type::getInt8Ty(M.getContext()); + AllocaInst *NewAlloca = + new AllocaInst(Int8Ty, DL.getAllocaAddrSpace(), AllocSize, + CB->getName(), &F.front().front()); + NewAlloca->setDebugLoc(CB->getDebugLoc()); + FC->eraseFromParent(); + CB->replaceAllUsesWith(NewAlloca); + CB->eraseFromParent(); + } + + return false; + }; + RFI.foreachUse(SCC, removeAllocCalls); + + return false; + } + /// Try to delete parallel regions if possible. 
bool deleteParallelRegions() { const unsigned CallbackCalleeOperand = 2; @@ -1600,6 +1639,24 @@ GetterRFI.foreachUse(SCC, CreateAA); } + + RuntimeFunction GlobalizationRuntimeIDs[] = {OMPRTL___kmpc_free_shared, + OMPRTL___kmpc_alloc_shared}; + for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) { + auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID]; + auto CreateAA = [&](Use &U, Function &Decl) { + CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI); + if (!CI) + return false; + + auto &CB = cast(*CI); + + IRPosition CBPos = IRPosition::callsite_returned(CB); + A.getOrCreateAAFor(CBPos); + return false; + }; + RFI.foreachUse(SCC, CreateAA); + } } }; diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -0,0 +1,51 @@ +; RUN: opt -S -attributor -openmpopt < %s | FileCheck %s +; RUN: opt -S -passes='attributor,cgscc(openmpopt)' < %s | FileCheck %s +target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" +target triple = "nvptx64" + +@S = external local_unnamed_addr global i8* + +; CHECK: {{.*}} = alloca i8, i64 4 +; CHECK-NOT: {{.*}} = call i8* @__kmpc_alloc_shared({{.*}}) +; CHECK-NOT: call void @__kmpc_free_shared({{.*}}) +define void @foo() #0 { +entry: + %0 = call i8* @__kmpc_alloc_shared(i64 4) + %x_on_stack = bitcast i8* %0 to i32* + %1 = bitcast i32* %x_on_stack to i8* + call void @use(i8* %1) + call void @__kmpc_free_shared(i8* %0) + ret void +} + +; CHECK-NOT: {{.*}} = alloca i8, i64 4 +; CHECK: {{.*}} = call i8* @__kmpc_alloc_shared({{.*}}) +; CHECK: call void @__kmpc_free_shared({{.*}}) +define void @bar() #0 { +entry: + %0 = call i8* @__kmpc_alloc_shared(i64 4) + %x_on_stack = bitcast i8* %0 to i32* + %1 = bitcast i32* %x_on_stack to i8* + call void @share(i8* %1) + call void @__kmpc_free_shared(i8* %0) + ret void +} + +define void @use(i8* %x) { +entry: + %.addr = alloca i8* 
+ store i8* %x, i8** %.addr + ret void +} + +define void @share(i8* %x) { +entry: + store i8* %x, i8** @S + ret void +} + +; CHECK: declare i8* @__kmpc_alloc_shared(i64) +declare i8* @__kmpc_alloc_shared(i64) + +; CHECK: declare void @__kmpc_free_shared(i8* nocapture) +declare void @__kmpc_free_shared(i8*)