diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -705,24 +705,33 @@
   }
 
+  /// Emit an analysis remark for every regular call within the SCC to one of
+  /// the data-sharing "push stack" runtime functions. The remark text warns
+  /// that thread data sharing on the GPU degrades performance.
   void analysisGlobalization() {
-    auto &RFI =
-        OMPInfoCache.RFIs[OMPRTL___kmpc_data_sharing_coalesced_push_stack];
-
-    auto checkGlobalization = [&](Use &U, Function &Decl) {
-      if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
-        auto Remark = [&](OptimizationRemarkAnalysis ORA) {
-          return ORA
-                 << "Found thread data sharing on the GPU. "
-                 << "Expect degraded performance due to data globalization.";
-        };
-        emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
-                                               Remark);
-      }
+    // Runtime functions whose call sites are reported as globalization.
+    const RuntimeFunction GlobalizationRuntimeIDs[] = {
+        OMPRTL___kmpc_data_sharing_coalesced_push_stack,
+        OMPRTL___kmpc_data_sharing_push_stack};
+
+    for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) {
+      auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID];
+
+      auto CheckGlobalization = [&](Use &U, Function &Decl) {
+        if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
+          auto Remark = [&](OptimizationRemarkAnalysis ORA) {
+            return ORA
+                   << "Found thread data sharing on the GPU. "
+                   << "Expect degraded performance due to data globalization.";
+          };
+          emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
+                                                 Remark);
+        }
 
-      return false;
-    };
+        return false;
+      };
 
-    RFI.foreachUse(SCC, checkGlobalization);
+      RFI.foreachUse(SCC, CheckGlobalization);
+    }
     return;
   }
 
diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
--- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
@@ -59,7 +59,7 @@
   br i1 %.not, label %.non-spmd, label %.exit
 
 .non-spmd:                                        ; preds = %entry
-  %1 = tail call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 128, i16 0) #4, !dbg !31
+  %1 = tail call i8* @__kmpc_data_sharing_push_stack(i64 128, i16 0) #4, !dbg !31
   %2 = bitcast i8* %1 to %struct._globalized_locals_ty*
   br label %.exit
 
@@ -86,6 +86,8 @@
 
 declare i8* @__kmpc_data_sharing_coalesced_push_stack(i64, i16) local_unnamed_addr
 
+declare i8* @__kmpc_data_sharing_push_stack(i64, i16) local_unnamed_addr
+
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
 