diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -30,6 +30,8 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsNVPTX.h" using namespace llvm; using namespace omp; @@ -2329,7 +2331,6 @@ }; ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { - auto &OMPInfoCache = static_cast(A.getInfoCache()); Function *F = getAnchorScope(); ReversePostOrderTraversal RPOT(F); auto NumSingleThreadedBBs = SingleThreadedBBs.size(); @@ -2365,17 +2366,9 @@ if (!C || !C->isZero()) return false; - if (auto *CB = dyn_cast(Cmp->getOperand(0))) { - RuntimeFunction ThreadNumRuntimeIDs[] = {OMPRTL_omp_get_thread_num, - OMPRTL___kmpc_master, - OMPRTL___kmpc_global_thread_num}; - - for (const auto ThreadNumRuntimeID : ThreadNumRuntimeIDs) { - auto &RFI = OMPInfoCache.RFIs[ThreadNumRuntimeID]; - if (CB->getCalledFunction() == RFI.Declaration) - return true; - } - } + if (auto *II = dyn_cast(Cmp->getOperand(0))) + if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x) + return true; return false; }; diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -2,12 +2,6 @@ ; REQUIRES: asserts ; ModuleID = 'single_threaded_exeuction.c' -%struct.ident_t = type { i32, i32, i32, i32, i8* } - -@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 -@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 - ; CHECK: [openmp-opt] Basic block @bar entry is executed by a single thread. ; Function Attrs: noinline nounwind uwtable define internal void @bar() { @@ -21,7 +15,8 @@ ; Function Attrs: noinline nounwind uwtable define dso_local void @foo() { entry: - %call = call i32 @omp_get_thread_num() + %dummy = call i32 @omp_get_thread_num() + %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cmp = icmp eq i32 %call, 0 br i1 %cmp, label %if.then, label %if.end @@ -35,6 +30,8 @@ declare dso_local i32 @omp_get_thread_num() +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() + !llvm.module.flags = !{!0} !llvm.ident = !{!1}