Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3340,9 +3340,6 @@ } bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) { - if (!mayContainParallelRegion()) - return false; - auto &OMPInfoCache = static_cast(A.getInfoCache()); if (!SPMDCompatibilityTracker.isAssumed()) { @@ -3401,222 +3398,275 @@ // We will now unconditionally modify the IR, indicate a change. Changed = ChangeStatus::CHANGED; - auto CreateGuardedRegion = [&](Instruction *RegionStartI, - Instruction *RegionEndI) { - LoopInfo *LI = nullptr; - DominatorTree *DT = nullptr; - MemorySSAUpdater *MSU = nullptr; - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - - BasicBlock *ParentBB = RegionStartI->getParent(); - Function *Fn = ParentBB->getParent(); - Module &M = *Fn->getParent(); - - // Create all the blocks and logic. - // ParentBB: - // goto RegionCheckTidBB - // RegionCheckTidBB: - // Tid = __kmpc_hardware_thread_id() - // if (Tid != 0) - // goto RegionBarrierBB - // RegionStartBB: - // - // goto RegionEndBB - // RegionEndBB: - // - // goto RegionBarrierBB - // RegionBarrierBB: - // __kmpc_simple_barrier_spmd() - // // second barrier is omitted if lacking escaping values. 
- // - // __kmpc_simple_barrier_spmd() - // goto RegionExitBB - // RegionExitBB: - // - - BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(), - DT, LI, MSU, "region.guarded.end"); - BasicBlock *RegionBarrierBB = - SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI, - MSU, "region.barrier"); - BasicBlock *RegionExitBB = - SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(), - DT, LI, MSU, "region.exit"); - BasicBlock *RegionStartBB = - SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded"); - - assert(ParentBB->getUniqueSuccessor() == RegionStartBB && - "Expected a different CFG"); + auto &Ctx = getAnchorValue().getContext(); - BasicBlock *RegionCheckTidBB = SplitBlock( - ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid"); + if (mayContainParallelRegion()) { + auto CreateGuardedRegion = [&](Instruction *RegionStartI, + Instruction *RegionEndI) { + LoopInfo *LI = nullptr; + DominatorTree *DT = nullptr; + MemorySSAUpdater *MSU = nullptr; + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + + BasicBlock *ParentBB = RegionStartI->getParent(); + Function *Fn = ParentBB->getParent(); + Module &M = *Fn->getParent(); + + // Create all the blocks and logic. + // ParentBB: + // goto RegionCheckTidBB + // RegionCheckTidBB: + // Tid = __kmpc_hardware_thread_id() + // if (Tid != 0) + // goto RegionBarrierBB + // RegionStartBB: + // + // goto RegionEndBB + // RegionEndBB: + // + // goto RegionBarrierBB + // RegionBarrierBB: + // __kmpc_simple_barrier_spmd() + // // second barrier is omitted if lacking escaping values. 
+ // + // __kmpc_simple_barrier_spmd() + // goto RegionExitBB + // RegionExitBB: + // + + BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(), + DT, LI, MSU, "region.guarded.end"); + BasicBlock *RegionBarrierBB = + SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI, + MSU, "region.barrier"); + BasicBlock *RegionExitBB = + SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(), + DT, LI, MSU, "region.exit"); + BasicBlock *RegionStartBB = + SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded"); + + assert(ParentBB->getUniqueSuccessor() == RegionStartBB && + "Expected a different CFG"); + + BasicBlock *RegionCheckTidBB = SplitBlock( + ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid"); + + // Register basic blocks with the Attributor. + A.registerManifestAddedBasicBlock(*RegionEndBB); + A.registerManifestAddedBasicBlock(*RegionBarrierBB); + A.registerManifestAddedBasicBlock(*RegionExitBB); + A.registerManifestAddedBasicBlock(*RegionStartBB); + A.registerManifestAddedBasicBlock(*RegionCheckTidBB); + + bool HasBroadcastValues = false; + // Find escaping outputs from the guarded region to outside users and + // broadcast their values to them. + for (Instruction &I : *RegionStartBB) { + SmallPtrSet OutsideUsers; + for (User *Usr : I.users()) { + Instruction &UsrI = *cast(Usr); + if (UsrI.getParent() != RegionStartBB) + OutsideUsers.insert(&UsrI); + } - // Register basic blocks with the Attributor. - A.registerManifestAddedBasicBlock(*RegionEndBB); - A.registerManifestAddedBasicBlock(*RegionBarrierBB); - A.registerManifestAddedBasicBlock(*RegionExitBB); - A.registerManifestAddedBasicBlock(*RegionStartBB); - A.registerManifestAddedBasicBlock(*RegionCheckTidBB); + if (OutsideUsers.empty()) + continue; - bool HasBroadcastValues = false; - // Find escaping outputs from the guarded region to outside users and - // broadcast their values to them. 
- for (Instruction &I : *RegionStartBB) { - SmallPtrSet OutsideUsers; - for (User *Usr : I.users()) { - Instruction &UsrI = *cast(Usr); - if (UsrI.getParent() != RegionStartBB) - OutsideUsers.insert(&UsrI); - } + HasBroadcastValues = true; - if (OutsideUsers.empty()) - continue; + // Emit a global variable in shared memory to store the broadcasted + // value. + auto *SharedMem = new GlobalVariable( + M, I.getType(), /* IsConstant */ false, + GlobalValue::InternalLinkage, UndefValue::get(I.getType()), + sanitizeForGlobalName( + (I.getName() + ".guarded.output.alloc").str()), + nullptr, GlobalValue::NotThreadLocal, + static_cast(AddressSpace::Shared)); - HasBroadcastValues = true; + // Emit a store instruction to update the value. + new StoreInst(&I, SharedMem, RegionEndBB->getTerminator()); - // Emit a global variable in shared memory to store the broadcasted - // value. - auto *SharedMem = new GlobalVariable( - M, I.getType(), /* IsConstant */ false, - GlobalValue::InternalLinkage, UndefValue::get(I.getType()), - sanitizeForGlobalName( - (I.getName() + ".guarded.output.alloc").str()), - nullptr, GlobalValue::NotThreadLocal, - static_cast(AddressSpace::Shared)); - - // Emit a store instruction to update the value. - new StoreInst(&I, SharedMem, RegionEndBB->getTerminator()); - - LoadInst *LoadI = new LoadInst(I.getType(), SharedMem, - I.getName() + ".guarded.output.load", - RegionBarrierBB->getTerminator()); - - // Emit a load instruction and replace uses of the output value. - for (Instruction *UsrI : OutsideUsers) - UsrI->replaceUsesOfWith(&I, LoadI); - } + LoadInst *LoadI = new LoadInst(I.getType(), SharedMem, + I.getName() + ".guarded.output.load", + RegionBarrierBB->getTerminator()); - auto &OMPInfoCache = static_cast(A.getInfoCache()); + // Emit a load instruction and replace uses of the output value. + for (Instruction *UsrI : OutsideUsers) + UsrI->replaceUsesOfWith(&I, LoadI); + } - // Go to tid check BB in ParentBB. 
- const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); - ParentBB->getTerminator()->eraseFromParent(); - OpenMPIRBuilder::LocationDescription Loc( - InsertPointTy(ParentBB, ParentBB->end()), DL); - OMPInfoCache.OMPBuilder.updateToLocation(Loc); - uint32_t SrcLocStrSize; - auto *SrcLocStr = - OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = - OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); - BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); - - // Add check for Tid in RegionCheckTidBB - RegionCheckTidBB->getTerminator()->eraseFromParent(); - OpenMPIRBuilder::LocationDescription LocRegionCheckTid( - InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL); - OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid); - FunctionCallee HardwareTidFn = - OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( - M, OMPRTL___kmpc_get_hardware_thread_id_in_block); - CallInst *Tid = - OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {}); - Tid->setDebugLoc(DL); - OMPInfoCache.setCallingConvention(HardwareTidFn, Tid); - Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid); - OMPInfoCache.OMPBuilder.Builder - .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB) - ->setDebugLoc(DL); - - // First barrier for synchronization, ensures main thread has updated - // values. - FunctionCallee BarrierFn = - OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( - M, OMPRTL___kmpc_barrier_simple_spmd); - OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy( - RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt())); - CallInst *Barrier = - OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid}); - Barrier->setDebugLoc(DL); - OMPInfoCache.setCallingConvention(BarrierFn, Barrier); - - // Second barrier ensures workers have read broadcast values. 
- if (HasBroadcastValues) { - CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "", - RegionBarrierBB->getTerminator()); + auto &OMPInfoCache = static_cast(A.getInfoCache()); + + // Go to tid check BB in ParentBB. + const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); + ParentBB->getTerminator()->eraseFromParent(); + OpenMPIRBuilder::LocationDescription Loc( + InsertPointTy(ParentBB, ParentBB->end()), DL); + OMPInfoCache.OMPBuilder.updateToLocation(Loc); + uint32_t SrcLocStrSize; + auto *SrcLocStr = + OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = + OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); + BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); + + // Add check for Tid in RegionCheckTidBB + RegionCheckTidBB->getTerminator()->eraseFromParent(); + OpenMPIRBuilder::LocationDescription LocRegionCheckTid( + InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL); + OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid); + FunctionCallee HardwareTidFn = + OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_get_hardware_thread_id_in_block); + CallInst *Tid = + OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {}); + Tid->setDebugLoc(DL); + OMPInfoCache.setCallingConvention(HardwareTidFn, Tid); + Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid); + OMPInfoCache.OMPBuilder.Builder + .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB) + ->setDebugLoc(DL); + + // First barrier for synchronization, ensures main thread has updated + // values. 
+ FunctionCallee BarrierFn = + OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_barrier_simple_spmd); + OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy( + RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt())); + CallInst *Barrier = + OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid}); Barrier->setDebugLoc(DL); OMPInfoCache.setCallingConvention(BarrierFn, Barrier); - } - }; - auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; - SmallPtrSet Visited; - for (Instruction *GuardedI : SPMDCompatibilityTracker) { - BasicBlock *BB = GuardedI->getParent(); - if (!Visited.insert(BB).second) - continue; + // Second barrier ensures workers have read broadcast values. + if (HasBroadcastValues) { + CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "", + RegionBarrierBB->getTerminator()); + Barrier->setDebugLoc(DL); + OMPInfoCache.setCallingConvention(BarrierFn, Barrier); + } + }; - SmallVector> Reorders; - Instruction *LastEffect = nullptr; - BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend(); - while (++IP != IPEnd) { - if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory()) - continue; - Instruction *I = &*IP; - if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI)) - continue; - if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) { - LastEffect = nullptr; + auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; + SmallPtrSet Visited; + for (Instruction *GuardedI : SPMDCompatibilityTracker) { + BasicBlock *BB = GuardedI->getParent(); + if (!Visited.insert(BB).second) continue; + + SmallVector> Reorders; + Instruction *LastEffect = nullptr; + BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend(); + while (++IP != IPEnd) { + if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory()) + continue; + Instruction *I = &*IP; + if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI)) + continue; + if (!I->user_empty() || 
!SPMDCompatibilityTracker.contains(I)) { + LastEffect = nullptr; + continue; + } + if (LastEffect) + Reorders.push_back({I, LastEffect}); + LastEffect = &*IP; } - if (LastEffect) - Reorders.push_back({I, LastEffect}); - LastEffect = &*IP; + for (auto &Reorder : Reorders) + Reorder.first->moveBefore(Reorder.second); } - for (auto &Reorder : Reorders) - Reorder.first->moveBefore(Reorder.second); - } - SmallVector, 4> GuardedRegions; - - for (Instruction *GuardedI : SPMDCompatibilityTracker) { - BasicBlock *BB = GuardedI->getParent(); - auto *CalleeAA = A.lookupAAFor( - IRPosition::function(*GuardedI->getFunction()), nullptr, - DepClassTy::NONE); - assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo"); - auto &CalleeAAFunction = *cast(CalleeAA); - // Continue if instruction is already guarded. - if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI)) - continue; + SmallVector, 4> GuardedRegions; + + for (Instruction *GuardedI : SPMDCompatibilityTracker) { + BasicBlock *BB = GuardedI->getParent(); + auto *CalleeAA = A.lookupAAFor( + IRPosition::function(*GuardedI->getFunction()), nullptr, + DepClassTy::NONE); + assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo"); + auto &CalleeAAFunction = *cast(CalleeAA); + // Continue if instruction is already guarded. + if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI)) + continue; - Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr; - for (Instruction &I : *BB) { - // If instruction I needs to be guarded update the guarded region - // bounds. - if (SPMDCompatibilityTracker.contains(&I)) { - CalleeAAFunction.getGuardedInstructions().insert(&I); - if (GuardedRegionStart) - GuardedRegionEnd = &I; - else - GuardedRegionStart = GuardedRegionEnd = &I; + Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr; + for (Instruction &I : *BB) { + // If instruction I needs to be guarded update the guarded region + // bounds. 
+ if (SPMDCompatibilityTracker.contains(&I)) { + CalleeAAFunction.getGuardedInstructions().insert(&I); + if (GuardedRegionStart) + GuardedRegionEnd = &I; + else + GuardedRegionStart = GuardedRegionEnd = &I; - continue; - } + continue; + } - // Instruction I does not need guarding, store - // any region found and reset bounds. - if (GuardedRegionStart) { - GuardedRegions.push_back( - std::make_pair(GuardedRegionStart, GuardedRegionEnd)); - GuardedRegionStart = nullptr; - GuardedRegionEnd = nullptr; + // Instruction I does not need guarding, store + // any region found and reset bounds. + if (GuardedRegionStart) { + GuardedRegions.push_back( + std::make_pair(GuardedRegionStart, GuardedRegionEnd)); + GuardedRegionStart = nullptr; + GuardedRegionEnd = nullptr; + } } } - } - for (auto &GR : GuardedRegions) - CreateGuardedRegion(GR.first, GR.second); + for (auto &GR : GuardedRegions) + CreateGuardedRegion(GR.first, GR.second); + } else { + // Only allow 1 thread per block to continue executing the user code. + // + // InitCB = __kmpc_target_init(...) + // ThreadIdInBlock = __kmpc_get_hardware_thread_id_in_block(); + // if (ThreadIdInBlock != 0) return; + // UserCode: + // // user code + // + Function *Kernel = getAssociatedFunction(); + assert(Kernel && "Expected an associated function!"); + + // Create block for user code to branch to from initial block. 
+      BasicBlock *InitBB = KernelInitCB->getParent();
+      BasicBlock *UserCodeBB = InitBB->splitBasicBlock(
+          KernelInitCB->getNextNode(), "main.thread.user_code");
+      BasicBlock *ReturnBB = BasicBlock::Create(
+          Ctx, "exit.threads", Kernel, UserCodeBB);
+
+      // Register blocks with attributor:
+      A.registerManifestAddedBasicBlock(*InitBB);
+      A.registerManifestAddedBasicBlock(*UserCodeBB);
+      A.registerManifestAddedBasicBlock(*ReturnBB);
+
+      // Debug location:
+      const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
+      ReturnInst::Create(Ctx, ReturnBB)->setDebugLoc(DLoc);
+      InitBB->getTerminator()->eraseFromParent();
+
+      // Look up __kmpc_get_hardware_thread_id_in_block through the
+      // OMPInfoCache already bound at the top of this function.
+      Module &M = *Kernel->getParent();
+      FunctionCallee ThreadIdInBlockFn =
+          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+              M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
+
+      // Get thread ID in block.
+      CallInst *ThreadIdInBlock =
+          CallInst::Create(ThreadIdInBlockFn, "thread_id.in.block", InitBB);
+      OMPInfoCache.setCallingConvention(ThreadIdInBlockFn, ThreadIdInBlock);
+      ThreadIdInBlock->setDebugLoc(DLoc);
+
+      // Threads whose ID is not 0 branch to ReturnBB; thread 0 runs user code.
+      Instruction *IsNotMainThread =
+          ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, ThreadIdInBlock,
+                           ConstantInt::get(ThreadIdInBlock->getType(), 0),
+                           "thread.is_not_main", InitBB);
+      IsNotMainThread->setDebugLoc(DLoc);
+      BranchInst::Create(ReturnBB, UserCodeBB, IsNotMainThread, InitBB);
+    }
 
     // Adjust the global exec mode flag that tells the runtime what mode this
     // kernel is executed in.
@@ -3633,7 +3683,6 @@ const int InitRequiresFullRuntimeArgNo = 3; const int DeinitRequiresFullRuntimeArgNo = 2; - auto &Ctx = getAnchorValue().getContext(); A.changeUseAfterManifest( KernelInitCB->getArgOperandUse(InitModeArgNo), *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll =================================================================== --- llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -8,9 +8,9 @@ @G = external global i32 ;. -; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 -; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 @@ -18,11 +18,21 @@ define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) + +; This is a target region without a parallel region so it is SPMD but uses just 1 thread per team +; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +; CHECK-NEXT: [[IS_MAIN_THREAD:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 +; CHECK-NEXT: br i1 [[IS_MAIN_THREAD]], label 
%[[EXIT_THREADS_BB:.*]], label %[[MAIN_THREAD_USER_CODE_BB:.*]] + +; CHECK: [[EXIT_THREADS_BB]]: +; CHECK-NEXT: ret void + +; CHECK: [[MAIN_THREAD_USER_CODE_BB]]: ; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) @@ -38,9 +48,19 @@ define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) + +; This is a target region without a parallel region so it is SPMD but uses just 1 thread per team +; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +; CHECK-NEXT: [[IS_MAIN_THREAD:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 +; CHECK-NEXT: br i1 [[IS_MAIN_THREAD]], label %[[EXIT_THREADS_BB:.*]], label %[[MAIN_THREAD_USER_CODE_BB:.*]] + +; CHECK: [[EXIT_THREADS_BB]]: +; CHECK-NEXT: ret void + +; CHECK: [[MAIN_THREAD_USER_CODE_BB]]: ; CHECK-NEXT: call void @helper1() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)