diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1377,15 +1377,15 @@ ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB); llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); - CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint()); llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator(); - CodeGenIPBBTI->removeFromParent(); + CodeGenIPBBTI->eraseFromParent(); Builder.SetInsertPoint(CodeGenIPBB); EmitStmt(ParallelRegionBodyStmt); - Builder.Insert(CodeGenIPBBTI); + if (Builder.saveIP().isSet()) + Builder.CreateBr(&ContinuationBB); AllocaInsertPt = OldAllocaIP; ReturnBlock = OldReturnBlock; diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -21,11 +21,13 @@ // CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } -// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+22]];1;;\00" -// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+11]];1;;\00" +// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+29]];1;;\00" +// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+13]];1;;\00" +// CHECK-DEBUG-DAG: [[LOC3:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+19]];1;;\00" // 
IRBUILDER-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } -// IRBUILDER-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];0;;\00" -// IRBUILDER-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+8]];0;;\00" +// IRBUILDER-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+25]];0;;\00" +// IRBUILDER-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+9]];0;;\00" +// IRBUILDER-DEBUG-DAG: [[LOC3:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+15]];0;;\00" template void foo(T argc) {} @@ -38,6 +40,11 @@ foo(argc); chunk_t var;(void)var[0][0]; } + + if (argc[1]) +#pragma omp parallel + while(1); + return 0; } @@ -113,6 +120,8 @@ // ALL: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}}) // IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}}) +// CHECK-DAG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*)) +// IRBUILDER-DAG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*)) // ALL: ret i32 0 // ALL-NEXT: } // ALL-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) @@ -128,6 +137,13 @@ // ALL-DEBUG: ret i32 0 // ALL-DEBUG-NEXT: } +// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %{{.*}}, i32* noalias %{{.*}}) +// IRBUILDER-SAME: #[[FN_ATTRS:[0-9]+]] +// IRBUILDER: br label %while.body +// IRBUILDER-NOT: ret %{{.*}} +// IRBUILDER: br label %while.body +// IRBUILDER-NOT: ret %{{.*}} + // CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}}) // IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i{{64|32}}{{.*}} %{{.+}}) // CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], @@ -152,6 +168,12 @@ // CHECK-DEBUG-NEXT: } // ALL: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
+// CHECK-SAME: #[[FN_ATTRS:[0-9]+]] +// CHECK: br label %while.body +// CHECK-NOT: ret %{{.*}} +// CHECK: br label %while.cond +// CHECK-NOT: ret %{{.*}} // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc) // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}}) // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -439,6 +439,18 @@ Worklist.push_back(SuccBB); } + // If we didn't emit a branch to FiniBB during body generation, it means + // FiniBB is unreachable (e.g. while(1);). Stop generating all the + // unreachable blocks, and remove anything we are not going to use. + // Check to see if PRegPreFiniBB is reachable from PRegionBodyBB. + bool FoundPreFiniBB = false; + for (auto BI : ParallelRegionBlocks) { + if (BI == PRegPreFiniBB) { + FoundPreFiniBB = true; + break; + } + } + CodeExtractorAnalysisCache CEAC(*OuterFn); CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, @@ -564,7 +576,7 @@ } } - Builder.CreateCall(RTLFn, RealArgs); + CallInst *ForkCall = Builder.CreateCall(RTLFn, RealArgs); LLVM_DEBUG(dbgs() << "With fork_call placed: " << *Builder.GetInsertBlock()->getParent() << "\n"); @@ -583,7 +595,6 @@ if (!ElseTI) { CI->eraseFromParent(); } else { - // If an "if" clause was present we are now generating the serialized // version into the "else" branch. 
Builder.SetInsertPoint(ElseTI); @@ -608,22 +619,46 @@ << *Builder.GetInsertBlock()->getParent() << "\n"); } - // Adjust the finalization stack, verify the adjustment, and call the - // finalize function a last time to finalize values between the pre-fini block - // and the exit block if we left the parallel "the normal way". + assert(!FinalizationStack.empty() && "Unexpected finalization stack state!"); auto FiniInfo = FinalizationStack.pop_back_val(); - (void)FiniInfo; assert(FiniInfo.DK == OMPD_parallel && "Unexpected finalization stack state!"); - Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); - assert(PreFiniTI->getNumSuccessors() == 1 && - PreFiniTI->getSuccessor(0)->size() == 1 && - isa(PreFiniTI->getSuccessor(0)->getTerminator()) && - "Unexpected CFG structure!"); + if (FoundPreFiniBB) { + // PRegPreFiniBB is reachable. Adjust the finalization stack, verify the + // adjustment, and call the finalize function a last time to finalize values + // between the pre-fini block and the exit block if we left the parallel + // "the normal way". + (void)FiniInfo; - InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); - FiniCB(PreFiniIP); + Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); + assert(PreFiniTI->getNumSuccessors() == 1 && "Unexpected CFG structure!"); + + InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); + FiniCB(PreFiniIP); + } else { + // PRegPreFiniBB is unreachable. 
Remove the unreachable blocks + // and discard the finalization callback. + llvm::SmallVector ToBeDeletedBB; + ToBeDeletedBB.push_back(PRegPreFiniBB); + BranchInst *BBTerminator = + dyn_cast_or_null(PRegPreFiniBB->getTerminator()); + while (BBTerminator) { + assert(!BBTerminator->isConditional() && + "unexpected conditional branch in unreachable blocks"); + BasicBlock *next = BBTerminator->getSuccessor(0); + ToBeDeletedBB.push_back(next); + BBTerminator = dyn_cast_or_null(next->getTerminator()); + } + + for (auto BB : ToBeDeletedBB) { + BB->eraseFromParent(); + } + + BasicBlock *ForkBB = ForkCall->getParent(); + ForkBB->getTerminator()->eraseFromParent(); + AfterIP = InsertPointTy(ForkBB, ForkBB->end()); + } for (Instruction *I : ToBeDeleted) I->eraseFromParent(); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -400,6 +400,78 @@ EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); } +TEST_F(OpenMPIRBuilderTest, ParallelEndless) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumBodiesGenerated = 0; + unsigned NumPrivatizedVars = 0; + unsigned NumFinalizationPoints = 0; + + BasicBlock *OutlinedBodyBB = nullptr; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumBodiesGenerated; + + auto *OldBB = OutlinedBodyBB = CodeGenIP.getBlock(); + + // Create an endless loop. 
+ OldBB->getTerminator()->eraseFromParent(); + BranchInst::Create(OldBB, OldBB); + + Builder.ClearInsertionPoint(); + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + ++NumPrivatizedVars; + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumBodiesGenerated, 1U); + EXPECT_EQ(NumPrivatizedVars, 0U); + EXPECT_EQ(NumFinalizationPoints, 0U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + ASSERT_NE(OutlinedBodyBB, nullptr); + Function *OutlinedFn = OutlinedBodyBB->getParent(); + EXPECT_NE(F, OutlinedFn); + EXPECT_FALSE(verifyModule(*M)); + EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); + EXPECT_EQ(OutlinedFn->arg_size(), 2U); + + EXPECT_EQ(OutlinedFn->getNumUses(), 1U); + User *Usr = OutlinedFn->user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); +} + TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M);