Patch summary (free text placed ahead of the first "diff --git" header).
NOTE(review): patch tools are expected to skip text before the first header;
confirm with the tool used to apply this.

* llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  After createCanonicalLoop(..., "section_loop") the builder is positioned at
  the terminator of AllocaIP's block and AllocaIP is replaced by that saved
  insert point, before it is passed to applyStaticWorkshareLoop.

* clang/test/OpenMP/cancel_codegen.cpp
  For the CHECK3, CHECK4, CHECK9 and CHECK10 prefixes the eight
  P_LASTITER/P_LOWERBOUND/P_UPPERBOUND/P_STRIDE allocas are no longer expected
  ahead of DOTOMP_IV; they are expected after the __kmpc_global_thread_num
  call and directly before "br label [[OMP_PARALLEL]]".

* llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
  Adds CreateSectionsSimple: one section callback that only branches to
  FiniBB, AllocaIP taken from the entry block's first insertion point; the
  test expects the entry block to have a terminator and verifyModule to
  report no errors. CreateSections additionally gets a verifyModule check.

diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1324,14 +1324,6 @@
 // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8
-// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
@@ -1348,6 +1340,14 @@
 // CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
 // CHECK3-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]]
 // CHECK3: omp_parallel:
 // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
@@ -1951,14 +1951,6 @@
 // CHECK4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8
-// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
@@ -1975,6 +1967,14 @@
 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
 // CHECK4-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]]
 // CHECK4: omp_parallel:
 // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
@@ -3818,14 +3818,6 @@
 // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8
-// CHECK9-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
@@ -3842,6 +3834,14 @@
 // CHECK9-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
 // CHECK9-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK9-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK9-NEXT: br label [[OMP_PARALLEL:%.*]]
 // CHECK9: omp_parallel:
 // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
@@ -4445,14 +4445,6 @@
 // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK10-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8
-// CHECK10-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
-// CHECK10-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
@@ -4469,6 +4461,14 @@
 // CHECK10-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
 // CHECK10-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK10-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK10-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK10-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK10-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
+// CHECK10-NEXT: [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK10-NEXT: [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK10-NEXT: [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK10-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK10-NEXT: br label [[OMP_PARALLEL:%.*]]
 // CHECK10: omp_parallel:
 // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -993,6 +993,8 @@
   Value *ST = ConstantInt::get(I32Ty, 1);
   llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
       Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
+  Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
+  AllocaIP = Builder.saveIP();
   InsertPointTy AfterIP =
       applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
   BasicBlock *LoopAfterBB = AfterIP.getBlock();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -3487,6 +3487,37 @@
   EXPECT_TRUE(isSimpleBinaryReduction(Bitcast, FnReductionBB, &Opcode));
 }
 
+TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
+  llvm::SmallVector<BasicBlock *, 4> CaseBBs;
+
+  auto FiniCB = [&](InsertPointTy IP) {};
+  auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &FiniBB) {
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateBr(&FiniBB);
+  };
+  SectionCBVector.push_back(SectionCB);
+
+  auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                   llvm::Value &, llvm::Value &Val,
+                   llvm::Value *&ReplVal) { return CodeGenIP; };
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
+                                              PrivCB, FiniCB, false, false));
+  Builder.CreateRetVoid(); // Required at the end of the function
+  EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateSections) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
@@ -3607,6 +3638,7 @@
 
   ASSERT_EQ(NumBodiesGenerated, 2U);
   ASSERT_EQ(NumFiniCBCalls, 1U);
+  EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {