diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3715,13 +3715,52 @@ static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { if (S.hasCancel()) return false; - for (OMPClause *C : S.clauses()) - if (!isa<OMPNowaitClause>(C)) - return false; + for (OMPClause *C : S.clauses()) { + if (isa<OMPNowaitClause>(C)) + continue; + + if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { + if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) + return false; + if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) + return false; + switch (SC->getScheduleKind()) { + case OMPC_SCHEDULE_auto: + case OMPC_SCHEDULE_dynamic: + case OMPC_SCHEDULE_runtime: + case OMPC_SCHEDULE_guided: + case OMPC_SCHEDULE_static: + continue; + case OMPC_SCHEDULE_unknown: + return false; + } + } + + return false; + } return true; } +static llvm::omp::ScheduleKind +convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { + switch (ScheduleClauseKind) { + case OMPC_SCHEDULE_unknown: + return llvm::omp::OMP_SCHEDULE_Default; + case OMPC_SCHEDULE_auto: + return llvm::omp::OMP_SCHEDULE_Auto; + case OMPC_SCHEDULE_dynamic: + return llvm::omp::OMP_SCHEDULE_Dynamic; + case OMPC_SCHEDULE_guided: + return llvm::omp::OMP_SCHEDULE_Guided; + case OMPC_SCHEDULE_runtime: + return llvm::omp::OMP_SCHEDULE_Runtime; + case OMPC_SCHEDULE_static: + return llvm::omp::OMP_SCHEDULE_Static; + } + llvm_unreachable("Unhandled schedule kind"); +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; bool UseOMPIRBuilder = @@ -3730,18 +3769,29 @@ UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { // Use the OpenMPIRBuilder if enabled.
if (UseOMPIRBuilder) { + bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); + + llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; + llvm::Value *ChunkSize = nullptr; + if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { + SchedKind = + convertClauseKindToSchedKind(SchedClause->getScheduleKind()); + if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) + ChunkSize = EmitScalarExpr(ChunkSizeExpr); + } + // Emit the associated statement and get its loop representation. const Stmt *Inner = S.getRawStmt(); llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1); - bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI, - AllocaIP, NeedsBarrier); + AllocaIP, NeedsBarrier, SchedKind, + ChunkSize); return; } diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1366,7 +1366,7 @@ // CHECK3-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -1402,7 +1402,7 @@ // CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE30]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 0) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] @@ -2002,7 +2002,7 @@ // CHECK4-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -2038,7 +2038,7 @@ // CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 // CHECK4-NEXT: store i32 1, 
i32* [[P_STRIDE30]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 0) // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 // CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] @@ -3878,7 +3878,7 @@ // CHECK9-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -3914,7 +3914,7 @@ // CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE30]], align 4 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 0) // CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] @@ -4514,7 +4514,7 @@ // CHECK10-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -4550,7 +4550,7 @@ // CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND29]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE30]], align 4 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* 
[[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 0) // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]] diff --git a/clang/test/OpenMP/irbuilder_for_iterator.cpp b/clang/test/OpenMP/irbuilder_for_iterator.cpp --- a/clang/test/OpenMP/irbuilder_for_iterator.cpp +++ b/clang/test/OpenMP/irbuilder_for_iterator.cpp @@ -59,7 +59,7 @@ // CHECK-NEXT: store i64 [[TMP2]], i64* [[P_UPPERBOUND]], align 8 // CHECK-NEXT: store i64 1, i64* [[P_STRIDE]], align 8 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 0) // CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[TMP3]] diff --git a/clang/test/OpenMP/irbuilder_for_rangefor.cpp b/clang/test/OpenMP/irbuilder_for_rangefor.cpp --- a/clang/test/OpenMP/irbuilder_for_rangefor.cpp +++ b/clang/test/OpenMP/irbuilder_for_rangefor.cpp @@ -77,7 +77,7 @@ // CHECK-NEXT: store i64 [[TMP5]], i64* [[P_UPPERBOUND]], align 8 // CHECK-NEXT: store i64 1, i64* [[P_STRIDE]], align 8 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 0) // CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8 // CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8 // CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], [[TMP6]] diff --git a/clang/test/OpenMP/irbuilder_for_unsigned.c b/clang/test/OpenMP/irbuilder_for_unsigned.c --- a/clang/test/OpenMP/irbuilder_for_unsigned.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned.c @@ -47,7 +47,7 @@ // CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_for_unsigned_auto.c copy from clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c copy 
to clang/test/OpenMP/irbuilder_for_unsigned_auto.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_auto.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for( +// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_auto( // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 // CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 @@ -23,7 +23,7 @@ // CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 // CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 // CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 33, i32* %[[I]], align 4 // CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 @@ -34,113 +34,71 @@ // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// 
CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 38, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1) +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] +// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: +// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] +// CHECK-NEXT: [[OMP_LOOP_COND]]: +// CHECK-NEXT: %[[UB:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: 
%[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) +// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] +// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 +// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 +// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* 
%[[TMP6]], i64 %[[IDXPROM2]] +// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 +// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] +// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 +// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 +// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] +// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 +// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] +// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 +// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 +// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] // CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] +// CHECK-NEXT: br label %[[OMP_LOOP_INC]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] +// CHECK-NEXT: [[OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: // CHECK-NEXT: 
%[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) // CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: // CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_AFTER]]: // CHECK-NEXT: ret void +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: +// CHECK-NEXT: %[[TMP14:.+]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]]) +// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 +// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 +// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] // CHECK-NEXT: } -void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { -#pragma omp for -#pragma omp unroll partial(2) - for (int i = 0; i < 2; i++) { +extern "C" void workshareloop_unsigned_auto(float *a, float *b, float *c, float *d) { +#pragma omp for schedule(auto) + for (unsigned i = 33; i < 32000000; i += 7) { a[i] = b[i] * c[i] * d[i]; } } @@ -161,17 +119,17 @@ // CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: store i32 32000000, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: %[[CMP:.+]] = 
icmp ult i32 %[[TMP4]], %[[TMP5]] // CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_TRUE]]: // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] // CHECK-NEXT: br label %[[COND_END:.+]] @@ -199,7 +157,7 @@ // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] +// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] // CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 @@ -208,8 +166,5 @@ // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} // CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c copy from clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c copy to clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c @@ -1,11 +1,11 @@ // NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for( +// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_dynamic( // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 // CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 @@ -23,7 +23,7 @@ // CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 // CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 // CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 33, i32* %[[I]], align 4 // CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 @@ -34,113 +34,71 @@ // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 
%[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ 
%[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 35, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1) +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] +// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: +// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] +// CHECK-NEXT: [[OMP_LOOP_COND]]: +// CHECK-NEXT: %[[UB:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: 
%[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) +// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] +// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 +// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 +// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] +// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 +// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] +// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** 
%[[D_ADDR]], align 8 +// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 +// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] +// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 +// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] +// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 +// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 +// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] // CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] +// CHECK-NEXT: br label %[[OMP_LOOP_INC]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] +// CHECK-NEXT: [[OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) // CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label 
%[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: // CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_AFTER]]: // CHECK-NEXT: ret void +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: +// CHECK-NEXT: %[[TMP14:.+]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]]) +// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 +// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 +// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] // CHECK-NEXT: } -void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { -#pragma omp for -#pragma omp unroll partial(2) - for (int i = 0; i < 2; i++) { +extern "C" void workshareloop_unsigned_dynamic(float *a, float *b, float *c, float *d) { +#pragma omp for schedule(dynamic) + for (unsigned i = 33; i < 32000000; i += 7) { a[i] = b[i] * c[i] * d[i]; } } @@ -161,17 +119,17 @@ // CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: store i32 32000000, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] // CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_TRUE]]: // CHECK-NEXT: %[[TMP6:.+]] = load i32, 
i32* %[[DOTSTOP]], align 4 // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] // CHECK-NEXT: br label %[[COND_END:.+]] @@ -199,7 +157,7 @@ // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] +// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] // CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 @@ -208,8 +166,5 @@ // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} // CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c copy from clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c copy to clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp 
-fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for( +// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_dynamic_chunked( // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 // CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 @@ -23,7 +23,7 @@ // CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 // CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 // CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 33, i32* %[[I]], align 4 // CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 @@ -34,113 +34,71 @@ // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* %[[P_LOWERBOUND]], align 4 +// 
CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 
%[[OMP_GLOBAL_THREAD_NUM]], i32 35, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 5) +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] +// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: +// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] +// CHECK-NEXT: [[OMP_LOOP_COND]]: +// CHECK-NEXT: %[[UB:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] 
= load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) +// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] +// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 +// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 +// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] +// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 +// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] +// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 +// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 +// CHECK-NEXT: 
%[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] +// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 +// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] +// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 +// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 +// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] // CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] +// CHECK-NEXT: br label %[[OMP_LOOP_INC]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] +// CHECK-NEXT: [[OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) // CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: // CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: // CHECK-NEXT: 
[[OMP_LOOP_AFTER]]: // CHECK-NEXT: ret void +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: +// CHECK-NEXT: %[[TMP14:.+]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]]) +// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 +// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 +// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] // CHECK-NEXT: } -void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { -#pragma omp for -#pragma omp unroll partial(2) - for (int i = 0; i < 2; i++) { +extern "C" void workshareloop_unsigned_dynamic_chunked(float *a, float *b, float *c, float *d) { +#pragma omp for schedule(dynamic, 5) + for (unsigned i = 33; i < 32000000; i += 7) { a[i] = b[i] * c[i] * d[i]; } } @@ -161,17 +119,17 @@ // CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: store i32 32000000, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] // CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_TRUE]]: // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], 
%[[TMP7]] +// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] // CHECK-NEXT: br label %[[COND_END:.+]] @@ -199,7 +157,7 @@ // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] +// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] // CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 @@ -208,8 +166,5 @@ // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} // CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c copy from clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c copy to clang/test/OpenMP/irbuilder_for_unsigned_runtime.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 
-x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for( +// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_runtime( // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 // CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 @@ -23,7 +23,7 @@ // CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 // CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 // CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 33, i32* %[[I]], align 4 // CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 @@ -34,113 +34,71 @@ // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 37, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1) +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: 
[[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] +// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: +// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] +// CHECK-NEXT: [[OMP_LOOP_COND]]: +// CHECK-NEXT: %[[UB:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** 
%[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) +// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] +// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 +// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 +// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] +// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 +// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] +// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 +// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 +// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] +// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 +// CHECK-NEXT: 
%[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] +// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 +// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 +// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] // CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] +// CHECK-NEXT: br label %[[OMP_LOOP_INC]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] +// CHECK-NEXT: [[OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) // CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: // CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_AFTER]]: // CHECK-NEXT: ret void +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: +// CHECK-NEXT: %[[TMP14:.+]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* 
@1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]]) +// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 +// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 +// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] // CHECK-NEXT: } -void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { -#pragma omp for -#pragma omp unroll partial(2) - for (int i = 0; i < 2; i++) { +extern "C" void workshareloop_unsigned_runtime(float *a, float *b, float *c, float *d) { +#pragma omp for schedule(runtime) + for (unsigned i = 33; i < 32000000; i += 7) { a[i] = b[i] * c[i] * d[i]; } } @@ -161,17 +119,17 @@ // CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: store i32 32000000, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] // CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_TRUE]]: // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] // 
CHECK-NEXT: br label %[[COND_END:.+]] @@ -199,7 +157,7 @@ // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] +// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] // CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 @@ -208,8 +166,5 @@ // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} // CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c copy from clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c copy to clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-LABEL: define 
{{.*}}@unroll_partial_factor_for( +// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_static_chunked( // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 // CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 @@ -23,7 +23,7 @@ // CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 // CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 // CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 33, i32* %[[I]], align 4 // CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 @@ -34,113 +34,112 @@ // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) // CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[TMP3:.+]] = sub i32 %[[DOTCOUNT]], 1 +// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// 
CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: 
%[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 33, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 5) +// CHECK-NEXT: %[[OMP_FIRSTCHUNK_LB:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[OMP_FIRSTCHUNK_UB:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[TMP4:.+]] = add i32 %[[OMP_FIRSTCHUNK_UB]], 1 +// CHECK-NEXT: %[[OMP_CHUNK_RANGE:.+]] = sub i32 %[[TMP4]], %[[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: %[[OMP_DISPATCH_STRIDE:.+]] = load i32, i32* %[[P_STRIDE]], align 4 +// CHECK-NEXT: %[[TMP5:.+]] = sub nuw i32 %[[DOTCOUNT]], %[[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: %[[TMP6:.+]] = icmp ule i32 %[[DOTCOUNT]], %[[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[TMP5]], 1 +// CHECK-NEXT: %[[TMP8:.+]] = udiv i32 %[[TMP7]], %[[OMP_DISPATCH_STRIDE]] +// CHECK-NEXT: %[[TMP9:.+]] = add i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[TMP10:.+]] = icmp ule i32 %[[TMP5]], %[[OMP_DISPATCH_STRIDE]] +// CHECK-NEXT: %[[TMP11:.+]] = select i1 %[[TMP10]], i32 1, i32 %[[TMP9]] +// CHECK-NEXT: %[[OMP_DISPATCH_TRIPCOUNT:.+]] = select i1 %[[TMP6]], i32 0, i32 %[[TMP11]] +// CHECK-NEXT: br label %[[OMP_DISPATCH_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_DISPATCH_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_DISPATCH_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_DISPATCH_HEADER]]: +// CHECK-NEXT: %[[OMP_DISPATCH_IV:.+]] = phi i32 [ 0, %[[OMP_DISPATCH_PREHEADER]] ], [ %[[OMP_DISPATCH_NEXT:.+]], %[[OMP_DISPATCH_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_DISPATCH_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_DISPATCH_COND]]: +// CHECK-NEXT: %[[OMP_DISPATCH_CMP:.+]] = icmp ult i32 %[[OMP_DISPATCH_IV]], %[[OMP_DISPATCH_TRIPCOUNT]] +// CHECK-NEXT: br i1 
%[[OMP_DISPATCH_CMP]], label %[[OMP_DISPATCH_BODY:.+]], label %[[OMP_DISPATCH_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_DISPATCH_BODY]]: +// CHECK-NEXT: %[[TMP12:.+]] = mul i32 %[[OMP_DISPATCH_IV]], %[[OMP_DISPATCH_STRIDE]] +// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[TMP12]], %[[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER9:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_DISPATCH_INC]]: +// CHECK-NEXT: %[[OMP_DISPATCH_NEXT]] = add nuw i32 %[[OMP_DISPATCH_IV]], 1 +// CHECK-NEXT: br label %[[OMP_DISPATCH_HEADER]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_DISPATCH_EXIT]]: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM10:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK-NEXT: br label %[[OMP_DISPATCH_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_DISPATCH_AFTER]]: +// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER9]]: +// CHECK-NEXT: %[[TMP14:.+]] = add i32 %[[TMP13]], %[[OMP_CHUNK_RANGE]] +// CHECK-NEXT: %[[OMP_CHUNK_IS_LAST:.+]] = icmp uge i32 %[[TMP14]], %[[DOTCOUNT]] +// CHECK-NEXT: %[[TMP15:.+]] = sub i32 %[[DOTCOUNT]], %[[TMP13]] +// CHECK-NEXT: %[[OMP_CHUNK_TRIPCOUNT:.+]] = select i1 %[[OMP_CHUNK_IS_LAST]], i32 %[[TMP15]], i32 %[[OMP_CHUNK_RANGE]] +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_HEADER]]: +// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER9]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_COND]]: +// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[OMP_CHUNK_TRIPCOUNT]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] // CHECK-EMPTY: 
// CHECK-NEXT: [[OMP_LOOP_BODY]]: +// CHECK-NEXT: %[[TMP16:.+]] = add i32 %[[OMP_LOOP_IV]], %[[TMP13]] // CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) // CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 // CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 +// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP18]] to i64 // CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] // CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 // CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 // CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 +// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP21]] to i64 // CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] // CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 // CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] // CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 // CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 +// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP24]] to i64 // CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]] // CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4 // CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] // CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8 // CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 +// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP27]] to i64 // CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]] 
// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] +// CHECK-NEXT: br label %[[OMP_LOOP_INC]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] +// CHECK-NEXT: [[OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] // CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] +// CHECK-NEXT: [[OMP_LOOP_EXIT]]: +// CHECK-NEXT: br label %[[OMP_DISPATCH_INC]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_AFTER]]: // CHECK-NEXT: ret void // CHECK-NEXT: } -void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { -#pragma omp for -#pragma omp unroll partial(2) - for (int i = 0; i < 2; i++) { +extern "C" void workshareloop_unsigned_static_chunked(float *a, float *b, float *c, float *d) { +#pragma omp for schedule(static, 5) + for (unsigned i = 33; i < 32000000; i += 7) { a[i] = b[i] * c[i] * d[i]; } } @@ -161,17 +160,17 @@ // CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** 
%[[TMP1]], align 8 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: store i32 32000000, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] // CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_TRUE]]: // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] // CHECK-NEXT: br label %[[COND_END:.+]] @@ -199,7 +198,7 @@ // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] +// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] // CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 @@ -208,8 +207,5 @@ // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} // CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] 
= distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c --- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c +++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -134,3 +134,444 @@ } #endif + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_0v( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL]]: +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*)) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT12:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT12]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT_SPLIT]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_0v..omp_par.1( +// CHECK-NEXT: [[OMP_PAR_ENTRY:.*]]: +// CHECK-NEXT: %[[TID_ADDR_LOCAL:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[TID_ADDR:.+]], align 4 +// CHECK-NEXT: store i32 %[[TMP0]], i32* %[[TID_ADDR_LOCAL]], align 4 +// CHECK-NEXT: %[[TID:.+]] = load i32, i32* %[[TID_ADDR_LOCAL]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION]]: +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM1:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL]]: +// CHECK-NEXT: call 
void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par to void (i32*, i32*, ...)*)) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT7_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT7_SPLIT]]: +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT12_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT12_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_0v..omp_par( +// CHECK-NEXT: [[OMP_PAR_ENTRY4:.*]]: +// CHECK-NEXT: %[[TID_ADDR_LOCAL8:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[TID_ADDR2:.+]], align 4 +// CHECK-NEXT: store i32 %[[TMP0]], i32* %[[TID_ADDR_LOCAL8]], align 4 +// CHECK-NEXT: %[[TID9:.+]] = load i32, i32* %[[TID_ADDR_LOCAL8]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION5:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION5]]: +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE6:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE6]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_1Pfid( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[STRUCTARG14:.+]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8 +// CHECK-NEXT: %[[STRUCTARG:.+]] = alloca { i32*, double*, float** }, align 8 +// CHECK-NEXT: %[[R_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[A_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[B_ADDR:.+]] = alloca double, align 8 +// CHECK-NEXT: store float* 
%[[R:.+]], float** %[[R_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[A:.+]], i32* %[[A_ADDR]], align 4 +// CHECK-NEXT: store double %[[B:.+]], double* %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL]]: +// CHECK-NEXT: %[[GEP_STRUCTARG:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[STRUCTARG14]], i32 0, i32 0 +// CHECK-NEXT: store { i32*, double*, float** }* %[[STRUCTARG]], { i32*, double*, float** }** %[[GEP_STRUCTARG]], align 8 +// CHECK-NEXT: %[[GEP_A_ADDR15:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[STRUCTARG14]], i32 0, i32 1 +// CHECK-NEXT: store i32* %[[A_ADDR]], i32** %[[GEP_A_ADDR15]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR16:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[STRUCTARG14]], i32 0, i32 2 +// CHECK-NEXT: store double* %[[B_ADDR]], double** %[[GEP_B_ADDR16]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR17:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[STRUCTARG14]], i32 0, i32 3 +// CHECK-NEXT: store float** %[[R_ADDR]], float*** %[[GEP_R_ADDR17]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* %[[STRUCTARG14]]) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT13:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT13]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT_SPLIT]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_1Pfid..omp_par.2( +// CHECK-NEXT: [[OMP_PAR_ENTRY:.*]]: +// CHECK-NEXT: %[[GEP_STRUCTARG:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[TMP0:.+]], i32 0, i32 0 +// CHECK-NEXT: %[[LOADGEP_STRUCTARG:.+]] = load { i32*, double*, float** }*, { i32*, double*, float** }** %[[GEP_STRUCTARG]], align 8 +// CHECK-NEXT: %[[GEP_A_ADDR:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[TMP0]], i32 0, i32 1 +// CHECK-NEXT: %[[LOADGEP_A_ADDR:.+]] = load i32*, i32** %[[GEP_A_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[TMP0]], i32 0, i32 2 +// CHECK-NEXT: %[[LOADGEP_B_ADDR:.+]] = load double*, double** %[[GEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR:.+]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* %[[TMP0]], i32 0, i32 3 +// CHECK-NEXT: %[[LOADGEP_R_ADDR:.+]] = load float**, float*** %[[GEP_R_ADDR]], align 8 +// CHECK-NEXT: %[[TID_ADDR_LOCAL:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP1:.+]] = load i32, i32* %[[TID_ADDR:.+]], align 4 +// CHECK-NEXT: store i32 
%[[TMP1]], i32* %[[TID_ADDR_LOCAL]], align 4 +// CHECK-NEXT: %[[TID:.+]] = load i32, i32* %[[TID_ADDR_LOCAL]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION]]: +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM1:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL]]: +// CHECK-NEXT: %[[GEP_A_ADDR1:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[LOADGEP_A_ADDR]], i32** %[[GEP_A_ADDR1]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR2:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]], i32 0, i32 1 +// CHECK-NEXT: store double* %[[LOADGEP_B_ADDR]], double** %[[GEP_B_ADDR2]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR3:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]], i32 0, i32 2 +// CHECK-NEXT: store float** %[[LOADGEP_R_ADDR]], float*** %[[GEP_R_ADDR3]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par to void (i32*, i32*, ...)*), { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]]) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT7_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT7_SPLIT]]: +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT13_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT13_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_1Pfid..omp_par( +// CHECK-NEXT: [[OMP_PAR_ENTRY4:.*]]: +// CHECK-NEXT: %[[GEP_A_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0:.+]], i32 0, i32 0 +// CHECK-NEXT: %[[LOADGEP_A_ADDR:.+]] = load i32*, i32** %[[GEP_A_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0]], i32 0, i32 1 +// CHECK-NEXT: %[[LOADGEP_B_ADDR:.+]] = load double*, double** %[[GEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0]], i32 0, i32 2 +// CHECK-NEXT: %[[LOADGEP_R_ADDR:.+]] = load float**, float*** %[[GEP_R_ADDR]], align 8 +// CHECK-NEXT: %[[TID_ADDR_LOCAL8:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP1:.+]] = load i32, i32* %[[TID_ADDR2:.+]], align 4 +// CHECK-NEXT: store i32 %[[TMP1]], i32* %[[TID_ADDR_LOCAL8]], align 4 +// CHECK-NEXT: %[[TID9:.+]] = load i32, i32* %[[TID_ADDR_LOCAL8]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION5:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION5]]: +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// 
CHECK-NEXT: %[[CONV:.+]] = sitofp i32 %[[TMP2]] to double +// CHECK-NEXT: %[[TMP3:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[CONV]], %[[TMP3]] +// CHECK-NEXT: %[[CONV12:.+]] = fptrunc double %[[ADD]] to float +// CHECK-NEXT: %[[TMP4:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV12]], float* %[[TMP4]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE6:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE6]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_2Pfid( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[STRUCTARG68:.+]] = alloca { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8 +// CHECK-NEXT: %[[STRUCTARG64:.+]] = alloca { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8 +// CHECK-NEXT: %[[STRUCTARG59:.+]] = alloca { i32*, double*, float** }, align 8 +// CHECK-NEXT: %[[STRUCTARG:.+]] = alloca { i32*, double*, float** }, align 8 +// CHECK-NEXT: %[[R_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[A_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[B_ADDR:.+]] = alloca double, align 8 +// CHECK-NEXT: store float* %[[R:.+]], float** %[[R_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[A:.+]], i32* %[[A_ADDR]], align 4 +// CHECK-NEXT: store double %[[B:.+]], double* %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL]]: +// CHECK-NEXT: %[[GEP_A_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { 
i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG68]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[A_ADDR]], i32** %[[GEP_A_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG68]], i32 0, i32 1 +// CHECK-NEXT: store double* %[[B_ADDR]], double** %[[GEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG68]], i32 0, i32 2 +// CHECK-NEXT: store float** %[[R_ADDR]], float*** %[[GEP_R_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG64:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG68]], i32 0, i32 3 +// CHECK-NEXT: store { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG64]], { i32*, double*, float**, { i32*, 
double*, float** }*, { i32*, double*, float** }* }** %[[GEP_STRUCTARG64]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG69:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG68]], i32 0, i32 4 +// CHECK-NEXT: store { i32*, double*, float** }* %[[STRUCTARG]], { i32*, double*, float** }** %[[GEP_STRUCTARG69]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG5970:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG68]], i32 0, i32 5 +// CHECK-NEXT: store { i32*, double*, float** }* %[[STRUCTARG59]], { i32*, double*, float** }** %[[GEP_STRUCTARG5970]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[STRUCTARG68]]) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT55:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT55]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT_SPLIT]]: +// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV56:.+]] = sitofp i32 %[[TMP0]] to double +// CHECK-NEXT: %[[TMP1:.+]] = load double, double* %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD57:.+]] = fadd double %[[CONV56]], %[[TMP1]] +// CHECK-NEXT: %[[CONV58:.+]] = fptrunc double %[[ADD57]] to float +// CHECK-NEXT: %[[TMP2:.+]] = load float*, float** %[[R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV58]], float* %[[TMP2]], align 4 +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_2Pfid..omp_par.5( +// CHECK-NEXT: [[OMP_PAR_ENTRY:.*]]: +// CHECK-NEXT: %[[GEP_A_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0:.+]], i32 0, i32 0 +// CHECK-NEXT: %[[LOADGEP_A_ADDR:.+]] = load i32*, i32** %[[GEP_A_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, 
float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 1 +// CHECK-NEXT: %[[LOADGEP_B_ADDR:.+]] = load double*, double** %[[GEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 2 +// CHECK-NEXT: %[[LOADGEP_R_ADDR:.+]] = load float**, float*** %[[GEP_R_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG64:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 3 +// CHECK-NEXT: %[[LOADGEP_STRUCTARG64:.+]] = load { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }** %[[GEP_STRUCTARG64]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, 
float** }* }* %[[TMP0]], i32 0, i32 4 +// CHECK-NEXT: %[[LOADGEP_STRUCTARG:.+]] = load { i32*, double*, float** }*, { i32*, double*, float** }** %[[GEP_STRUCTARG]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG59:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 5 +// CHECK-NEXT: %[[LOADGEP_STRUCTARG59:.+]] = load { i32*, double*, float** }*, { i32*, double*, float** }** %[[GEP_STRUCTARG59]], align 8 +// CHECK-NEXT: %[[TID_ADDR_LOCAL:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP1:.+]] = load i32, i32* %[[TID_ADDR:.+]], align 4 +// CHECK-NEXT: store i32 %[[TMP1]], i32* %[[TID_ADDR_LOCAL]], align 4 +// CHECK-NEXT: %[[TID:.+]] = load i32, i32* %[[TID_ADDR_LOCAL]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION]]: +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV:.+]] = sitofp i32 %[[TMP2]] to double +// CHECK-NEXT: %[[TMP3:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[CONV]], %[[TMP3]] +// CHECK-NEXT: %[[CONV1:.+]] = fptrunc double %[[ADD]] to float +// CHECK-NEXT: %[[TMP4:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV1]], float* %[[TMP4]], align 4 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM2:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL]]: +// CHECK-NEXT: %[[GEP_A_ADDR65:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, 
float** }*, { i32*, double*, float** }* }* %[[LOADGEP_STRUCTARG64]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[LOADGEP_A_ADDR]], i32** %[[GEP_A_ADDR65]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR66:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[LOADGEP_STRUCTARG64]], i32 0, i32 1 +// CHECK-NEXT: store double* %[[LOADGEP_B_ADDR]], double** %[[GEP_B_ADDR66]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR67:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[LOADGEP_STRUCTARG64]], i32 0, i32 2 +// CHECK-NEXT: store float** %[[LOADGEP_R_ADDR]], float*** %[[GEP_R_ADDR67]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG1:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[LOADGEP_STRUCTARG64]], i32 0, i32 3 +// CHECK-NEXT: store { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]], { i32*, double*, float** }** %[[GEP_STRUCTARG1]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG592:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[LOADGEP_STRUCTARG64]], i32 0, i32 4 +// CHECK-NEXT: store { i32*, double*, float** }* %[[LOADGEP_STRUCTARG59]], { i32*, double*, float** }** %[[GEP_STRUCTARG592]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*)* @_Z17nested_parallel_2Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[LOADGEP_STRUCTARG64]]) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT51:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT51]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT8_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT8_SPLIT]]: +// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV52:.+]] = sitofp i32 %[[TMP5]] to double +// CHECK-NEXT: %[[TMP6:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD53:.+]] = fadd double %[[CONV52]], %[[TMP6]] +// CHECK-NEXT: %[[CONV54:.+]] = fptrunc double %[[ADD53]] to float +// CHECK-NEXT: %[[TMP7:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV54]], float* %[[TMP7]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT55_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT55_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_2Pfid..omp_par.4( +// CHECK-NEXT: [[OMP_PAR_ENTRY5:.*]]: +// CHECK-NEXT: %[[GEP_A_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0:.+]], i32 0, i32 0 +// CHECK-NEXT: %[[LOADGEP_A_ADDR:.+]] = load i32*, i32** %[[GEP_A_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { 
i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 1 +// CHECK-NEXT: %[[LOADGEP_B_ADDR:.+]] = load double*, double** %[[GEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 2 +// CHECK-NEXT: %[[LOADGEP_R_ADDR:.+]] = load float**, float*** %[[GEP_R_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 3 +// CHECK-NEXT: %[[LOADGEP_STRUCTARG:.+]] = load { i32*, double*, float** }*, { i32*, double*, float** }** %[[GEP_STRUCTARG]], align 8 +// CHECK-NEXT: %[[GEP_STRUCTARG59:.+]] = getelementptr { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* %[[TMP0]], i32 0, i32 4 +// CHECK-NEXT: %[[LOADGEP_STRUCTARG59:.+]] = load { i32*, double*, float** }*, { i32*, double*, float** }** %[[GEP_STRUCTARG59]], align 8 +// CHECK-NEXT: %[[TID_ADDR_LOCAL9:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP1:.+]] = load i32, i32* %[[TID_ADDR3:.+]], align 4 +// CHECK-NEXT: store i32 %[[TMP1]], i32* %[[TID_ADDR_LOCAL9]], align 4 +// CHECK-NEXT: %[[TID10:.+]] = load i32, i32* %[[TID_ADDR_LOCAL9]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION6:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION6]]: +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV13:.+]] = sitofp i32 %[[TMP2]] to double +// CHECK-NEXT: %[[TMP3:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD14:.+]] = fadd double %[[CONV13]], %[[TMP3]] +// CHECK-NEXT: %[[CONV15:.+]] = fptrunc 
double %[[ADD14]] to float +// CHECK-NEXT: %[[TMP4:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV15]], float* %[[TMP4]], align 4 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM16:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL]]: +// CHECK-NEXT: %[[GEP_A_ADDR1:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[LOADGEP_A_ADDR]], i32** %[[GEP_A_ADDR1]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR2:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]], i32 0, i32 1 +// CHECK-NEXT: store double* %[[LOADGEP_B_ADDR]], double** %[[GEP_B_ADDR2]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR3:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]], i32 0, i32 2 +// CHECK-NEXT: store float** %[[LOADGEP_R_ADDR]], float*** %[[GEP_R_ADDR3]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_2Pfid..omp_par to void (i32*, i32*, ...)*), { i32*, double*, float** }* %[[LOADGEP_STRUCTARG]]) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT22_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT22_SPLIT]]: +// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV30:.+]] = sitofp i32 %[[TMP5]] to double +// CHECK-NEXT: %[[TMP6:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD31:.+]] = fadd double %[[CONV30]], %[[TMP6]] +// CHECK-NEXT: %[[CONV32:.+]] = fptrunc double %[[ADD31]] to float +// CHECK-NEXT: %[[TMP7:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV32]], float* %[[TMP7]], align 4 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM33:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label %[[OMP_PARALLEL63:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PARALLEL63]]: +// CHECK-NEXT: %[[GEP_A_ADDR60:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG59]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[LOADGEP_A_ADDR]], i32** %[[GEP_A_ADDR60]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR61:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG59]], i32 0, i32 1 +// CHECK-NEXT: store double* %[[LOADGEP_B_ADDR]], double** %[[GEP_B_ADDR61]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR62:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[LOADGEP_STRUCTARG59]], i32 0, i32 2 +// CHECK-NEXT: store float** %[[LOADGEP_R_ADDR]], float*** %[[GEP_R_ADDR62]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_2Pfid..omp_par.3 to void (i32*, i32*, ...)*), { i32*, double*, float** }* %[[LOADGEP_STRUCTARG59]]) +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT47:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT47]]: +// CHECK-NEXT: br label %[[OMP_PAR_EXIT39_SPLIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_EXIT39_SPLIT]]: +// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV48:.+]] = sitofp i32 %[[TMP8]] to double +// CHECK-NEXT: %[[TMP9:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD49:.+]] = fadd double %[[CONV48]], %[[TMP9]] +// CHECK-NEXT: %[[CONV50:.+]] = fptrunc double %[[ADD49]] to float +// CHECK-NEXT: %[[TMP10:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV50]], float* %[[TMP10]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE7:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE7]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT51_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT51_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_2Pfid..omp_par.3( +// CHECK-NEXT: [[OMP_PAR_ENTRY36:.*]]: +// CHECK-NEXT: %[[GEP_A_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0:.+]], i32 0, i32 0 +// CHECK-NEXT: %[[LOADGEP_A_ADDR:.+]] = load i32*, i32** %[[GEP_A_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0]], i32 0, i32 1 +// CHECK-NEXT: %[[LOADGEP_B_ADDR:.+]] = load double*, double** %[[GEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0]], i32 0, i32 2 +// CHECK-NEXT: %[[LOADGEP_R_ADDR:.+]] 
= load float**, float*** %[[GEP_R_ADDR]], align 8 +// CHECK-NEXT: %[[TID_ADDR_LOCAL40:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP1:.+]] = load i32, i32* %[[TID_ADDR34:.+]], align 4 +// CHECK-NEXT: store i32 %[[TMP1]], i32* %[[TID_ADDR_LOCAL40]], align 4 +// CHECK-NEXT: %[[TID41:.+]] = load i32, i32* %[[TID_ADDR_LOCAL40]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION37:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION37]]: +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV44:.+]] = sitofp i32 %[[TMP2]] to double +// CHECK-NEXT: %[[TMP3:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD45:.+]] = fadd double %[[CONV44]], %[[TMP3]] +// CHECK-NEXT: %[[CONV46:.+]] = fptrunc double %[[ADD45]] to float +// CHECK-NEXT: %[[TMP4:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV46]], float* %[[TMP4]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE38:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE38]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT47_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT47_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@_Z17nested_parallel_2Pfid..omp_par( +// CHECK-NEXT: [[OMP_PAR_ENTRY19:.*]]: +// CHECK-NEXT: %[[GEP_A_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0:.+]], i32 0, i32 0 +// CHECK-NEXT: %[[LOADGEP_A_ADDR:.+]] = load i32*, i32** %[[GEP_A_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_B_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0]], i32 0, i32 1 +// CHECK-NEXT: %[[LOADGEP_B_ADDR:.+]] = load double*, double** %[[GEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[GEP_R_ADDR:.+]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* %[[TMP0]], i32 0, i32 2 +// CHECK-NEXT: %[[LOADGEP_R_ADDR:.+]] = load float**, float*** 
%[[GEP_R_ADDR]], align 8 +// CHECK-NEXT: %[[TID_ADDR_LOCAL23:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP1:.+]] = load i32, i32* %[[TID_ADDR17:.+]], align 4 +// CHECK-NEXT: store i32 %[[TMP1]], i32* %[[TID_ADDR_LOCAL23]], align 4 +// CHECK-NEXT: %[[TID24:.+]] = load i32, i32* %[[TID_ADDR_LOCAL23]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_REGION20:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_REGION20]]: +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[LOADGEP_A_ADDR]], align 4 +// CHECK-NEXT: %[[CONV27:.+]] = sitofp i32 %[[TMP2]] to double +// CHECK-NEXT: %[[TMP3:.+]] = load double, double* %[[LOADGEP_B_ADDR]], align 8 +// CHECK-NEXT: %[[ADD28:.+]] = fadd double %[[CONV27]], %[[TMP3]] +// CHECK-NEXT: %[[CONV29:.+]] = fptrunc double %[[ADD28]] to float +// CHECK-NEXT: %[[TMP4:.+]] = load float*, float** %[[LOADGEP_R_ADDR]], align 8 +// CHECK-NEXT: store float %[[CONV29]], float* %[[TMP4]], align 4 +// CHECK-NEXT: br label %[[OMP_PAR_PRE_FINALIZE21:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_PRE_FINALIZE21]]: +// CHECK-NEXT: br label %[[OMP_PAR_OUTLINED_EXIT_EXITSTUB:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 50} +// CHECK: ![[META2:[0-9]+]] = +// CHECK: ![[META3:[0-9]+]] = !{![[META4:[0-9]+]]} +// CHECK: ![[META4]] = !{i64 2, i64 -1, i64 -1, i1 true} diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -78,12 +78,12 @@ // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], 
metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG79:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 @@ -94,12 +94,12 @@ // CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3 // CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 -// CHECK-DEBUG-NEXT: call 
void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG80:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK-DEBUG: omp.par.outlined.exit16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG82:![0-9]+]] // void parallel_for_1(float *r, int a, double b) { #pragma omp parallel @@ -169,7 +169,7 @@ // CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4 // CHECK-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0) // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4 // CHECK-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] @@ -221,12 +221,12 @@ // CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, 
i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0 @@ -241,60 +241,60 @@ // CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5 // CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8 -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]), !dbg [[DBG140:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]), !dbg [[DBG141:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK-DEBUG: omp.par.outlined.exit184: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META145:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG148:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG149]] // CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// 
CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG149]] // CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG149]] // CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 
[[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG149]] // CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG151:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], 
[[TMP10]], !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG152]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG149]] // CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG149]] // CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG149]] // CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]] // CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG149]] // CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] // void parallel_for_2(float *r, int a, double b) { #pragma omp parallel diff --git a/clang/test/OpenMP/irbuilder_simd.cpp b/clang/test/OpenMP/irbuilder_simd.cpp --- a/clang/test/OpenMP/irbuilder_simd.cpp +++ b/clang/test/OpenMP/irbuilder_simd.cpp @@ -15,57 +15,260 @@ #pragma omp simd for (int i = 3; i < 32; i += 5) { // llvm.access.group test - // CHECK: 
%[[A_ADDR:.+]] = alloca float*, align 8 - // CHECK: %[[B_ADDR:.+]] = alloca float*, align 8 - // CHECK: %[[S:.+]] = alloca %struct.S, align 4 - // CHECK: %[[P:.+]] = alloca %struct.S*, align 8 - // CHECK: %[[I:.+]] = alloca i32, align 4 - // CHECK: %[[TMP3:.+]] = load float*, float** %[[B_ADDR:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4:.+]] to i64 - // CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3:.+]], i64 %[[IDXPROM:.+]] - // CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[A2:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[S:.+]], i32 0, i32 0 - // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[A2:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[CONV:.+]] = sitofp i32 %[[TMP6:.+]] to float - // CHECK-NEXT: %[[ADD:.+]] = fadd float %[[TMP5:.+]], %[[CONV:.+]] - // CHECK-NEXT: %[[TMP7:.+]] = load %struct.S*, %struct.S** %[[P:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[A3:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[TMP7:.+]], i32 0, i32 0 - // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[A3:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[CONV4:.+]] = sitofp i32 %[[TMP8:.+]] to float - // CHECK-NEXT: %[[ADD5:.+]] = fadd float %[[ADD:.+]], %[[CONV4:.+]] - // CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[A_ADDR:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] - // CHECK-NEXT: %[[IDXPROM6:.+]] = sext i32 %[[TMP10:.+]] to i64 - // CHECK-NEXT: %[[ARRAYIDX7:.+]] = getelementptr inbounds float, float* %[[TMP9:.+]], i64 %[[IDXPROM6:.+]] - // CHECK-NEXT: store float %[[ADD5:.+]], float* %[[ARRAYIDX7:.+]], 
align 4, !llvm.access.group ![[META3:[0-9]+]] // llvm.loop test - // CHECK: %[[OMP_LOOPDOTNEXT:.+]] = add nuw i32 %[[OMP_LOOPDOTIV:.+]], 1 - // CHECK-NEXT: br label %omp_loop.header, !llvm.loop ![[META4:[0-9]+]] a[i] = b[i] + s.a + p->a; } #pragma omp simd for (int j = 3; j < 32; j += 5) { // test if unique access groups were used for a second loop - // CHECK: %[[A22:.+]] = getelementptr inbounds %struct.P, %struct.P* %[[PP:.+]], i32 0, i32 0 - // CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[A22:.+]], align 4, !llvm.access.group ![[META7:[0-9]+]] - // CHECK-NEXT: %[[TMP15:.+]] = load i32*, i32** %[[C_ADDR:.+]], align 8, !llvm.access.group ![[META7:[0-9]+]] - // CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[J:.+]], align 4, !llvm.access.group ![[META7:[0-9]+]] - // CHECK-NEXT: %[[IDXPROM23:.+]] = sext i32 %[[TMP16:.+]] to i64 - // CHECK-NEXT: %[[ARRAYIDX24:.+]] = getelementptr inbounds i32, i32* %[[TMP15:.+]], i64 %[[IDXPROM23:.+]] - // CHECK-NEXT: store i32 %[[TMP14:.+]], i32* %[[ARRAYIDX24:.+]], align 4, !llvm.access.group ![[META7:[0-9]+]] // check llvm.loop metadata - // CHECK: %[[OMP_LOOPDOTNEXT:.+]] = add nuw i32 %[[OMP_LOOPDOTIV:.+]], 1 - // CHECK-NEXT: br label %[[OMP_LLOP_BODY:.*]], !llvm.loop ![[META8:[0-9]+]] c[j] = pp.a; } } -// CHECK: ![[META3:[0-9]+]] = distinct !{} -// CHECK-NEXT: ![[META4]] = distinct !{![[META4]], ![[META5:[0-9]+]], ![[META6:[0-9]+]]} -// CHECK-NEXT: ![[META5]] = !{!"llvm.loop.parallel_accesses", ![[META3]]} -// CHECK-NEXT: ![[META6]] = !{!"llvm.loop.vectorize.enable", i1 true} -// CHECK-NEXT: ![[META7:[0-9]+]] = distinct !{} -// CHECK-NEXT: ![[META8]] = distinct !{![[META8]], ![[META9:[0-9]+]], ![[META6]]} -// CHECK-NEXT: ![[META9]] = !{!"llvm.loop.parallel_accesses", ![[META7]]} \ No newline at end of file +// CHECK-LABEL: define {{.*}}@_Z6simplePfS_Pi( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[C_ADDR:.+]] = 
alloca i32*, align 8 +// CHECK-NEXT: %[[S:.+]] = alloca %struct.S, align 4 +// CHECK-NEXT: %[[P:.+]] = alloca %struct.S*, align 8 +// CHECK-NEXT: %[[PP:.+]] = alloca %struct.P, align 4 +// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 +// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 +// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[AGG_CAPTURED8:.+]] = alloca %struct.anon.1, align 8 +// CHECK-NEXT: %[[AGG_CAPTURED9:.+]] = alloca %struct.anon.2, align 4 +// CHECK-NEXT: %[[DOTCOUNT_ADDR10:.+]] = alloca i32, align 4 +// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 +// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 +// CHECK-NEXT: store i32* %[[C:.+]], i32** %[[C_ADDR]], align 8 +// CHECK-NEXT: store i32 3, i32* %[[I]], align 4 +// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) +// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_HEADER]]: +// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_COND]]: +// CHECK-NEXT: 
%[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_BODY]]: +// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]), !llvm.access.group ![[META3:[0-9]+]] +// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] +// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[A2:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[S]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[A2]], align 4, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[CONV:.+]] = sitofp i32 %[[TMP6]] to float +// CHECK-NEXT: %[[ADD:.+]] = fadd float %[[TMP5]], %[[CONV]] +// CHECK-NEXT: %[[TMP7:.+]] = load %struct.S*, %struct.S** %[[P]], align 8, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[A3:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[TMP7]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[A3]], align 4, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[CONV4:.+]] = sitofp i32 %[[TMP8]] to float +// CHECK-NEXT: %[[ADD5:.+]] = fadd float %[[ADD]], %[[CONV4]] +// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[A_ADDR]], align 8, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4, !llvm.access.group ![[META3]] +// CHECK-NEXT: %[[IDXPROM6:.+]] = sext i32 %[[TMP10]] to i64 +// CHECK-NEXT: %[[ARRAYIDX7:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM6]] +// CHECK-NEXT: store float %[[ADD5]], float* %[[ARRAYIDX7]], align 
4, !llvm.access.group ![[META3]] +// CHECK-NEXT: br label %[[OMP_LOOP_INC]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP4:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_EXIT]]: +// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_AFTER]]: +// CHECK-NEXT: store i32 3, i32* %[[J]], align 4 +// CHECK-NEXT: %[[TMP11:.+]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %[[AGG_CAPTURED8]], i32 0, i32 0 +// CHECK-NEXT: store i32* %[[J]], i32** %[[TMP11]], align 8 +// CHECK-NEXT: %[[TMP12:.+]] = getelementptr inbounds %struct.anon.2, %struct.anon.2* %[[AGG_CAPTURED9]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[J]], align 4 +// CHECK-NEXT: store i32 %[[TMP13]], i32* %[[TMP12]], align 4 +// CHECK-NEXT: call void @__captured_stmt.2(i32* %[[DOTCOUNT_ADDR10]], %struct.anon.1* %[[AGG_CAPTURED8]]) +// CHECK-NEXT: %[[DOTCOUNT11:.+]] = load i32, i32* %[[DOTCOUNT_ADDR10]], align 4 +// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER12:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_PREHEADER12]]: +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER13:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_HEADER13]]: +// CHECK-NEXT: %[[OMP_LOOP_IV19:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER12]] ], [ %[[OMP_LOOP_NEXT21:.+]], %[[OMP_LOOP_INC16:.+]] ] +// CHECK-NEXT: br label %[[OMP_LOOP_COND14:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_COND14]]: +// CHECK-NEXT: %[[OMP_LOOP_CMP20:.+]] = icmp ult i32 %[[OMP_LOOP_IV19]], %[[DOTCOUNT11]] +// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP20]], label %[[OMP_LOOP_BODY15:.+]], label %[[OMP_LOOP_EXIT17:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_BODY15]]: +// CHECK-NEXT: call void @__captured_stmt.3(i32* %[[J]], i32 %[[OMP_LOOP_IV19]], %struct.anon.2* %[[AGG_CAPTURED9]]), !llvm.access.group ![[META7:[0-9]+]] +// CHECK-NEXT: %[[A22:.+]] = getelementptr 
inbounds %struct.P, %struct.P* %[[PP]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[A22]], align 4, !llvm.access.group ![[META7]] +// CHECK-NEXT: %[[TMP15:.+]] = load i32*, i32** %[[C_ADDR]], align 8, !llvm.access.group ![[META7]] +// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[J]], align 4, !llvm.access.group ![[META7]] +// CHECK-NEXT: %[[IDXPROM23:.+]] = sext i32 %[[TMP16]] to i64 +// CHECK-NEXT: %[[ARRAYIDX24:.+]] = getelementptr inbounds i32, i32* %[[TMP15]], i64 %[[IDXPROM23]] +// CHECK-NEXT: store i32 %[[TMP14]], i32* %[[ARRAYIDX24]], align 4, !llvm.access.group ![[META7]] +// CHECK-NEXT: br label %[[OMP_LOOP_INC16]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_INC16]]: +// CHECK-NEXT: %[[OMP_LOOP_NEXT21]] = add nuw i32 %[[OMP_LOOP_IV19]], 1 +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER13]], !llvm.loop ![[LOOP8:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_EXIT17]]: +// CHECK-NEXT: br label %[[OMP_LOOP_AFTER18:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_AFTER18]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@__captured_stmt( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 +// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 +// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 +// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 +// CHECK-NEXT: store i32 %[[TMP3]], i32* 
%[[DOTSTART]], align 4 +// CHECK-NEXT: store i32 32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 5, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_TRUE]]: +// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: br label %[[COND_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_FALSE]]: +// CHECK-NEXT: br label %[[COND_END]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_END]]: +// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] +// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@__captured_stmt.1( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 +// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 +// CHECK-NEXT: 
%[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 +// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: %[[MUL:.+]] = mul i32 5, %[[TMP3]] +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] +// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@__captured_stmt.2( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 +// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.1*, align 8 +// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 +// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon.1* %[[__CONTEXT:.+]], %struct.anon.1** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.1*, %struct.anon.1** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %[[TMP0]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 +// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 +// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: store i32 32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 5, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_TRUE]]: +// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: %[[SUB:.+]] = sub nsw 
i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: br label %[[COND_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_FALSE]]: +// CHECK-NEXT: br label %[[COND_END]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_END]]: +// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] +// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK-LABEL: define {{.*}}@__captured_stmt.3( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 +// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.2*, align 8 +// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store %struct.anon.2* %[[__CONTEXT:.+]], %struct.anon.2** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.2*, %struct.anon.2** %[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.2, %struct.anon.2* %[[TMP0]], i32 0, i32 0 +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 +// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: %[[MUL:.+]] = mul i32 5, %[[TMP3]] +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] +// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 +// CHECK-NEXT: ret void +// CHECK-NEXT: } + + +// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} +// CHECK: ![[META2:[0-9]+]] = +// CHECK: ![[META3]] = distinct !{} +// CHECK: ![[LOOP4]] = distinct 
!{![[LOOP4]], ![[LOOPPROP5:[0-9]+]], ![[LOOPPROP6:[0-9]+]]} +// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.parallel_accesses", ![[META3]]} +// CHECK: ![[LOOPPROP6]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: ![[META7]] = distinct !{} +// CHECK: ![[LOOP8]] = distinct !{![[LOOP8]], ![[LOOPPROP9:[0-9]+]], ![[LOOPPROP6]]} +// CHECK: ![[LOOPPROP9]] = !{!"llvm.loop.parallel_accesses", ![[META7]]} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -51,7 +51,7 @@ // CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -57,7 +57,7 @@ // CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: 
%[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 // CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c @@ -9,6 +9,202 @@ double sind(double); +// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for( +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8 +// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 +// CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8 +// CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8 +// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4 +// 
CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8 +// CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8 +// CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8 +// CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4 +// CHECK-NEXT: store i32 %[[M:.+]], i32* %[[M_ADDR]], align 4 +// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 +// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 +// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 +// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 +// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8 +// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4 +// CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[M_ADDR]], align 4 +// CHECK-NEXT: store i32 %[[TMP0]], i32* %[[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: store i32 0, i32* %[[J]], align 4 +// CHECK-NEXT: %[[TMP1_1:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0 +// CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1 +// CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64 +// CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 4 +// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1 +// CHECK-NEXT: store i64 %[[SUB3]], i64* %[[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4 +// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]] +// CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PRECOND_THEN]]: +// CHECK-NEXT: store i64 0, i64* %[[DOTOMP_LB]], align 8 +// CHECK-NEXT: %[[TMP3:.+]] = 
load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8 +// CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8 +// CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) +// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 +// CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_TRUE]]: +// CHECK-NEXT: %[[TMP6:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: br label %[[COND_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_FALSE]]: +// CHECK-NEXT: %[[TMP7:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 +// CHECK-NEXT: br label %[[COND_END]] +// CHECK-EMPTY: +// CHECK-NEXT: [[COND_END]]: +// CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ] +// CHECK-NEXT: store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8 +// CHECK-NEXT: %[[TMP8:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8 +// CHECK-NEXT: store i64 %[[TMP8]], i64* %[[DOTOMP_IV]], align 8 +// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_INNER_FOR_COND]]: +// CHECK-NEXT: %[[TMP9:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 +// CHECK-NEXT: %[[TMP10:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 +// CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]] +// CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: 
[[OMP_INNER_FOR_BODY]]: +// CHECK-NEXT: %[[TMP11:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 +// CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 4 +// CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]] +// CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32 +// CHECK-NEXT: store i32 %[[CONV14]], i32* %[[I6]], align 4 +// CHECK-NEXT: %[[TMP12:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 +// CHECK-NEXT: %[[TMP13:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 +// CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 4 +// CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 4 +// CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]] +// CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 2 +// CHECK-NEXT: %[[ADD19:.+]] = add nsw i64 0, %[[MUL18]] +// CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32 +// CHECK-NEXT: store i32 %[[CONV20]], i32* %[[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: store i32 %[[TMP14]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: br label %[[FOR_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[FOR_COND]]: +// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 2 +// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]] +// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[LAND_RHS]]: +// CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8 +// CHECK-NEXT: br label %[[LAND_END]] +// CHECK-EMPTY: +// CHECK-NEXT: [[LAND_END]]: +// CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ] +// CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], 
label %[[FOR_END:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[FOR_BODY]]: +// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1 +// CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]] +// CHECK-NEXT: store i32 %[[ADD27]], i32* %[[J]], align 4 +// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[B_ADDR]], align 8 +// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I6]], align 4 +// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM]] +// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX]], align 4 +// CHECK-NEXT: %[[CONV28:.+]] = fpext float %[[TMP22]] to double +// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double noundef %[[CONV28]]) +// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[C_ADDR]], align 8 +// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I6]], align 4 +// CHECK-NEXT: %[[IDXPROM29:.+]] = sext i32 %[[TMP24]] to i64 +// CHECK-NEXT: %[[ARRAYIDX30:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM29]] +// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX30]], align 4 +// CHECK-NEXT: %[[CONV31:.+]] = fpext float %[[TMP25]] to double +// CHECK-NEXT: %[[MUL32:.+]] = fmul double %[[CALL]], %[[CONV31]] +// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[D_ADDR]], align 8 +// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I6]], align 4 +// CHECK-NEXT: %[[IDXPROM33:.+]] = sext i32 %[[TMP27]] to i64 +// CHECK-NEXT: %[[ARRAYIDX34:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM33]] +// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX34]], align 4 +// CHECK-NEXT: %[[CONV35:.+]] = fpext float %[[TMP28]] to double +// CHECK-NEXT: %[[MUL36:.+]] = fmul double %[[MUL32]], %[[CONV35]] +// CHECK-NEXT: %[[TMP29:.+]] = load float*, float** %[[E_ADDR]], align 8 +// CHECK-NEXT: %[[TMP30:.+]] = load i32, i32* 
%[[I6]], align 4 +// CHECK-NEXT: %[[IDXPROM37:.+]] = sext i32 %[[TMP30]] to i64 +// CHECK-NEXT: %[[ARRAYIDX38:.+]] = getelementptr inbounds float, float* %[[TMP29]], i64 %[[IDXPROM37]] +// CHECK-NEXT: %[[TMP31:.+]] = load float, float* %[[ARRAYIDX38]], align 4 +// CHECK-NEXT: %[[CONV39:.+]] = fpext float %[[TMP31]] to double +// CHECK-NEXT: %[[MUL40:.+]] = fmul double %[[MUL36]], %[[CONV39]] +// CHECK-NEXT: %[[TMP32:.+]] = load float, float* %[[OFFSET_ADDR]], align 4 +// CHECK-NEXT: %[[CONV41:.+]] = fpext float %[[TMP32]] to double +// CHECK-NEXT: %[[ADD42:.+]] = fadd double %[[MUL40]], %[[CONV41]] +// CHECK-NEXT: %[[TMP33:.+]] = load float*, float** %[[A_ADDR]], align 8 +// CHECK-NEXT: %[[TMP34:.+]] = load i32, i32* %[[I6]], align 4 +// CHECK-NEXT: %[[IDXPROM43:.+]] = sext i32 %[[TMP34]] to i64 +// CHECK-NEXT: %[[ARRAYIDX44:.+]] = getelementptr inbounds float, float* %[[TMP33]], i64 %[[IDXPROM43]] +// CHECK-NEXT: %[[TMP35:.+]] = load float, float* %[[ARRAYIDX44]], align 4 +// CHECK-NEXT: %[[CONV45:.+]] = fpext float %[[TMP35]] to double +// CHECK-NEXT: %[[ADD46:.+]] = fadd double %[[CONV45]], %[[ADD42]] +// CHECK-NEXT: %[[CONV47:.+]] = fptrunc double %[[ADD46]] to float +// CHECK-NEXT: store float %[[CONV47]], float* %[[ARRAYIDX44]], align 4 +// CHECK-NEXT: br label %[[FOR_INC:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[FOR_INC]]: +// CHECK-NEXT: %[[TMP36:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP36]], 1 +// CHECK-NEXT: store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[FOR_END]]: +// CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_BODY_CONTINUE]]: +// CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_INNER_FOR_INC]]: +// CHECK-NEXT: %[[TMP37:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8 +// CHECK-NEXT: %[[ADD48:.+]] = add 
nsw i64 %[[TMP37]], 1 +// CHECK-NEXT: store i64 %[[ADD48]], i64* %[[DOTOMP_IV]], align 8 +// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_INNER_FOR_END]]: +// CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_EXIT]]: +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5) +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK-NEXT: br label %[[OMP_PRECOND_END]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_PRECOND_END]]: +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM50]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } + void unroll_partial_heuristic_for(int m, float *a, float *b, float *c, float *d, float *e, float offset) { #pragma omp for collapse(2) @@ -21,183 +217,11 @@ } #endif // HEADER - -// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_for -// CHECK-SAME: (i32 noundef [[M:%.*]], float* noundef [[A:%.*]], float* noundef [[B:%.*]], float* noundef [[C:%.*]], float* noundef [[D:%.*]], float* noundef [[E:%.*]], float noundef [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 -// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 -// CHECK-NEXT: [[E_ADDR:%.*]] = alloca float*, align 8 -// CHECK-NEXT: [[OFFSET_ADDR:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 
-// CHECK-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTUNROLLED_IV_J:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTUNROLLED_IV_J7:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTUNROLL_INNER_IV_J:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 -// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 -// CHECK-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK-NEXT: store float* [[E]], float** [[E_ADDR]], align 8 -// CHECK-NEXT: store float [[OFFSET]], float* [[OFFSET_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 -// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], 4 -// CHECK-NEXT: [[SUB3:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK-NEXT: store i64 [[SUB3]], i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK-NEXT: store i32 0, i32* [[DOTUNROLLED_IV_J]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] -// CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK: 
omp.precond.then: -// CHECK-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 -// CHECK-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i64 [[TMP4]], [[TMP5]] -// CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK: cond.true: -// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: br label [[COND_END:%.*]] -// CHECK: cond.false: -// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK-NEXT: br label [[COND_END]] -// CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP6]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] -// CHECK-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK-NEXT: [[CMP10:%.*]] = icmp sle i64 [[TMP9]], [[TMP10]] -// CHECK-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK: omp.inner.for.body: -// CHECK-NEXT: 
[[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK-NEXT: [[DIV12:%.*]] = sdiv i64 [[TMP11]], 4 -// CHECK-NEXT: [[MUL13:%.*]] = mul nsw i64 [[DIV12]], 1 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL13]] -// CHECK-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK-NEXT: store i32 [[CONV14]], i32* [[I6]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP13]], 4 -// CHECK-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 4 -// CHECK-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP12]], [[MUL16]] -// CHECK-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 2 -// CHECK-NEXT: [[ADD19:%.*]] = add nsw i64 0, [[MUL18]] -// CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD19]] to i32 -// CHECK-NEXT: store i32 [[CONV20]], i32* [[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: store i32 [[TMP14]], i32* [[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: -// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP16]], 2 -// CHECK-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP15]], [[ADD21]] -// CHECK-NEXT: br i1 [[CMP22]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] -// CHECK: land.rhs: -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: [[CMP24:%.*]] = icmp sle i32 [[TMP17]], 8 -// CHECK-NEXT: br label [[LAND_END]] -// CHECK: land.end: -// CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[FOR_COND]] ], [ [[CMP24]], [[LAND_RHS]] ] -// CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] -// CHECK: for.body: -// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: 
[[MUL26:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 0, [[MUL26]] -// CHECK-NEXT: store i32 [[ADD27]], i32* [[J]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load float*, float** [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[CONV28:%.*]] = fpext float [[TMP22]] to double -// CHECK-NEXT: [[CALL:%.*]] = call double @sind(double noundef [[CONV28]]) -// CHECK-NEXT: [[TMP23:%.*]] = load float*, float** [[C_ADDR]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK-NEXT: [[IDXPROM29:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM29]] -// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX30]], align 4 -// CHECK-NEXT: [[CONV31:%.*]] = fpext float [[TMP25]] to double -// CHECK-NEXT: [[MUL32:%.*]] = fmul double [[CALL]], [[CONV31]] -// CHECK-NEXT: [[TMP26:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM33]] -// CHECK-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX34]], align 4 -// CHECK-NEXT: [[CONV35:%.*]] = fpext float [[TMP28]] to double -// CHECK-NEXT: [[MUL36:%.*]] = fmul double [[MUL32]], [[CONV35]] -// CHECK-NEXT: [[TMP29:%.*]] = load float*, float** [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM37]] -// CHECK-NEXT: 
[[TMP31:%.*]] = load float, float* [[ARRAYIDX38]], align 4 -// CHECK-NEXT: [[CONV39:%.*]] = fpext float [[TMP31]] to double -// CHECK-NEXT: [[MUL40:%.*]] = fmul double [[MUL36]], [[CONV39]] -// CHECK-NEXT: [[TMP32:%.*]] = load float, float* [[OFFSET_ADDR]], align 4 -// CHECK-NEXT: [[CONV41:%.*]] = fpext float [[TMP32]] to double -// CHECK-NEXT: [[ADD42:%.*]] = fadd double [[MUL40]], [[CONV41]] -// CHECK-NEXT: [[TMP33:%.*]] = load float*, float** [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK-NEXT: [[IDXPROM43:%.*]] = sext i32 [[TMP34]] to i64 -// CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, float* [[TMP33]], i64 [[IDXPROM43]] -// CHECK-NEXT: [[TMP35:%.*]] = load float, float* [[ARRAYIDX44]], align 4 -// CHECK-NEXT: [[CONV45:%.*]] = fpext float [[TMP35]] to double -// CHECK-NEXT: [[ADD46:%.*]] = fadd double [[CONV45]], [[ADD42]] -// CHECK-NEXT: [[CONV47:%.*]] = fptrunc double [[ADD46]] to float -// CHECK-NEXT: store float [[CONV47]], float* [[ARRAYIDX44]], align 4 -// CHECK-NEXT: br label [[FOR_INC:%.*]] -// CHECK: for.inc: -// CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP36]], 1 -// CHECK-NEXT: store i32 [[INC]], i32* [[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] -// CHECK: for.end: -// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK: omp.body.continue: -// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK-NEXT: [[ADD48:%.*]] = add nsw i64 [[TMP37]], 1 -// CHECK-NEXT: store i64 [[ADD48]], i64* [[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK: omp.inner.for.end: -// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK: omp.loop.exit: -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]]) -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK-NEXT: br label [[OMP_PRECOND_END]] -// CHECK: omp.precond.end: -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7:[0-9]+]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) -// CHECK-NEXT: ret void // + +// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: ![[META2:[0-9]+]] = +// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} +// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"} +// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -59,7 +59,7 @@ // CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 // 
CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -47,7 +47,7 @@ // CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 0) // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 // CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 // CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -354,14 +354,6 @@ /// the current thread, updates the relevant instructions in the canonical /// loop and calls to an OpenMP runtime finalization function after the loop. /// - /// TODO: Workshare loops with static scheduling may contain up to two loops - /// that fulfill the requirements of an OpenMP canonical loop. One for - /// iterating over all iterations of a chunk and another one for iterating - /// over all chunks that are executed on the same thread. 
Returning - /// CanonicalLoopInfo objects representing them may eventually be useful for - /// the apply clause planned in OpenMP 6.0, but currently whether these are - /// canonical loops is irrelevant. - /// /// \param DL Debug location for instructions added for the /// workshare-loop construct itself. /// \param CLI A descriptor of the canonical loop to workshare. @@ -369,14 +361,30 @@ /// preheader of the loop. /// \param NeedsBarrier Indicates whether a barrier must be inserted after /// the loop. - /// \param Chunk The size of loop chunk considered as a unit when - /// scheduling. If \p nullptr, defaults to 1. /// /// \returns Point where to insert code after the workshare construct. InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - bool NeedsBarrier, - Value *Chunk = nullptr); + bool NeedsBarrier); + + /// Modifies the canonical loop to be a statically-scheduled workshare loop with a + /// user-specified chunk size. + /// + /// \param DL Debug location for instructions added for the + /// workshare-loop construct itself. + /// \param CLI A descriptor of the canonical loop to workshare. + /// \param AllocaIP An insertion point for Alloca instructions usable in + /// the preheader of the loop. + /// \param NeedsBarrier Indicates whether a barrier must be inserted after the + /// loop. + /// \param ChunkSize The user-specified chunk size. + /// + /// \returns Point where to insert code after the workshare construct. + InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier, + Value *ChunkSize); /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. /// @@ -413,6 +421,10 @@ /// the current thread, updates the relevant instructions in the canonical /// loop and calls to an OpenMP runtime finalization function after the loop. 
/// + /// The concrete transformation is done by applyStaticWorkshareLoop, + /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending + /// on the value of \p SchedKind and \p ChunkSize. + /// /// \param DL Debug location for instructions added for the /// workshare-loop construct itself. /// \param CLI A descriptor of the canonical loop to workshare. @@ -420,10 +432,15 @@ /// preheader of the loop. /// \param NeedsBarrier Indicates whether a barrier must be insterted after /// the loop. + /// \param SchedKind Scheduling algorithm to use. + /// \param ChunkSize The chunk size for the inner loop. /// /// \returns Point where to insert code after the workshare construct. - InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, bool NeedsBarrier); + InsertPointTy applyWorkshareLoop( + DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, + bool NeedsBarrier, + llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, + Value *ChunkSize = nullptr); /// Tile a loop nest. /// @@ -1483,6 +1500,27 @@ /// Re-evaluated whether this makes sense. void collectControlBlocks(SmallVectorImpl &BBs); + /// Sets the number of loop iterations to the given value. This value must be + /// valid in the condition block (i.e., defined in the preheader) and is + /// interpreted as an unsigned integer. + void setTripCount(Value *TripCount); + + /// Replace all uses of the canonical induction variable in the loop body with + /// a new one. + /// + /// The intended use case is to update the induction variable for an updated + /// iteration space such that it can stay normalized in the 0...tripcount-1 + /// range. + /// + /// The \p Updater is called with the (presumably updated) current normalized + /// induction variable and is expected to return the value that uses of the + /// pre-updated induction values should use instead, typically dependent on + /// the new induction variable. 
This is a lambda (instead of e.g. just passing + /// the new value) to be able to distinguish the uses of the pre-updated + /// induction variable and uses of the induction variable to compute the + /// updated induction variable value. + void mapIndVar(llvm::function_ref Updater); + public: /// Returns whether this object currently represents the IR of a loop. If /// returning false, it may have been consumed by a loop transformation or not diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -74,6 +74,106 @@ } #endif +/// Make \p Source branch to \p Target. +/// +/// Handles two situations: +/// * \p Source already has an unconditional branch. +/// * \p Source is a degenerate block (no terminator because the BB is +/// the current head of the IR construction). +static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) { + if (Instruction *Term = Source->getTerminator()) { + auto *Br = cast(Term); + assert(!Br->isConditional() && + "BB's terminator must be an unconditional branch (or degenerate)"); + BasicBlock *Succ = Br->getSuccessor(0); + Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true); + Br->setSuccessor(0, Target); + return; + } + + auto *NewBr = BranchInst::Create(Target, Source); + NewBr->setDebugLoc(DL); +} + +/// Move the instruction after an InsertPoint to the beginning of another +/// BasicBlock. +/// +/// The instructions after \p IP are moved to the beginning of \p New which must +/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to +/// \p New will be added such that there is no semantic change. Otherwise, the +/// \p IP insert block remains degenerate and it is up to the caller to insert a +/// terminator. 
+static void spliceBB(OpenMPIRBuilder::InsertPointTy IP, BasicBlock *New, + bool CreateBranch) { + assert(New->getFirstInsertionPt() == New->begin() && + "Target BB must not have PHI nodes"); + + // Move instructions to new block. + BasicBlock *Old = IP.getBlock(); + New->getInstList().splice(New->begin(), Old->getInstList(), IP.getPoint(), + Old->end()); + + if (CreateBranch) + BranchInst::Create(New, Old); +} + +/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new +/// insert location will stick to after the instruction before the insertion +/// point (instead of moving with the instruction the InsertPoint stores +/// internally). +static void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) { + DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); + BasicBlock *Old = Builder.GetInsertBlock(); + + spliceBB(Builder.saveIP(), New, CreateBranch); + if (CreateBranch) + Builder.SetInsertPoint(Old->getTerminator()); + else + Builder.SetInsertPoint(Old); + + // SetInsertPoint also updates the Builder's debug location, but we want to + // keep the one the Builder was configured to use. + Builder.SetCurrentDebugLocation(DebugLoc); +} + +/// Split a BasicBlock at an InsertPoint, even if the block is degenerate +/// (missing the terminator). +/// +/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed +/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch +/// is true, a branch to the new successor will be created such that +/// semantically there is no change; otherwise the block of the insertion point +/// remains degenerate and it is the caller's responsibility to insert a +/// terminator. Returns the new successor block. +static BasicBlock *splitBB(OpenMPIRBuilder::InsertPointTy IP, bool CreateBranch, + llvm::Twine Name = {}) { + BasicBlock *Old = IP.getBlock(); + BasicBlock *New = BasicBlock::Create( + Old->getContext(), Name.isTriviallyEmpty() ? 
Old->getName() : Name, + Old->getParent(), Old->getNextNode()); + spliceBB(IP, New, CreateBranch); + New->replaceSuccessorsPhiUsesWith(Old, New); + return New; +} + +/// Split a BasicBlock at \p Builder's insertion point, even if the block is +/// degenerate (missing the terminator). Its new insert location will stick to +/// after the instruction before the insertion point (instead of moving with the +/// instruction the InsertPoint stores internally). +static BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, + llvm::Twine Name = {}) { + DebugLoc DebugLoc = Builder.getCurrentDebugLocation(); + BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name); + if (CreateBranch) + Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator()); + else + Builder.SetInsertPoint(Builder.GetInsertBlock()); + // SetInsertPoint also updates the Builder's debug location, but we want to + // keep the one the Builder was configured to use. + Builder.SetCurrentDebugLocation(DebugLoc); + return New; +} + void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); @@ -1402,10 +1502,8 @@ // Split the loop at the insertion point: Branch to the preheader and move // every following instruction to after the loop (the After BB). Also, the // new successor is the loop's after block. + spliceBB(Builder, After, /*CreateBranch=*/false); Builder.CreateBr(CL->getPreheader()); - After->getInstList().splice(After->begin(), BB->getInstList(), - Builder.GetInsertPoint(), BB->end()); - After->replaceSuccessorsPhiUsesWith(BB, After); } // Emit the body content. We do it after connecting the loop to the CFG to @@ -1506,20 +1604,10 @@ llvm_unreachable("unknown OpenMP loop iterator bitwidth"); } -// Sets the number of loop iterations to the given value. This value must be -// valid in the condition block (i.e., defined in the preheader) and is -// interpreted as an unsigned integer. 
-void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) { - Instruction *CmpI = &CLI->getCond()->front(); - assert(isa(CmpI) && "First inst must compare IV with TripCount"); - CmpI->setOperand(1, TripCount); - CLI->assertOK(); -} - OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - bool NeedsBarrier, Value *Chunk) { + bool NeedsBarrier) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) && "Require dedicated allocate IP"); @@ -1559,10 +1647,6 @@ Builder.CreateStore(UpperBound, PUpperBound); Builder.CreateStore(One, PStride); - // FIXME: schedule(static) is NOT the same as schedule(static,1) - if (!Chunk) - Chunk = One; - Value *ThreadNum = getOrCreateThreadID(SrcLoc); Constant *SchedulingType = @@ -1572,25 +1656,22 @@ // value it produced. Builder.CreateCall(StaticInit, {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, - PUpperBound, PStride, One, Chunk}); + PUpperBound, PStride, One, Zero}); Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One); - setCanonicalLoopTripCount(CLI, TripCount); + CLI->setTripCount(TripCount); // Update all uses of the induction variable except the one in the condition // block that compares it with the actual upper bound, and the increment in // the latch block. - // TODO: this can eventually move to CanonicalLoopInfo or to a new - // CanonicalLoopInfoUpdater interface. 
- Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt()); - Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound); - IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) { - auto *Instr = dyn_cast(U.getUser()); - return !Instr || - (Instr->getParent() != CLI->getCond() && - Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV); + + CLI->mapIndVar([&](Instruction *OldIV) -> Value * { + Builder.SetInsertPoint(CLI->getBody(), + CLI->getBody()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(DL); + return Builder.CreateAdd(OldIV, LowerBound); }); // In the "exit" block, call the "fini" function. @@ -1610,11 +1691,164 @@ return AfterIP; } +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( + DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, + bool NeedsBarrier, Value *ChunkSize) { + assert(CLI->isValid() && "Requires a valid canonical loop"); + assert(ChunkSize && "Chunk size is required"); + + Value *IV = CLI->getIndVar(); + Value *OrigTripCount = CLI->getTripCount(); + Type *IVTy = IV->getType(); + Type *I32Type = Type::getInt32Ty(M.getContext()); + Constant *Zero = ConstantInt::get(IVTy, 0); + Constant *One = ConstantInt::get(IVTy, 1); + + // Declare useful OpenMP runtime functions. + FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this); + FunctionCallee StaticFini = + getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); + + // Allocate space for computed loop bounds as expected by the "init" function. + Builder.restoreIP(AllocaIP); + Builder.SetCurrentDebugLocation(DL); + Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); + Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); + Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound"); + Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride"); + + // Set up the source location value for the OpenMP runtime. 
+ Builder.restoreIP(CLI->getPreheaderIP()); + Builder.SetCurrentDebugLocation(DL); + + // Call the "init" function and update the trip count of the loop with the + // value it produced. + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadNum = getOrCreateThreadID(SrcLoc); + Constant *SchedulingType = ConstantInt::get( + I32Type, static_cast(OMPScheduleType::StaticChunked)); + Builder.CreateStore(Zero, PLowerBound); + Value *OrigUpperBound = Builder.CreateSub(OrigTripCount, One); + Builder.CreateStore(OrigUpperBound, PUpperBound); + Builder.CreateStore(One, PStride); + Builder.CreateCall(StaticInit, + {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum, + /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter, + /*plower=*/PLowerBound, /*pupper=*/PUpperBound, + /*pstride=*/PStride, /*incr=*/One, /*chunk=*/ChunkSize}); + + // Load values written by the "init" function. + Value *FirstChunkStart = + Builder.CreateLoad(IVTy, PLowerBound, "omp_firstchunk.lb"); + Value *FirstChunkStop = + Builder.CreateLoad(IVTy, PUpperBound, "omp_firstchunk.ub"); + Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One); + Value *ChunkRange = + Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range"); + Value *NextChunkStride = + Builder.CreateLoad(IVTy, PStride, "omp_dispatch.stride"); + + // Create outer "dispatch" loop for enumerating the chunks. + BasicBlock *DispatchEnter = splitBB(Builder, true); + Value *DispatchCounter; + CanonicalLoopInfo *DispatchCLI = createCanonicalLoop( + {Builder.saveIP(), DL}, + [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; }, + FirstChunkStart, OrigTripCount, NextChunkStride, + /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{}, + "dispatch"); + + // Remember the BasicBlocks of the dispatch loop we need, then invalidate to + // not have to preserve the canonical invariant. 
+ BasicBlock *DispatchBody = DispatchCLI->getBody(); + BasicBlock *DispatchLatch = DispatchCLI->getLatch(); + BasicBlock *DispatchExit = DispatchCLI->getExit(); + BasicBlock *DispatchAfter = DispatchCLI->getAfter(); + DispatchCLI->invalidate(); + + // Rewire the original loop to become the chunk loop inside the dispatch loop. + redirectTo(DispatchAfter, CLI->getAfter(), DL); + redirectTo(CLI->getExit(), DispatchLatch, DL); + redirectTo(DispatchBody, DispatchEnter, DL); + + // Prepare the prolog of the chunk loop. + Builder.restoreIP(CLI->getPreheaderIP()); + Builder.SetCurrentDebugLocation(DL); + + // Compute the number of iterations of the chunk loop. + Builder.SetInsertPoint(CLI->getPreheader()->getTerminator()); + Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange); + Value *IsLastChunk = + Builder.CreateICmpUGE(ChunkEnd, OrigTripCount, "omp_chunk.is_last"); + Value *CountUntilOrigTripCount = + Builder.CreateSub(OrigTripCount, DispatchCounter); + Value *ChunkTripCount = Builder.CreateSelect( + IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount"); + CLI->setTripCount(ChunkTripCount); + + // Update all uses of the induction variable except the one in the condition + // block that compares it with the actual upper bound, and the increment in + // the latch block. + CLI->mapIndVar([&](Instruction *) -> Value * { + Builder.restoreIP(CLI->getBodyIP()); + return Builder.CreateAdd(IV, DispatchCounter); + }); + + // In the "exit" block, call the "fini" function. + Builder.SetInsertPoint(DispatchExit, DispatchExit->getFirstInsertionPt()); + Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum}); + + // Add the barrier if requested. + if (NeedsBarrier) + createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for, + /* ForceSimpleCall */ false, /* CheckCancelFlag */ false); + +#ifndef NDEBUG + // Even though we currently do not support applying additional methods to it, + // the chunk loop should remain a canonical loop. 
+ CLI->assertOK(); +#endif + + return {DispatchAfter, DispatchAfter->getFirstInsertionPt()}; +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, bool NeedsBarrier) { - // Currently only supports static schedules. - return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier); + InsertPointTy AllocaIP, bool NeedsBarrier, + llvm::omp::ScheduleKind SchedKind, + llvm::Value *ChunkSize) { + switch (SchedKind) { + case llvm::omp::ScheduleKind ::OMP_SCHEDULE_Default: + assert(!ChunkSize && "No chunk size with default schedule (which for clang " + "is static non-chunked)"); + LLVM_FALLTHROUGH; + case llvm::omp::ScheduleKind ::OMP_SCHEDULE_Static: + if (ChunkSize) + return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier, + ChunkSize); + return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier); + case llvm::omp::ScheduleKind ::OMP_SCHEDULE_Auto: + assert(!ChunkSize && "Chunk size with auto scheduling not user-defined"); + return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, OMPScheduleType::Auto, + NeedsBarrier, nullptr); + case llvm::omp::ScheduleKind ::OMP_SCHEDULE_Dynamic: + return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, + OMPScheduleType::DynamicChunked, + NeedsBarrier, ChunkSize); + case llvm::omp::ScheduleKind ::OMP_SCHEDULE_Guided: + return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, + OMPScheduleType::GuidedChunked, + NeedsBarrier, ChunkSize); + case llvm::omp::ScheduleKind ::OMP_SCHEDULE_Runtime: + assert(!ChunkSize && + "Chunk size with runtime scheduling implied to be one"); + return applyDynamicWorkshareLoop( + DL, CLI, AllocaIP, OMPScheduleType::Runtime, NeedsBarrier, nullptr); + } + + llvm_unreachable("Unknown/unimplemented schedule kind"); } /// Returns an LLVM function to call for initializing loop bounds using OpenMP @@ -1763,27 +1997,6 @@ return AfterIP; } -/// Make \p Source branch to \p Target. 
-/// -/// Handles two situations: -/// * \p Source already has an unconditional branch. -/// * \p Source is a degenerate block (no terminator because the BB is -/// the current head of the IR construction). -static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) { - if (Instruction *Term = Source->getTerminator()) { - auto *Br = cast(Term); - assert(!Br->isConditional() && - "BB's terminator must be an unconditional branch (or degenerate)"); - BasicBlock *Succ = Br->getSuccessor(0); - Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true); - Br->setSuccessor(0, Target); - return; - } - - auto *NewBr = BranchInst::Create(Target, Source); - NewBr->setDebugLoc(DL); -} - /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is, /// after this \p OldTarget will be orphaned. static void redirectAllPredecessorsTo(BasicBlock *OldTarget, @@ -3545,6 +3758,51 @@ llvm_unreachable("Missing preheader"); } +void CanonicalLoopInfo::setTripCount(Value *TripCount) { + assert(isValid() && "Requires a valid canonical loop"); + + Instruction *CmpI = &getCond()->front(); + assert(isa(CmpI) && "First inst must compare IV with TripCount"); + CmpI->setOperand(1, TripCount); + +#ifndef NDEBUG + assertOK(); +#endif +} + +void CanonicalLoopInfo::mapIndVar( + llvm::function_ref Updater) { + assert(isValid() && "Requires a valid canonical loop"); + + Instruction *OldIV = getIndVar(); + + // Record all uses excluding those introduced by the updater. Uses by the + // CanonicalLoopInfo itself to keep track of the number of iterations are + // excluded. 
+ SmallVector ReplacableUses; + for (Use &U : OldIV->uses()) { + auto *User = dyn_cast(U.getUser()); + if (!User) + continue; + if (User->getParent() == getCond()) + continue; + if (User->getParent() == getLatch()) + continue; + ReplacableUses.push_back(&U); + } + + // Run the updater that may introduce new uses + Value *NewIV = Updater(OldIV); + + // Replace the old uses with the value returned by the updater. + for (Use *U : ReplacableUses) + U->set(NewIV); + +#ifndef NDEBUG + assertOK(); +#endif +} + void CanonicalLoopInfo::assertOK() const { #ifndef NDEBUG // No constraints if this object currently does not describe a loop. diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -113,6 +113,33 @@ << " in function " << F->getName(); } +/// Populate Calls with call instructions calling the function with the given +/// FnID from the given function F. +static void findCalls(Function *F, omp::RuntimeFunction FnID, + OpenMPIRBuilder &OMPBuilder, + SmallVectorImpl &Calls) { + Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); + for (BasicBlock &BB : *F) { + for (Instruction &I : BB) { + auto *Call = dyn_cast(&I); + if (Call && Call->getCalledFunction() == Fn) + Calls.push_back(Call); + } + } +} + +/// Assuming \p F contains only one call to the function with the given \p FnID, +/// return that call. 
+static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID, + OpenMPIRBuilder &OMPBuilder) { + SmallVector Calls; + findCalls(F, FnID, OMPBuilder, Calls); + EXPECT_EQ(1u, Calls.size()); + if (Calls.size() != 1) + return nullptr; + return Calls.front(); +} + class OpenMPIRBuilderTest : public testing::Test { protected: void SetUp() override { @@ -1935,6 +1962,67 @@ EXPECT_EQ(NumCallsInExitBlock, 3u); } +TEST_F(OpenMPIRBuilderTest, StaticChunkedWorkshareLoop) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + + BasicBlock *Body; + CanonicalLoopInfo *CLI = + buildSingleLoopFunction(DL, OMPBuilder, nullptr, &Body); + + Type *LCTy = Type::getInt32Ty(Ctx); + Value *ChunkSize = ConstantInt::get(LCTy, 5); + InsertPointTy AllocaIP{&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()}; + OMPBuilder.applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, + /*NeedsBarrier=*/true, ChunkSize); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + BasicBlock *Entry = &F->getEntryBlock(); + BasicBlock *Preheader = Entry->getSingleSuccessor(); + + BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor(); + BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor(); + BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor(); + BasicBlock *DispatchBody = succ_begin(DispatchCond)[0]; + BasicBlock *DispatchExit = succ_begin(DispatchCond)[1]; + BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor(); + BasicBlock *Return = DispatchAfter->getSingleSuccessor(); + + BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor(); + BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor(); + BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor(); + BasicBlock *ChunkBody = succ_begin(ChunkCond)[0]; + BasicBlock *ChunkExit = succ_begin(ChunkCond)[1]; + BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor(); + BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor(); + + 
BasicBlock *DispatchInc = ChunkAfter; + + EXPECT_EQ(ChunkBody, Body); + EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader); + EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader); + + EXPECT_TRUE(isa(Return->front())); + + CallInst *InitCall = findSingleCall( + F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u, OMPBuilder); + EXPECT_EQ(InitCall->getParent(), Preheader); + EXPECT_EQ(cast(InitCall->getArgOperand(2))->getSExtValue(), 33); + EXPECT_EQ(cast(InitCall->getArgOperand(7))->getSExtValue(), 1); + EXPECT_EQ(cast(InitCall->getArgOperand(8))->getSExtValue(), 5); + + CallInst *FiniCall = findSingleCall( + F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder); + EXPECT_EQ(FiniCall->getParent(), DispatchExit); + + CallInst *BarrierCall = findSingleCall( + F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder); + EXPECT_EQ(BarrierCall->getParent(), DispatchExit); +} + TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -3152,21 +3240,6 @@ return Builder.saveIP(); } -/// Populate Calls with call instructions calling the function with the given -/// FnID from the given function F. 
-static void findCalls(Function *F, omp::RuntimeFunction FnID, - OpenMPIRBuilder &OMPBuilder, - SmallVectorImpl &Calls) { - Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); - for (BasicBlock &BB : *F) { - for (Instruction &I : BB) { - auto *Call = dyn_cast(&I); - if (Call && Call->getCalledFunction() == Fn) - Calls.push_back(Call); - } - } -} - TEST_F(OpenMPIRBuilderTest, CreateReductions) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -794,8 +794,9 @@ bool isSimd = loop.simd_modifier(); if (schedule == omp::ClauseScheduleKind::Static) { - ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, - !loop.nowait(), chunk); + ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, + !loop.nowait(), + llvm::omp::OMP_SCHEDULE_Static, chunk); } else { llvm::omp::OMPScheduleType schedType; switch (schedule) { diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -423,6 +423,53 @@ // ----- +llvm.func @body(i32) + +// CHECK-LABEL: @test_omp_wsloop_static_defchunk +llvm.func @test_omp_wsloop_static_defchunk(%lb : i32, %ub : i32, %step : i32) -> () { + omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(static) { + // CHECK: call void @__kmpc_for_static_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 34, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32 1, i32 0) + // CHECK: call void @__kmpc_for_static_fini + llvm.call @body(%iv) : (i32) -> () + omp.yield + } + llvm.return +} + +// ----- + +llvm.func @body(i32) + +// CHECK-LABEL: 
@test_omp_wsloop_static_1 +llvm.func @test_omp_wsloop_static_1(%lb : i32, %ub : i32, %step : i32) -> () { + %static_chunk_size = llvm.mlir.constant(1 : i32) : i32 + omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(static = %static_chunk_size : i32) { + // CHECK: call void @__kmpc_for_static_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 33, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32 1, i32 1) + // CHECK: call void @__kmpc_for_static_fini + llvm.call @body(%iv) : (i32) -> () + omp.yield + } + llvm.return +} + +// ----- + +llvm.func @body(i32) + +// CHECK-LABEL: @test_omp_wsloop_static_2 +llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () { + %static_chunk_size = llvm.mlir.constant(2 : i32) : i32 + omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(static = %static_chunk_size : i32) { + // CHECK: call void @__kmpc_for_static_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 33, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32 1, i32 2) + // CHECK: call void @__kmpc_for_static_fini + llvm.call @body(%iv) : (i32) -> () + omp.yield + } + llvm.return +} + +// ----- + llvm.func @body(i64) llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {