Index: clang/test/OpenMP/cancel_codegen.cpp =================================================================== --- clang/test/OpenMP/cancel_codegen.cpp +++ clang/test/OpenMP/cancel_codegen.cpp @@ -1385,10 +1385,10 @@ // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK3: omp_section_loop.exit: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTIONS_END:%.*]] // CHECK3: omp_sections.end: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] @@ -1422,10 +1422,10 @@ // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK3: omp_section_loop.exit18: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK3: omp_section_loop.after19: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK3-NEXT: br label [[OMP_SECTIONS_END33:%.*]] // CHECK3: omp_sections.end33: // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[ARGC_ADDR]], align 4 @@ -2012,10 +2012,10 @@ // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK4: omp_section_loop.exit: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK4-NEXT: br label [[OMP_SECTIONS_END:%.*]] // CHECK4: omp_sections.end: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] @@ -2049,10 +2049,10 @@ // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK4: omp_section_loop.exit18: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK4: omp_section_loop.after19: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK4-NEXT: br label [[OMP_SECTIONS_END33:%.*]] // CHECK4: omp_sections.end33: // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 @@ -3879,10 +3879,10 @@ // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK9: omp_section_loop.exit: // CHECK9-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK9-NEXT: br label [[OMP_SECTIONS_END:%.*]] // CHECK9: omp_sections.end: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] @@ -3916,10 +3916,10 @@ // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK9: omp_section_loop.exit18: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK9: omp_section_loop.after19: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK9-NEXT: br label [[OMP_SECTIONS_END33:%.*]] // CHECK9: omp_sections.end33: // CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 @@ -4506,10 +4506,10 @@ // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK10: omp_section_loop.exit: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: omp_section_loop.after: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK10-NEXT: br label [[OMP_SECTIONS_END:%.*]] // CHECK10: omp_sections.end: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] @@ -4543,10 +4543,10 @@ // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] // CHECK10: omp_section_loop.exit18: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK10: omp_section_loop.after19: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK10-NEXT: br label [[OMP_SECTIONS_END33:%.*]] // CHECK10: omp_sections.end33: // CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 Index: clang/test/OpenMP/irbuilder_for_iterator.cpp =================================================================== --- clang/test/OpenMP/irbuilder_for_iterator.cpp +++ clang/test/OpenMP/irbuilder_for_iterator.cpp @@ -98,10 +98,10 @@ // CHECK-NEXT: br label [[OMP_LOOP_HEADER]] // CHECK: omp_loop.exit: // CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: 
[[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK: omp_loop.after: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK-NEXT: ret void // // Index: clang/test/OpenMP/irbuilder_for_rangefor.cpp =================================================================== --- clang/test/OpenMP/irbuilder_for_rangefor.cpp +++ clang/test/OpenMP/irbuilder_for_rangefor.cpp @@ -114,10 +114,10 @@ // CHECK-NEXT: br label [[OMP_LOOP_HEADER]] // CHECK: omp_loop.exit: // CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK: omp_loop.after: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK-NEXT: ret void // // Index: clang/test/OpenMP/irbuilder_for_unsigned.c =================================================================== --- clang/test/OpenMP/irbuilder_for_unsigned.c +++ clang/test/OpenMP/irbuilder_for_unsigned.c @@ -90,10 +90,10 @@ // CHECK-NEXT: br label [[OMP_LOOP_HEADER]] // CHECK: omp_loop.exit: // CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] // CHECK: omp_loop.after: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) // CHECK-NEXT: ret void // // Index: clang/test/OpenMP/irbuilder_nested_parallel_for.c =================================================================== --- clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -23,15 +23,15 @@ // // CHECK-DEBUG-LABEL: @_Z14parallel_for_0v( // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG12:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG13:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG13:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG14:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG17:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG18:![0-9]+]] // void parallel_for_0(void) { #pragma omp parallel @@ -66,20 +66,20 @@ // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG77:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// 
CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG79:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK-DEBUG: omp.par.outlined.exit16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG80:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]] // void parallel_for_1(float *r, int a, double b) { #pragma omp parallel @@ -161,10 +161,10 @@ // CHECK-NEXT: br label [[OMP_LOOP_HEADER191]] // CHECK: omp_loop.exit195: // CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER196:%.*]] // CHECK: omp_loop.after196: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]) // CHECK-NEXT: ret void // // CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid( @@ -181,68 +181,68 @@ // CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG138:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK-DEBUG: omp.par.outlined.exit184: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG146]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG147:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: call void 
@llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: 
omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG152:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg 
[[DBG151:![0-9]+]] +// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] 
// void parallel_for_2(float *r, int a, double b) { #pragma omp parallel Index: clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c =================================================================== --- clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -5,141 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[N:.+]], i32* %[[N_ADDR]], align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store i32* %[[N_ADDR]], i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP2:.+]] = 
getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[TMP2]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 13 -// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 13 -// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0 -// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] 
] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]] -// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 13 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 13, %[[TMP13]] -// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP17]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP18:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP18]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP20:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: 
%[[TMP21:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP22:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP21]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP23:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP20]], %[[TMP23]] -// CHECK-NEXT: %[[TMP24:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP25:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP25]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP24]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP26:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP26]] -// CHECK-NEXT: %[[TMP27:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP28:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP28]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP27]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 
%[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) { #pragma omp for @@ -151,72 +16,186 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 1 -// CHECK-NEXT: %[[TMP5:.+]] = load i32*, i32** %[[TMP4]], align 8 -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP5]], align 4 -// CHECK-NEXT: store i32 %[[TMP6]], i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// 
CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add 
i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 13} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_for +// CHECK-SAME: (i32 [[N:%.*]], float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* 
[[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I]], i32** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP4:%.*]] = udiv i32 [[DOTCOUNT]], 13 +// CHECK-NEXT: [[TMP5:%.*]] = urem i32 [[DOTCOUNT]], 13 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP4]], [[TMP7]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP8]], i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK-NEXT: [[TMP12:%.*]] = add i32 
[[TMP11]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP12]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP15]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP16:%.*]] = mul nuw i32 13, [[TMP13]] +// CHECK-NEXT: [[TMP17:%.*]] = add nuw i32 [[TMP16]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I]], i32 [[TMP17]], %struct.anon.0* [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP18:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// 
CHECK-NEXT: [[TMP21:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK-NEXT: [[TMP24:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP26]] +// CHECK-NEXT: [[TMP27:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], float* [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DISTANCE]], i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: 
cond.true: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], [[TMP11]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], i32* [[TMP12]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +// CHECK-NEXT: ret void +// Index: 
clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c =================================================================== --- clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -11,159 +11,6 @@ double sind(double); -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_constant_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, 
i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 4 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 4, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP19]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double 
@sind(double %[[CONV]]) -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP22]] to double -// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM5]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX6]], align 4 -// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP25]] to double -// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM9]] -// CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX10]], align 4 -// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP28]] to double -// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]] -// CHECK-NEXT: %[[TMP29:.+]] = load float, float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[CONV13:.+]] = fpext float %[[TMP29]] to double -// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]] -// CHECK-NEXT: %[[TMP30:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP31:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP31]] to i64 -// CHECK-NEXT: %[[ARRAYIDX15:.+]] = 
getelementptr inbounds float, float* %[[TMP30]], i64 %[[IDXPROM14]] -// CHECK-NEXT: %[[TMP32:.+]] = load float, float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP32]] to double -// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]] -// CHECK-NEXT: %[[CONV18:.+]] = fptrunc double %[[ADD17]] to float -// CHECK-NEXT: store float %[[CONV18]], float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *d, float *e, float offset) { #pragma omp for @@ -175,69 +22,201 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca 
%struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 128, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: 
define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_constant_for +// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]], float* [[E:%.*]], float [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: 
[[E_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[OFFSET_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK-NEXT: store float* [[E]], float** [[E_ADDR]], align 8 +// CHECK-NEXT: store float [[OFFSET]], float* [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I]], i32** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[DOTCOUNT]], 4 +// CHECK-NEXT: [[TMP4:%.*]] = urem i32 [[DOTCOUNT]], 4 +// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP3]], 
[[TMP6]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP7]], i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], [[TMP8]] +// CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP11]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label 
[[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP15:%.*]] = mul nuw i32 4, [[TMP12]] +// CHECK-NEXT: [[TMP16:%.*]] = add nuw i32 [[TMP15]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I]], i32 [[TMP16]], %struct.anon.0* [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP17:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP19]] to double +// CHECK-NEXT: [[CALL:%.*]] = call double @sind(double [[CONV]]) +// CHECK-NEXT: [[TMP20:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[CONV4:%.*]] = fpext float [[TMP22]] to double +// CHECK-NEXT: [[MUL:%.*]] = fmul double [[CALL]], [[CONV4]] +// CHECK-NEXT: [[TMP23:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK-NEXT: [[CONV7:%.*]] = fpext float [[TMP25]] to double +// 
CHECK-NEXT: [[MUL8:%.*]] = fmul double [[MUL]], [[CONV7]] +// CHECK-NEXT: [[TMP26:%.*]] = load float*, float** [[E_ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK-NEXT: [[CONV11:%.*]] = fpext float [[TMP28]] to double +// CHECK-NEXT: [[MUL12:%.*]] = fmul double [[MUL8]], [[CONV11]] +// CHECK-NEXT: [[TMP29:%.*]] = load float, float* [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: [[CONV13:%.*]] = fpext float [[TMP29]] to double +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL12]], [[CONV13]] +// CHECK-NEXT: [[TMP30:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[TMP30]], i64 [[IDXPROM14]] +// CHECK-NEXT: [[TMP32:%.*]] = load float, float* [[ARRAYIDX15]], align 4 +// CHECK-NEXT: [[CONV16:%.*]] = fpext float [[TMP32]] to double +// CHECK-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], [[ADD]] +// CHECK-NEXT: [[CONV18:%.*]] = fptrunc double [[ADD17]] to float +// CHECK-NEXT: store float [[CONV18]], float* [[ARRAYIDX15]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DISTANCE]], i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 128, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], [[TMP8]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], i32* [[TMP9]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +// CHECK-NEXT: ret void +// Index: 
clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c =================================================================== --- clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -9,163 +9,6 @@ double sind(double); -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_runtime_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[N:.+]], i32* %[[N_ADDR]], align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store float* %[[E:.+]], float** %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// 
CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store i32* %[[N_ADDR]], i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[TMP2]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0 -// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1 -// CHECK-NEXT: br label 
%[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]] -// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 4 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 4, %[[TMP13]] -// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP17]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP18:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 
%[[TMP19]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP18]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP20:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP20]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double %[[CONV]]) -// CHECK-NEXT: %[[TMP21:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP22:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP21]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP23:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP23]] to double -// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]] -// CHECK-NEXT: %[[TMP24:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP25:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP25]] to i64 -// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, float* %[[TMP24]], i64 %[[IDXPROM5]] -// CHECK-NEXT: %[[TMP26:.+]] = load float, float* %[[ARRAYIDX6]], align 4 -// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP26]] to double -// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]] -// CHECK-NEXT: %[[TMP27:.+]] = load float*, float** %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP28:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP28]] to i64 -// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, float* %[[TMP27]], i64 %[[IDXPROM9]] -// CHECK-NEXT: %[[TMP29:.+]] = load float, float* %[[ARRAYIDX10]], align 4 -// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP29]] to double -// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]] -// CHECK-NEXT: %[[TMP30:.+]] = load float, float* %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[CONV13:.+]] = fpext float %[[TMP30]] to double -// 
CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]] -// CHECK-NEXT: %[[TMP31:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP32:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP32]] to i64 -// CHECK-NEXT: %[[ARRAYIDX15:.+]] = getelementptr inbounds float, float* %[[TMP31]], i64 %[[IDXPROM14]] -// CHECK-NEXT: %[[TMP33:.+]] = load float, float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP33]] to double -// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]] -// CHECK-NEXT: %[[CONV18:.+]] = fptrunc double %[[ADD17]] to float -// CHECK-NEXT: store float %[[CONV18]], float* %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic_runtime_for(int n, float 
*a, float *b, float *c, float *d, float *e, float offset) { #pragma omp for @@ -177,72 +20,208 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 1 -// CHECK-NEXT: %[[TMP5:.+]] = load i32*, i32** %[[TMP4]], align 8 -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP5]], align 4 -// CHECK-NEXT: store i32 %[[TMP6]], i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: %[[TMP11:.+]] = load i32, 
i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = 
!{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_runtime_for +// CHECK-SAME: (i32 [[N:%.*]], float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]], float* [[E:%.*]], float [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[E_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[OFFSET_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK-NEXT: store float* [[E]], float** [[E_ADDR]], align 8 +// CHECK-NEXT: store float [[OFFSET]], float* [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I]], i32** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* 
[[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP4:%.*]] = udiv i32 [[DOTCOUNT]], 4 +// CHECK-NEXT: [[TMP5:%.*]] = urem i32 [[DOTCOUNT]], 4 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP4]], [[TMP7]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP8]], i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ 
[[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP12]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP15]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP16:%.*]] = mul nuw i32 4, [[TMP13]] +// CHECK-NEXT: [[TMP17:%.*]] = add nuw i32 [[TMP16]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I]], i32 [[TMP17]], %struct.anon.0* [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP18:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK-NEXT: [[CALL:%.*]] = call double @sind(double [[CONV]]) +// CHECK-NEXT: [[TMP21:%.*]] = load float*, float** 
[[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[CONV4:%.*]] = fpext float [[TMP23]] to double +// CHECK-NEXT: [[MUL:%.*]] = fmul double [[CALL]], [[CONV4]] +// CHECK-NEXT: [[TMP24:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM5]] +// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK-NEXT: [[CONV7:%.*]] = fpext float [[TMP26]] to double +// CHECK-NEXT: [[MUL8:%.*]] = fmul double [[MUL]], [[CONV7]] +// CHECK-NEXT: [[TMP27:%.*]] = load float*, float** [[E_ADDR]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK-NEXT: [[CONV11:%.*]] = fpext float [[TMP29]] to double +// CHECK-NEXT: [[MUL12:%.*]] = fmul double [[MUL8]], [[CONV11]] +// CHECK-NEXT: [[TMP30:%.*]] = load float, float* [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: [[CONV13:%.*]] = fpext float [[TMP30]] to double +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL12]], [[CONV13]] +// CHECK-NEXT: [[TMP31:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[TMP31]], i64 [[IDXPROM14]] +// CHECK-NEXT: [[TMP33:%.*]] = load float, float* 
[[ARRAYIDX15]], align 4 +// CHECK-NEXT: [[CONV16:%.*]] = fpext float [[TMP33]] to double +// CHECK-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], [[ADD]] +// CHECK-NEXT: [[CONV18:%.*]] = fptrunc double [[ADD17]] to float +// CHECK-NEXT: store float [[CONV18]], float* [[ARRAYIDX15]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DISTANCE]], i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** 
[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], [[TMP11]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], i32* [[TMP12]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +// CHECK-NEXT: ret void +// Index: clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c =================================================================== --- clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -5,137 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// 
CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], i32* %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call 
void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 
2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP16]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { #pragma omp for @@ -147,69 +16,179 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, 
%struct.anon* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8 -// CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: 
%[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_factor_for +// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B]], float** 
[[B_ADDR]], align 8 +// CHECK-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I]], i32** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[DOTCOUNT]], 2 +// CHECK-NEXT: [[TMP4:%.*]] = urem i32 [[DOTCOUNT]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP3]], [[TMP6]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP7]], i32* [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 +// 
CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], [[TMP8]] +// CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP11]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP15:%.*]] = mul nuw i32 2, [[TMP12]] +// CHECK-NEXT: [[TMP16:%.*]] = add nuw i32 [[TMP15]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I]], i32 [[TMP16]], %struct.anon.0* [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP17:%.*]] = load float*, float** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], 
i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float*, float** [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK-NEXT: [[TMP23:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP25]] +// CHECK-NEXT: [[TMP26:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], float* [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: br label 
[[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DISTANCE]], i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 2, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTSTART]], align 4 +// 
CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], [[TMP8]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], i32* [[TMP9]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-NEXT: store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +// CHECK-NEXT: ret void +// Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- 
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -407,6 +407,25 @@ bool NeedsBarrier, Value *Chunk = nullptr); + /// Insert doacross loop info in a workshare loop. + /// + /// In \p AllocaIP, allocate space for the loop bounds info. In the front of + /// \p PreHeaderBB, store \p DoacrossVars in the loop bounds info and call + /// doacross loop init runtime function. Call the fini doacross loop runtime + /// function in \p ExitBB. + /// + /// \param DL Debug location for instructions. + /// \param AllocaIP An insertion point for Alloca instructions. + /// \param PreHeaderBB The preheader basic block of the loop. + /// \param ExitBB The exit basic block of the loop. + /// \param OrderedVal The ordered parameter (n) specified in ordered clause. + /// \param DoacrossVars The lower bounds, upper bounds, and steps of n outer + /// loops. + void applyDoacrossLoop(DebugLoc DL, InsertPointTy AllocaIP, + BasicBlock *PreHeaderBB, BasicBlock *ExitBB, + std::int64_t OrderedVal, + ArrayRef DoacrossVars); + /// Modifies the canonical loop to be a workshare loop. /// /// This takes a \p LoopInfo representing a canonical loop, such as the one Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1549,16 +1549,16 @@ CLI->getExit()->getTerminator()->getIterator()); Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum}); + Builder.restoreIP(CLI->getAfterIP()); // Add the barrier if requested. 
if (NeedsBarrier) createBarrier(LocationDescription(Builder.saveIP(), DL), omp::Directive::OMPD_for, /* ForceSimpleCall */ false, /* CheckCancelFlag */ false); - InsertPointTy AfterIP = CLI->getAfterIP(); CLI->invalidate(); - return AfterIP; + return Builder.saveIP(); } OpenMPIRBuilder::InsertPointTy @@ -1699,16 +1699,80 @@ assert(BI->getSuccessor(1) == Exit); BI->setSuccessor(1, OuterCond); + Builder.restoreIP(AfterIP); // Add the barrier if requested. - if (NeedsBarrier) { - Builder.SetInsertPoint(&Exit->back()); + if (NeedsBarrier) createBarrier(LocationDescription(Builder.saveIP(), DL), omp::Directive::OMPD_for, /* ForceSimpleCall */ false, /* CheckCancelFlag */ false); - } CLI->invalidate(); - return AfterIP; + return Builder.saveIP(); +} + +void OpenMPIRBuilder::applyDoacrossLoop(DebugLoc DL, InsertPointTy AllocaIP, + BasicBlock *PreHeaderBB, + BasicBlock *ExitBB, + std::int64_t OrderedVal, + ArrayRef DoacrossVars) { + for (size_t I = 0; I < DoacrossVars.size(); I++) + assert(DoacrossVars[I]->getType()->isIntegerTy(64) && + "Doacross init runtime call requires loop bounds with i64 type"); + // Set up the source location value for OpenMP runtime. + Builder.SetInsertPoint(&PreHeaderBB->front()); + Builder.SetCurrentDebugLocation(DL); + + Constant *SrcLocStr = getOrCreateSrcLocStr(DL); + Value *SrcLoc = getOrCreateIdent(SrcLocStr); + + // Allocate space for loop bounds and generate alloc instruction. + SmallVector ElementsTys; + ElementsTys.emplace_back(Int64); // lower + ElementsTys.emplace_back(Int64); // upper + ElementsTys.emplace_back(Int64); // stride(step) + auto *KmpDimTy = StructType::create(ElementsTys, "kmp_dim"); + auto *DimsTy = ArrayType::get(KmpDimTy, OrderedVal); + + Builder.restoreIP(AllocaIP); + AllocaInst *DimsInst = Builder.CreateAlloca(DimsTy, nullptr, "dims"); + DimsInst->setAlignment(Align(8)); + + // Emit doacross init call in preheader front. + Builder.SetInsertPoint(&PreHeaderBB->front()); + + // Store doacross loop vars in loop bounds. 
+ for (std::int64_t I = 0; I < OrderedVal; I++) { + Value *LoopBounds = Builder.CreateInBoundsGEP( + DimsTy, DimsInst, {Builder.getInt64(0), Builder.getInt64(I)}); + Value *LowerBound = Builder.CreateInBoundsGEP( + KmpDimTy, LoopBounds, {Builder.getInt32(0), Builder.getInt32(0)}); + StoreInst *LBInst = Builder.CreateStore(DoacrossVars[I * 3], LowerBound); + LBInst->setAlignment(Align(8)); + Value *UpperBound = Builder.CreateInBoundsGEP( + KmpDimTy, LoopBounds, {Builder.getInt32(0), Builder.getInt32(1)}); + StoreInst *UBInst = + Builder.CreateStore(DoacrossVars[I * 3 + 1], UpperBound); + UBInst->setAlignment(Align(8)); + Value *Step = Builder.CreateInBoundsGEP( + KmpDimTy, LoopBounds, {Builder.getInt32(0), Builder.getInt32(2)}); + StoreInst *StepInst = Builder.CreateStore(DoacrossVars[I * 3 + 2], Step); + StepInst->setAlignment(Align(8)); + } + + Value *LoopBoundsBase = Builder.CreateInBoundsGEP( + DimsTy, DimsInst, {Builder.getInt64(0), Builder.getInt64(0)}); + Value *LoopBoundsBaseInt8Ptr = Builder.CreateBitCast(LoopBoundsBase, Int8Ptr); + + Value *ThreadId = getOrCreateThreadID(SrcLoc); + Function *RTLFnInit = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_init); + Builder.CreateCall(RTLFnInit, {SrcLoc, ThreadId, Builder.getInt32(OrderedVal), + LoopBoundsBaseInt8Ptr}); + + Builder.SetInsertPoint(&ExitBB->back()); + Function *RTLFnFini = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_fini); + Builder.CreateCall(RTLFnFini, {SrcLoc, ThreadId}); } /// Make \p Source branch to \p Target. 
Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1764,11 +1764,13 @@ BasicBlock *Body = CLI->getBody(); Value *IV = CLI->getIndVar(); BasicBlock *ExitBlock = CLI->getExit(); + BasicBlock *AfterBlock = CLI->getAfter(); Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); InsertPointTy AllocaIP = Builder.saveIP(); - OMPBuilder.applyStaticWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true); + InsertPointTy EndIP = OMPBuilder.applyStaticWorkshareLoop( + DL, CLI, AllocaIP, /*NeedsBarrier=*/true); BasicBlock *Cond = Body->getSinglePredecessor(); Instruction *Cmp = &*Cond->begin(); @@ -1834,11 +1836,22 @@ // increment and in the statement that adds the lower bound to it. EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); - // The exit block should contain the "fini" call and the barrier call, - // plus the call to obtain the thread ID. + // The exit block should contain the "fini" call. size_t NumCallsInExitBlock = count_if(*ExitBlock, [](Instruction &I) { return isa(I); }); - EXPECT_EQ(NumCallsInExitBlock, 3u); + EXPECT_EQ(NumCallsInExitBlock, 1u); + + // The after block should contain the barrier call, plus the call to obtain + // the thread ID. + size_t NumCallsInAfterBlock = + count_if(*AfterBlock, [](Instruction &I) { return isa(I); }); + EXPECT_EQ(NumCallsInAfterBlock, 2u); + + // Add a termination to our block and check that it is internally consistent. + Builder.restoreIP(EndIP); + Builder.CreateRetVoid(); + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { @@ -1882,7 +1895,7 @@ // createDynamicWorkshareLoop. 
InsertPointTy AfterIP = CLI->getAfterIP(); BasicBlock *Preheader = CLI->getPreheader(); - BasicBlock *ExitBlock = CLI->getExit(); + BasicBlock *AfterBlock = CLI->getAfter(); Value *IV = CLI->getIndVar(); InsertPointTy EndIP = @@ -1944,11 +1957,11 @@ // increment and in the statement that adds the lower bound to it. EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); - // The exit block should contain the barrier call, plus the call to obtain + // The after block should contain the barrier call, plus the call to obtain // the thread ID. - size_t NumCallsInExitBlock = - count_if(*ExitBlock, [](Instruction &I) { return isa(I); }); - EXPECT_EQ(NumCallsInExitBlock, 2u); + size_t NumCallsInAfterBlock = + count_if(*AfterBlock, [](Instruction &I) { return isa(I); }); + EXPECT_EQ(NumCallsInAfterBlock, 2u); // Add a termination to our block and check that it is internally consistent. Builder.restoreIP(EndIP); @@ -1975,6 +1988,139 @@ omp::OMPScheduleType::Runtime | omp::OMPScheduleType::ModifierMonotonic)); +TEST_F(OpenMPIRBuilderTest, DoacrossLoop) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + IRBuilder<> Builder(BB); + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + Type *LCTy = Type::getInt32Ty(Ctx); + Value *StartVal = ConstantInt::get(LCTy, 10); + Value *StopVal = ConstantInt::get(LCTy, 52); + Value *StepVal = ConstantInt::get(LCTy, 2); + auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; + + CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( + Loc, LoopBodyGen, StartVal, StopVal, StepVal, + /*IsSigned=*/false, /*InclusiveStop=*/false); + BasicBlock *Preheader = CLI->getPreheader(); + BasicBlock *ExitBlock = CLI->getExit(); + + Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); + InsertPointTy AllocaIP = Builder.saveIP(); + + InsertPointTy EndIP = OMPBuilder.applyStaticWorkshareLoop( + DL, CLI, AllocaIP, /*NeedsBarrier=*/true); + + SmallVector 
DoacrossVars; + Type *I64Ty = Type::getInt64Ty(Ctx); + DoacrossVars.emplace_back(ConstantInt::get(I64Ty, 10)); + DoacrossVars.emplace_back(ConstantInt::get(I64Ty, 52)); + DoacrossVars.emplace_back(ConstantInt::get(I64Ty, 2)); + std::int64_t OrderedVal = 1; + OMPBuilder.applyDoacrossLoop(DL, AllocaIP, Preheader, ExitBlock, OrderedVal, + DoacrossVars); + + auto AllocaIter = BB->begin(); + ASSERT_GE(std::distance(BB->begin(), BB->end()), 5); + AllocaIter++; // PLastIter + AllocaIter++; // PLowerBound + AllocaIter++; // PUpperBound + AllocaIter++; // PStride + AllocaInst *DIMS = dyn_cast(&*(AllocaIter)); + EXPECT_NE(DIMS, nullptr); + EXPECT_TRUE(DIMS->getAllocatedType()->isArrayTy()); + EXPECT_EQ(DIMS->getArraySize(), ConstantInt::get(LCTy, 1)); + EXPECT_EQ(DIMS->getAlignment(), 8); + Type *KmpDimTy = DIMS->getAllocatedType()->getArrayElementType(); + EXPECT_TRUE(KmpDimTy->isStructTy()); + EXPECT_EQ(KmpDimTy->getStructNumElements(), 3); + EXPECT_TRUE(KmpDimTy->getStructElementType(0)->isIntegerTy(64)); + EXPECT_TRUE(KmpDimTy->getStructElementType(1)->isIntegerTy(64)); + EXPECT_TRUE(KmpDimTy->getStructElementType(2)->isIntegerTy(64)); + + auto PreheaderIter = Preheader->begin(); + ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 17); + GetElementPtrInst *ADDR = dyn_cast(&*(PreheaderIter++)); + GetElementPtrInst *GEPLB = dyn_cast(&*(PreheaderIter++)); + StoreInst *StoreLB = dyn_cast(&*(PreheaderIter++)); + GetElementPtrInst *GEPUB = dyn_cast(&*(PreheaderIter++)); + StoreInst *StoreUB = dyn_cast(&*(PreheaderIter++)); + GetElementPtrInst *GEPStep = dyn_cast(&*(PreheaderIter++)); + StoreInst *StoreStep = dyn_cast(&*(PreheaderIter++)); + GetElementPtrInst *Base = dyn_cast(&*(PreheaderIter++)); + BitCastInst *BaseI8 = dyn_cast(&*(PreheaderIter++)); + CallInst *InitGTID = dyn_cast(&*(PreheaderIter++)); + CallInst *DoacrossInit = dyn_cast(&*(PreheaderIter++)); + EXPECT_NE(ADDR, nullptr); + EXPECT_NE(GEPLB, nullptr); + EXPECT_NE(StoreLB, nullptr); + 
EXPECT_NE(GEPUB, nullptr); + EXPECT_NE(StoreUB, nullptr); + EXPECT_NE(GEPStep, nullptr); + EXPECT_NE(StoreStep, nullptr); + EXPECT_NE(Base, nullptr); + EXPECT_NE(BaseI8, nullptr); + EXPECT_NE(InitGTID, nullptr); + EXPECT_NE(DoacrossInit, nullptr); + EXPECT_EQ(ADDR->getNumOperands(), 3); + EXPECT_EQ(ADDR->getOperand(0), DIMS); + EXPECT_EQ(ADDR->getOperand(1), ConstantInt::get(I64Ty, 0)); + EXPECT_EQ(ADDR->getOperand(2), ConstantInt::get(I64Ty, 0)); + EXPECT_EQ(GEPLB->getNumOperands(), 3); + EXPECT_EQ(GEPLB->getOperand(0), ADDR); + EXPECT_EQ(GEPLB->getOperand(1), ConstantInt::get(LCTy, 0)); + EXPECT_EQ(GEPLB->getOperand(2), ConstantInt::get(LCTy, 0)); + EXPECT_EQ(StoreLB->getNumOperands(), 2); + EXPECT_EQ(StoreLB->getOperand(0), DoacrossVars[0]); + EXPECT_EQ(StoreLB->getOperand(1), GEPLB); + EXPECT_EQ(StoreLB->getAlignment(), 8); + EXPECT_EQ(GEPUB->getNumOperands(), 3); + EXPECT_EQ(GEPUB->getOperand(0), ADDR); + EXPECT_EQ(GEPUB->getOperand(1), ConstantInt::get(LCTy, 0)); + EXPECT_EQ(GEPUB->getOperand(2), ConstantInt::get(LCTy, 1)); + EXPECT_EQ(StoreUB->getNumOperands(), 2); + EXPECT_EQ(StoreUB->getOperand(0), DoacrossVars[1]); + EXPECT_EQ(StoreUB->getOperand(1), GEPUB); + EXPECT_EQ(StoreUB->getAlignment(), 8); + EXPECT_EQ(GEPStep->getNumOperands(), 3); + EXPECT_EQ(GEPStep->getOperand(0), ADDR); + EXPECT_EQ(GEPStep->getOperand(1), ConstantInt::get(LCTy, 0)); + EXPECT_EQ(GEPStep->getOperand(2), ConstantInt::get(LCTy, 2)); + EXPECT_EQ(StoreStep->getNumOperands(), 2); + EXPECT_EQ(StoreStep->getOperand(0), DoacrossVars[2]); + EXPECT_EQ(StoreStep->getOperand(1), GEPStep); + EXPECT_EQ(StoreStep->getAlignment(), 8); + EXPECT_EQ(Base->getNumOperands(), 3); + EXPECT_EQ(Base->getOperand(0), DIMS); + EXPECT_EQ(Base->getOperand(1), ConstantInt::get(I64Ty, 0)); + EXPECT_EQ(Base->getOperand(2), ConstantInt::get(I64Ty, 0)); + EXPECT_EQ(BaseI8->getNumOperands(), 1); + EXPECT_EQ(BaseI8->getOperand(0), Base); + EXPECT_EQ(InitGTID->getCalledFunction()->getName(), + 
"__kmpc_global_thread_num"); + EXPECT_EQ(DoacrossInit->getCalledFunction()->getName(), + "__kmpc_doacross_init"); + EXPECT_EQ(DoacrossInit->getNumOperands(), 5); + EXPECT_EQ(DoacrossInit->getOperand(2), ConstantInt::get(LCTy, OrderedVal)); + EXPECT_EQ(DoacrossInit->getOperand(3), BaseI8); + + auto ExitIter = ExitBlock->begin(); + ASSERT_GE(std::distance(ExitBlock->begin(), ExitBlock->end()), 2); + ExitIter++; // __kmpc_for_static_fini + CallInst *DoacrossFini = dyn_cast(&*(ExitIter++)); + EXPECT_NE(DoacrossFini, nullptr); + EXPECT_EQ(DoacrossFini->getCalledFunction()->getName(), + "__kmpc_doacross_fini"); + + // Add a termination to our block and check that it is internally consistent. + Builder.restoreIP(EndIP); + Builder.CreateRetVoid(); + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); +} + TEST_F(OpenMPIRBuilderTest, MasterDirective) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -3600,22 +3746,29 @@ EXPECT_EQ(FoundForInit, true); bool FoundForExit = false; - bool FoundBarrier = false; for (Instruction &Inst : *ForExitBB) { if (isa(Inst)) { if (cast(&Inst)->getCalledFunction()->getName() == "__kmpc_for_static_fini") { FoundForExit = true; + break; } + } + } + EXPECT_EQ(FoundForExit, true); + + BasicBlock *ForAfterBB = ForExitBB->getSingleSuccessor(); + EXPECT_NE(ForAfterBB, nullptr); + bool FoundBarrier = false; + for (Instruction &Inst : *ForAfterBB) { + if (isa(Inst)) { if (cast(&Inst)->getCalledFunction()->getName() == "__kmpc_barrier") { FoundBarrier = true; - } - if (FoundForExit && FoundBarrier) break; + } } } - EXPECT_EQ(FoundForExit, true); EXPECT_EQ(FoundBarrier, true); EXPECT_NE(SwitchBB, nullptr); Index: mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td =================================================================== --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -273,6 +273,10 @@ The optional `ordered_val` attribute specifies how many 
loops are associated with the do loop construct. + The `doacross_vars` are arguments of doacross loop nest, which is formed by + "n" outer loops when the parameter "n" is specified in the ordered clause. The arguments + store the loop bounds info, which is required in the doacross init runtime call. + The optional `order` attribute specifies which order the iterations of the associate loops are executed in. Currently the only option for this attribute is "concurrent". @@ -295,6 +299,7 @@ Confined, [IntMinValue<0>]>:$collapse_val, UnitAttr:$nowait, Confined, [IntMinValue<0>]>:$ordered_val, + Variadic:$doacross_vars, OptionalAttr:$order_val, UnitAttr:$inclusive); @@ -311,8 +316,9 @@ "ValueRange":$linear_step_vars, "ValueRange":$reduction_vars, "StringAttr":$schedule_val, "Value":$schedule_chunk_var, "IntegerAttr":$collapse_val, "UnitAttr":$nowait, - "IntegerAttr":$ordered_val, "StringAttr":$order_val, - "UnitAttr":$inclusive, CArg<"bool", "true">:$buildBody)>, + "IntegerAttr":$ordered_val, "ValueRange":$doacross_vars, + "StringAttr":$order_val, "UnitAttr":$inclusive, + CArg<"bool", "true">:$buildBody)>, OpBuilder<(ins "TypeRange":$resultTypes, "ValueRange":$operands, CArg<"ArrayRef", "{}">:$attributes)> ]; Index: mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp =================================================================== --- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -536,6 +536,7 @@ collapseClause, orderClause, orderedClause, + doacrossVirturalClause, memoryOrderClause, hintClause, COUNT @@ -608,6 +609,11 @@ SmallVector linearTypes; SmallVector linearSteps; + // "doacross" is not a real clause; it is attached to the "ordered" clause + // when the ordered value is greater than 0. 
+ SmallVector doacrossVars; + SmallVector doacrossTypes; + SmallString<8> schedule; SmallVector> modifiers; Optional scheduleChunkSize; @@ -742,17 +748,18 @@ result.addAttribute("collapse_val", attr); } else if (clauseKeyword == "ordered") { mlir::IntegerAttr attr; - if (checkAllowed(orderedClause)) + auto type = parser.getBuilder().getI64Type(); + if (checkAllowed(orderedClause) || parser.parseLParen() || + parser.parseAttribute(attr, type) || parser.parseRParen()) return failure(); - if (succeeded(parser.parseOptionalLParen())) { - auto type = parser.getBuilder().getI64Type(); - if (parser.parseAttribute(attr, type) || parser.parseRParen()) + result.addAttribute("ordered_val", attr); + if (attr.getValue().getSExtValue() > 0) { + if (checkAllowed(doacrossVirturalClause) || + parser.parseKeyword("doacross") || + parseOperandAndTypeList(parser, doacrossVars, doacrossTypes)) return failure(); - } else { - // Use 0 to represent no ordered parameter was specified - attr = parser.getBuilder().getI64IntegerAttr(0); + clauseSegments[pos[doacrossVirturalClause]] = doacrossVars.size(); } - result.addAttribute("ordered_val", attr); } else if (clauseKeyword == "order") { StringRef order; if (checkAllowed(orderClause) || parser.parseLParen() || @@ -880,6 +887,13 @@ } } + // Add ordered doacross parameters + if (done[doacrossVirturalClause] && + clauseSegments[pos[doacrossVirturalClause]] && + failed(parser.resolveOperands(doacrossVars, doacrossTypes, + doacrossVars[0].location, result.operands))) + return failure(); + segments.insert(segments.end(), clauseSegments.begin(), clauseSegments.end()); return success(); @@ -1040,9 +1054,9 @@ return failure(); SmallVector clauses = { - privateClause, firstprivateClause, lastprivateClause, linearClause, - reductionClause, collapseClause, orderClause, orderedClause, - nowaitClause, scheduleClause}; + privateClause, firstprivateClause, lastprivateClause, linearClause, + reductionClause, collapseClause, orderClause, orderedClause, + 
nowaitClause, scheduleClause, doacrossVirturalClause}; SmallVector segments{numIVs, numIVs, numIVs}; if (failed(parseClauses(parser, result, clauses, segments))) return failure(); @@ -1085,8 +1099,11 @@ if (op.nowait()) p << "nowait "; - if (auto ordered = op.ordered_val()) + if (auto ordered = op.ordered_val()) { p << "ordered(" << ordered << ") "; + if (ordered.getValue() > 0) + printDataVars(p, op.doacross_vars(), "doacross"); + } if (auto order = op.order_val()) p << "order(" << order << ") "; @@ -1190,7 +1207,8 @@ /*linear_vars=*/ValueRange(), /*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(), /*schedule_val=*/nullptr, /*schedule_chunk_var=*/nullptr, /*collapse_val=*/nullptr, - /*nowait=*/nullptr, /*ordered_val=*/nullptr, /*order_val=*/nullptr, + /*nowait=*/nullptr, /*ordered_val=*/nullptr, + /*doacross_vars=*/ValueRange(), /*order_val=*/nullptr, /*inclusive=*/nullptr, /*buildBody=*/false); state.addAttributes(attributes); } @@ -1212,8 +1230,8 @@ ValueRange linearStepVars, ValueRange reductionVars, StringAttr scheduleVal, Value scheduleChunkVar, IntegerAttr collapseVal, UnitAttr nowait, - IntegerAttr orderedVal, StringAttr orderVal, - UnitAttr inclusive, bool buildBody) { + IntegerAttr orderedVal, ValueRange doacrossVars, + StringAttr orderVal, UnitAttr inclusive, bool buildBody) { result.addOperands(lowerBounds); result.addOperands(upperBounds); result.addOperands(steps); @@ -1223,6 +1241,7 @@ result.addOperands(linearStepVars); if (scheduleChunkVar) result.addOperands(scheduleChunkVar); + result.addOperands(doacrossVars); if (scheduleVal) result.addAttribute("schedule_val", scheduleVal); @@ -1248,7 +1267,8 @@ static_cast(linearVars.size()), static_cast(linearStepVars.size()), static_cast(reductionVars.size()), - static_cast(scheduleChunkVar != nullptr ? 1 : 0)})); + static_cast(scheduleChunkVar != nullptr ? 
1 : 0), + static_cast(doacrossVars.size())})); Region *bodyRegion = result.addRegion(); if (buildBody) { Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -749,9 +749,9 @@ return failure(); } - // Collapse loops. Store the insertion point because LoopInfos may get + // Collapse loops. Store the basic block because LoopInfos may get // invalidated. - llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); + llvm::BasicBlock *afterBB = loopInfos.front()->getAfter(); llvm::CanonicalLoopInfo *loopInfo = ompBuilder->collapseLoops(diLoc, loopInfos, {}); @@ -759,6 +759,12 @@ bool isSimd = loop.simd_modifier(); + // Store the BBs since loopInfo get invalidated after apply*WorkshareLoop. + llvm::BasicBlock *preHeaderBB = loopInfo->getPreheader(); + llvm::BasicBlock *exitBB = loopInfo->getExit(); + + std::int64_t orderedVal = + loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1; if (schedule == omp::ClauseScheduleKind::Static) { ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, !loop.nowait(), chunk); @@ -803,15 +809,23 @@ break; } } - afterIP = ompBuilder->applyDynamicWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); + ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, + schedType, !loop.nowait(), chunk); + } + + if (orderedVal > 0) { + SmallVector doacrossVars = + moduleTranslation.lookupValues(loop.doacross_vars()); + ompBuilder->applyDoacrossLoop(ompLoc.DL, allocaIP, preHeaderBB, exitBB, + orderedVal, doacrossVars); } // Continue building IR after the loop. Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for - // potential further loop transformations. 
Use the insertion point stored - // before collapsing loops instead. - builder.restoreIP(afterIP); + // potential further loop transformations. Use the after basic block stored + // before collapsing loops instead and insert the created instructions + // appended to the after basic block. + builder.SetInsertPoint(afterBB, afterBB->end()); // Process the reductions if required. if (numReductions == 0) Index: mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir =================================================================== --- mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -57,7 +57,7 @@ // CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> () "test.payload"(%arg6, %arg7) : (index, index) -> () omp.yield - }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 0, 0, 0, 0]> : vector<10xi32>} : (index, index, index, index, index, index) -> () + }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0]> : vector<11xi32>} : (index, index, index, index, index, index) -> () omp.terminator } return Index: mlir/test/Dialect/OpenMP/invalid.mlir =================================================================== --- mlir/test/Dialect/OpenMP/invalid.mlir +++ mlir/test/Dialect/OpenMP/invalid.mlir @@ -120,7 +120,7 @@ func @ordered_not_allowed() { // expected-error@+1 {{ordered is not a valid clause for the omp.parallel operation}} - omp.parallel ordered(2) {} + omp.parallel ordered(0) {} } // ----- @@ -448,8 +448,8 @@ // ----- -func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) { +func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32, %doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64) -> () { + omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) doacross(%doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64) { // expected-error @below 
{{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}} omp.ordered_region { omp.terminator @@ -493,8 +493,8 @@ } // ----- -func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () { - omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) { +func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64, %doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64) -> () { + omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) doacross(%doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64) { // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}} omp.ordered depend_type("dependsource") depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64} @@ -794,7 +794,7 @@ func @omp_sections() { // expected-error @below {{ordered is not a valid clause for the omp.sections operation}} - omp.sections ordered(2) { + omp.sections ordered(0) { omp.terminator } return Index: mlir/test/Dialect/OpenMP/ops.mlir =================================================================== --- mlir/test/Dialect/OpenMP/ops.mlir +++ mlir/test/Dialect/OpenMP/ops.mlir @@ -147,52 +147,51 @@ } // CHECK-LABEL: omp_wsloop -func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32) -> () { +func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32, %doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64, %doacross_var4 : i64, %doacross_var5 : i64, %doacross_var6 : i64) -> () { - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref, %{{.*}} : memref) collapse(2) ordered(1) - "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var) ({ + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step 
(%{{.*}}) private(%{{.*}} : memref, %{{.*}} : memref) collapse(2) ordered(1) doacross(%{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64) + "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %doacross_var1, %doacross_var2, %doacross_var3) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,2,0,0,0,0,0,0]> : vector<10xi32>, collapse_val = 2, ordered_val = 1} : - (index, index, index, memref, memref) -> () + }) {operand_segment_sizes = dense<[1,1,1,2,0,0,0,0,0,0,3]> : vector<11xi32>, collapse_val = 2, ordered_val = 1} : + (index, index, index, memref, memref, i64, i64, i64) -> () // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref) schedule(static) "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,0,0,0,1,1,0,0]> : vector<10xi32>, schedule_val = "Static"} : + }) {operand_segment_sizes = dense<[1,1,1,0,0,0,1,1,0,0,0]> : vector<11xi32>, schedule_val = "Static"} : (index, index, index, memref, i32) -> () // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref, %{{.*}} = %{{.*}} : memref) schedule(static) "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,0,0,0,2,2,0,0]> : vector<10xi32>, schedule_val = "Static"} : + }) {operand_segment_sizes = dense<[1,1,1,0,0,0,2,2,0,0,0]> : vector<11xi32>, schedule_val = "Static"} : (index, index, index, memref, memref, i32, i32) -> () - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}) collapse(3) ordered(2) - "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %data_var, %data_var, %linear_var, %chunk_var) ({ + // CHECK: omp.wsloop (%{{.*}}) : 
index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}) collapse(3) ordered(2) doacross(%{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64) + "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %data_var, %data_var, %linear_var, %chunk_var, %doacross_var1, %doacross_var2, %doacross_var3, %doacross_var4, %doacross_var5, %doacross_var6) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,1,1,1,1,1,0,1]> : vector<10xi32>, schedule_val = "Dynamic", collapse_val = 3, ordered_val = 2} : - (index, index, index, memref, memref, memref, memref, i32, i32) -> () + }) {operand_segment_sizes = dense<[1,1,1,1,1,1,1,1,0,1,6]> : vector<11xi32>, schedule_val = "Dynamic", collapse_val = 3, ordered_val = 2} : + (index, index, index, memref, memref, memref, memref, i32, i32, i64, i64, i64, i64, i64, i64) -> () // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) schedule(auto) nowait "omp.wsloop" (%lb, %ub, %step, %data_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,1,0,0,0,0,0,0]> : vector<10xi32>, nowait, schedule_val = "Auto"} : + }) {operand_segment_sizes = dense<[1,1,1,1,0,0,0,0,0,0,0]> : vector<11xi32>, nowait, schedule_val = "Auto"} : (index, index, index, memref) -> () return } // CHECK-LABEL: omp_wsloop_pretty -func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, - %data_var : memref, %linear_var : i32, %chunk_var : i32) -> () { +func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32, %doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64, %doacross_var4 : i64, %doacross_var5 : i64, %doacross_var6 : i64) -> () { // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : 
memref) - omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) private(%data_var : memref) collapse(2) ordered(2) { + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) private(%data_var : memref) collapse(2) { omp.yield } @@ -201,22 +200,22 @@ omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}) collapse(3) ordered(2) - omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}) collapse(3) ordered(2) doacross(%{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) doacross(%doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64, %doacross_var4 : i64, %doacross_var5 : i64, %doacross_var6 : i64) private(%data_var : memref) firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) schedule(static = %chunk_var) collapse(3) { omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, nonmonotonic) collapse(3) ordered(2) - omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, nonmonotonic) collapse(3) + omp.wsloop 
(%iv) : index = (%lb) to (%ub) step (%step) private(%data_var : memref) firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) schedule(dynamic = %chunk_var, nonmonotonic) collapse(3) { omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, monotonic) collapse(3) ordered(2) - omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, monotonic) collapse(3) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) private(%data_var : memref) firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) schedule(dynamic = %chunk_var, monotonic) collapse(3) { omp.yield @@ -450,8 +449,7 @@ return } -func @omp_ordered(%arg1 : i32, %arg2 : i32, %arg3 : i32, - %vec0 : i64, %vec1 : i64, %vec2 : i64, %vec3 : i64) -> () { +func @omp_ordered(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64, %vec2 : i64, %vec3 : i64, %doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64, %doacross_var4 : i64, %doacross_var5 : i64, %doacross_var6 : i64) -> () { // CHECK: omp.ordered_region omp.ordered_region { // CHECK: omp.terminator @@ -465,7 +463,7 @@ omp.yield } - omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) { + omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) doacross(%doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64) { // Only one DEPEND(SINK: vec) clause // CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64} omp.ordered 
depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} @@ -476,7 +474,7 @@ omp.yield } - omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(2) { + omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(2) doacross(%doacross_var1 : i64, %doacross_var2 : i64, %doacross_var3 : i64, %doacross_var4 : i64, %doacross_var5 : i64, %doacross_var6 : i64) { // Multiple DEPEND(SINK: vec) clauses // CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64} omp.ordered depend_type("dependsink") depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64} Index: mlir/test/Target/LLVMIR/openmp-llvm.mlir =================================================================== --- mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -379,7 +379,7 @@ llvm.store %3, %4 : !llvm.ptr omp.yield // CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @[[$wsloop_loc_struct]], - }) {operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]> : vector<10xi32>} : (i64, i64, i64) -> () + }) {operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]> : vector<11xi32>} : (i64, i64, i64) -> () omp.terminator } llvm.return @@ -399,7 +399,7 @@ %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr llvm.store %3, %4 : !llvm.ptr omp.yield - }) {operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]> : vector<10xi32>} : (i64, i64, i64) -> () + }) {operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]> : vector<11xi32>} : (i64, i64, i64) -> () llvm.return } @@ -417,7 +417,7 @@ %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr llvm.store %3, %4 : !llvm.ptr omp.yield - }) {inclusive, operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]> : vector<10xi32>} : (i64, i64, i64) -> () + }) {inclusive, operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]> : 
vector<11xi32>} : (i64, i64, i64) -> () llvm.return } @@ -631,9 +631,103 @@ // ----- -// CHECK-LABEL: @omp_ordered -llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64, - %arg4: i64, %arg5: i64, %arg6: i64) -> () { +// Check that the loop bounds are emitted in the correct location in case of +// collapse for dynamic schedule. This only checks the overall shape of the IR, +// detailed checking is done by the OpenMPIRBuilder. + +// CHECK-LABEL: @collapse_wsloop_dynamic +// CHECK: i32* noalias %[[TIDADDR:[0-9A-Za-z.]*]] +// CHECK: load i32, i32* %[[TIDADDR]] +// CHECK: store +// CHECK: load +// CHECK: %[[LB0:.*]] = load i32 +// CHECK: %[[UB0:.*]] = load i32 +// CHECK: %[[STEP0:.*]] = load i32 +// CHECK: %[[LB1:.*]] = load i32 +// CHECK: %[[UB1:.*]] = load i32 +// CHECK: %[[STEP1:.*]] = load i32 +// CHECK: %[[LB2:.*]] = load i32 +// CHECK: %[[UB2:.*]] = load i32 +// CHECK: %[[STEP2:.*]] = load i32 + +llvm.func @collapse_wsloop_dynamic( + %0: i32, %1: i32, %2: i32, + %3: i32, %4: i32, %5: i32, + %6: i32, %7: i32, %8: i32, + %20: !llvm.ptr) { + omp.parallel { + // CHECK: icmp slt i32 %[[LB0]], 0 + // CHECK-COUNT-4: select + // CHECK: %[[TRIPCOUNT0:.*]] = select + // CHECK: br label %[[PREHEADER:.*]] + // + // CHECK: [[PREHEADER]]: + // CHECK: icmp slt i32 %[[LB1]], 0 + // CHECK-COUNT-4: select + // CHECK: %[[TRIPCOUNT1:.*]] = select + // CHECK: icmp slt i32 %[[LB2]], 0 + // CHECK-COUNT-4: select + // CHECK: %[[TRIPCOUNT2:.*]] = select + // CHECK: %[[PROD:.*]] = mul nuw i32 %[[TRIPCOUNT0]], %[[TRIPCOUNT1]] + // CHECK: %[[TOTAL:.*]] = mul nuw i32 %[[PROD]], %[[TRIPCOUNT2]] + // CHECK: br label %[[COLLAPSED_PREHEADER:.*]] + // + // CHECK: [[COLLAPSED_PREHEADER]]: + // CHECK: store i32 1, i32* + // CHECK: store i32 %[[TOTAL]], i32* + // CHECK: call void @__kmpc_dispatch_init_4u + omp.wsloop (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) schedule(dynamic) { + %31 = llvm.load %20 : !llvm.ptr + %32 = llvm.add %31, 
%arg0 : i32 + %33 = llvm.add %32, %arg1 : i32 + %34 = llvm.add %33, %arg2 : i32 + llvm.store %34, %20 : !llvm.ptr + omp.yield + } + omp.terminator + } + llvm.return +} + +// ----- + +// CHECK-LABEL: @omp_ordered_clause_para +llvm.func @omp_ordered_clause_para(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64, + %arg4: i64, %arg5: i64, %arg6: i64, %arg7 : i64, %arg8 : i64) -> () { + // CHECK: [[DIMS:%.*]] = alloca [2 x [[KMPDIM:%.*]]], align 8 + omp.wsloop (%arg) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(2) doacross(%arg3 : i64, %arg4 : i64, %arg5 : i64, %arg6 : i64, %arg7 : i64, %arg8 : i64) { + // CHECK: omp_loop.preheader: + // CHECK: [[ADDR0:%.*]] = getelementptr inbounds [2 x [[KMPDIM]]], [2 x [[KMPDIM]]]* [[DIMS]], i64 0, i64 0 + // CHECK: [[LB0:%.*]] = getelementptr inbounds [[KMPDIM]], [[KMPDIM]]* [[ADDR0]], i32 0, i32 0 + // CHECK: store i64 [[ARG3:%.*]], i64* [[LB0]], align 8 + // CHECK: [[UB0:%.*]] = getelementptr inbounds [[KMPDIM]], [[KMPDIM]]* [[ADDR0]], i32 0, i32 1 + // CHECK: store i64 [[ARG4:%.*]], i64* [[UB0]], align 8 + // CHECK: [[STEP0:%.*]] = getelementptr inbounds [[KMPDIM]], [[KMPDIM]]* [[ADDR0]], i32 0, i32 2 + // CHECK: store i64 [[ARG5:%.*]], i64* [[STEP0]], align 8 + // CHECK: [[ADDR1:%.*]] = getelementptr inbounds [2 x [[KMPDIM]]], [2 x [[KMPDIM]]]* [[DIMS]], i64 0, i64 1 + // CHECK: [[LB1:%.*]] = getelementptr inbounds [[KMPDIM]], [[KMPDIM]]* [[ADDR1]], i32 0, i32 0 + // CHECK: store i64 [[ARG6:%.*]], i64* [[LB1:%.*]], align 8 + // CHECK: [[UB1:%.*]] = getelementptr inbounds [[KMPDIM]], [[KMPDIM]]* [[ADDR1]], i32 0, i32 1 + // CHECK: store i64 [[ARG7:%.*]], i64* [[UB1:%.*]], align 8 + // CHECK: [[STEP1:%.*]] = getelementptr inbounds [[KMPDIM]], [[KMPDIM]]* [[ADDR1]], i32 0, i32 2 + // CHECK: store i64 [[ARG8:%.*]], i64* [[STEP1:%.*]], align 8 + // CHECK: [[BASE:%.*]] = getelementptr inbounds [2 x [[KMPDIM]]], [2 x [[KMPDIM]]]* %dims, i64 0, i64 0 + // CHECK: [[BASEI8:%.*]] = bitcast [[KMPDIM]]* [[BASE]] to i8* + // CHECK: 
[[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) + // CHECK: call void @__kmpc_doacross_init(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]], i32 2, i8* [[BASEI8]]) + // CHECK: omp_loop.exit: + // CHECK: call void @__kmpc_doacross_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]]) + omp.yield + } + + llvm.return +} +// ----- + +// CHECK-LABEL: @omp_ordered_construct +llvm.func @omp_ordered_construct(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64, + %arg4: i64, %arg5: i64, %arg6: i64, %arg8 : i64, %arg9 : i64, %arg10 : i64, + %arg11 : i64, %arg12 : i64, %arg13 : i64) -> () { // CHECK: [[ADDR9:%.*]] = alloca [2 x i64], align 8 // CHECK: [[ADDR7:%.*]] = alloca [2 x i64], align 8 // CHECK: [[ADDR5:%.*]] = alloca [2 x i64], align 8 @@ -657,7 +751,7 @@ omp.yield } - omp.wsloop (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(1) { + omp.wsloop (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(1) doacross(%arg8 : i64, %arg9 : i64, %arg10 : i64) { // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0 // CHECK: store i64 [[ARG0:%.*]], i64* [[TMP]], align 4 // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0 @@ -675,7 +769,7 @@ omp.yield } - omp.wsloop (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(2) { + omp.wsloop (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(2) doacross(%arg8 : i64, %arg9 : i64, %arg10 : i64, %arg11 : i64, %arg12 : i64, %arg13 : i64) { // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0 // CHECK: store i64 [[ARG0]], i64* [[TMP5]], align 4 // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 1 @@ -779,10 +873,10 @@ // CHECK: [[EXIT]]: // CHECK: call void @__kmpc_for_static_fini({{.*}}) - // CHECK: call void @__kmpc_barrier({{.*}}) // CHECK: br label %[[AFTER:.*]] // CHECK: [[AFTER]]: + // CHECK: call void 
@__kmpc_barrier({{.*}}) // CHECK: br label %[[END:.*]] // CHECK: [[END]]: