Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -15401,13 +15401,16 @@ break; } [[fallthrough]]; - case OMPD_target_teams_loop: case OMPD_target_teams_distribute_parallel_for: // If this clause applies to the nested 'parallel' region, capture within // the 'teams' region, otherwise do not capture. if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel) CaptureRegion = OMPD_teams; break; + case OMPD_target_teams_loop: + // This is equivalent to 'target teams distribute parallel for' with + // 'if(target:val)'. For this case, there is no capture region. + break; case OMPD_teams_distribute_parallel_for_simd: if (OpenMPVersion >= 50 && (NameModifier == OMPD_unknown || NameModifier == OMPD_simd)) { Index: clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp +++ clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp @@ -428,13 +428,8 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 // CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 @@ -472,69 +467,52 @@ // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83() #[[ATTR2]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr @Arg, align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1 -// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TOBOOL1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP20]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 // CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK1-NEXT: [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 2, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP39]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK1-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] -// CHECK1: omp_offload.failed7: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] -// CHECK1: omp_offload.cont8: +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP31]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP17]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS3]]) +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] +// CHECK1: omp_offload.failed4: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT5]] +// CHECK1: omp_offload.cont5: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90() #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP42]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP34]]) // CHECK1-NEXT: ret i32 [[CALL]] // // @@ -820,26 +798,17 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 -// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined, i64 [[TMP1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -850,7 +819,6 @@ // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -882,8 +850,8 @@ // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])