diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -517,6 +517,10 @@ F->setDoesNotThrow(); F->setDoesNotRecurse(); + // Always inline the outlined function if optimizations are enabled. + if (CGM.getCodeGenOpts().OptimizationLevel != 0) + F->addFnAttr(llvm::Attribute::AlwaysInline); + // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), @@ -5303,6 +5307,8 @@ CGF.CapturedStmtInfo = &CapStmtInfo; llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); Fn->setDoesNotRecurse(); + if (CGM.getCodeGenOpts().OptimizationLevel != 0) + Fn->addFnAttr(llvm::Attribute::AlwaysInline); return Fn; } diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -11,9 +11,6 @@ #include // Verify we do not add tbaa metadata to type punned memory operations: - - - template void complex_reduction() { #pragma omp target teams distribute @@ -33,7 +30,7 @@ complex_reduction(); } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -126,7 +123,7 @@ // CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR1]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR9:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -191,7 +188,7 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -281,7 +278,7 @@ // CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR1]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR9]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -343,8 +340,8 @@ // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR9]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* @@ -385,7 +382,7 @@ // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR9]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: @@ -423,13 +420,13 @@ // CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR9]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK1-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR9]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -438,7 +435,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -515,7 +512,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -576,7 +573,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -600,7 +597,7 @@ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19 +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 // CHECK1-SAME: () #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -693,7 +690,7 @@ // CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR1]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR9]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -758,7 +755,7 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -848,7 +845,7 @@ // CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR1]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR9]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -910,8 +907,8 @@ // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK1-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR9]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* @@ -952,7 +949,7 @@ // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR9]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: @@ -990,13 +987,13 @@ // CHECK1-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR9]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK1-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR9]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -1005,7 +1002,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -1096,7 +1093,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1157,7 +1154,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1267,7 +1264,7 @@ // CHECK1-NEXT: ret double [[TMP0]] // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -1360,7 +1357,7 @@ // CHECK2-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR1]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR9:[0-9]+]] // CHECK2-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -1425,7 +1422,7 @@ // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -1515,7 +1512,7 @@ // CHECK2-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR1]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR9]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -1577,8 +1574,8 @@ // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK2-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR9]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* @@ -1619,7 +1616,7 @@ // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR9]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: @@ -1657,13 +1654,13 @@ // CHECK2-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR9]] // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK2-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR9]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -1672,7 +1669,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -1749,7 +1746,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1810,7 +1807,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1834,7 +1831,7 @@ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19 +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 // CHECK2-SAME: () #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -1927,7 +1924,7 @@ // CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR1]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR9]] // CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -1992,7 +1989,7 @@ // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -2082,7 +2079,7 @@ // CHECK2-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR1]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR9]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -2144,8 +2141,8 @@ // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK2-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR9]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* @@ -2186,7 +2183,7 @@ // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR9]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: @@ -2224,13 +2221,13 @@ // CHECK2-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR9]] // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK2-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR9]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -2239,7 +2236,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -2330,7 +2327,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -2391,7 +2388,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -2501,7 +2498,7 @@ // CHECK2-NEXT: ret double [[TMP0]] // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 // CHECK3-SAME: () #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -2594,7 +2591,7 @@ // CHECK3-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR1]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR9:[0-9]+]] // CHECK3-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -2659,7 +2656,7 @@ // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] // CHECK3-NEXT: ret void // // @@ -2749,7 +2746,7 @@ // CHECK3-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR1]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR9]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* @@ -2811,8 +2808,8 @@ // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK3-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR9]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* @@ -2853,7 +2850,7 @@ // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR9]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: @@ -2891,13 +2888,13 @@ // CHECK3-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR9]] // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK3-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR9]] // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -2906,7 +2903,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -2983,7 +2980,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -3044,7 +3041,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -3068,7 +3065,7 @@ // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19 +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 // CHECK3-SAME: () #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 @@ -3161,7 +3158,7 @@ // CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR1]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR9]] // CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -3226,7 +3223,7 @@ // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR9]] // CHECK3-NEXT: ret void // // @@ -3316,7 +3313,7 @@ // CHECK3-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR1]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR9]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* @@ -3378,8 +3375,8 @@ // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK3-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR9]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* @@ -3420,7 +3417,7 @@ // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR9]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: @@ -3458,13 +3455,13 @@ // CHECK3-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR9]] // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK3-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR9]] // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -3473,7 +3470,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -3564,7 +3561,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -3625,7 +3622,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4