diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5838,25 +5838,40 @@
-  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
-  // expression is simple and atomic is allowed for the given type for the
-  // target platform.
-  if (BO == BO_Comma || !Update.isScalar() ||
-      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
+  // Allow atomicrmw only if 'x' and 'update' are integer values (or, for
+  // add/sub, floating-point values), lvalue for 'x' expression is simple and
+  // atomic is allowed for the given type for the target platform.
+  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
        (Update.getScalarVal()->getType() !=
         X.getAddress(CGF).getElementType())) ||
-      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
       !Context.getTargetInfo().hasBuiltinAtomic(
           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
     return std::make_pair(false, RValue::get(nullptr));
 
+  // atomicrmw accepts any integer type; a floating-point type qualifies only
+  // for add/sub (fadd/fsub) and only if its store size is a power of two.
+  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
+    if (T->isIntegerTy())
+      return true;
+
+    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
+      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
+
+    return false;
+  };
+
+  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
+      !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
+    return std::make_pair(false, RValue::get(nullptr));
+
+  bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
   llvm::AtomicRMWInst::BinOp RMWOp;
   switch (BO) {
   case BO_Add:
-    RMWOp = llvm::AtomicRMWInst::Add;
+    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
     break;
   case BO_Sub:
     if (!IsXLHSInRHSPart)
       return std::make_pair(false, RValue::get(nullptr));
-    RMWOp = llvm::AtomicRMWInst::Sub;
+    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
     break;
   case BO_And:
     RMWOp = llvm::AtomicRMWInst::And;
@@ -5914,9 +5929,15 @@
   }
   llvm::Value *UpdateVal = Update.getScalarVal();
   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
-    UpdateVal = CGF.Builder.CreateIntCast(
-        IC, X.getAddress(CGF).getElementType(),
-        X.getType()->hasSignedIntegerRepresentation());
+    // NOTE(review): the floating-point path converts IC with UIToFP, i.e. as
+    // an unsigned value; confirm a negative constant can never reach it.
+    if (IsInteger)
+      UpdateVal = CGF.Builder.CreateIntCast(
+          IC, X.getAddress(CGF).getElementType(),
+          X.getType()->hasSignedIntegerRepresentation());
+    else
+      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
+                                         X.getAddress(CGF).getElementType());
   }
   llvm::Value *Res =
       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp
--- a/clang/test/OpenMP/atomic_capture_codegen.cpp
+++ b/clang/test/OpenMP/atomic_capture_codegen.cpp
@@ -216,20 +216,8 @@
 #pragma omp atomic capture
   llv = ullx |= ullv;
 // CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
-// CHECK: [[TEMP_I:%.+]] = bitcast float* [[TEMP:%.+]] to i32*
-// CHECK: [[OLD:%.+]] = bitcast i32 [[EXPECTED]] to float
+// CHECK: [[OLD:%.+]] = atomicrmw fadd float* @{{.+}}, float [[EXPR]] monotonic, align 4
 // CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
-// CHECK: store float [[ADD]], float* [[TEMP]],
-// CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
-// CHECK: [[OLD_X:%.+]] = extractvalue { i32, i1 } [[RES]], 0
-// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
-// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
-// CHECK: [[EXIT]]
 // CHECK: [[CAST:%.+]] = fpext float [[ADD]] to double
 // CHECK: store double [[CAST]], double* @{{.+}},
 #pragma omp atomic capture
diff --git a/clang/test/OpenMP/atomic_update_codegen.cpp b/clang/test/OpenMP/atomic_update_codegen.cpp
--- a/clang/test/OpenMP/atomic_update_codegen.cpp
+++ b/clang/test/OpenMP/atomic_update_codegen.cpp
@@ -82,7 +82,7 @@
 register int rix __asm__("esp");
 
 int main(void) {
-// CHECK-NOT: atomicrmw
+// CHECK: atomicrmw fadd double* @{{.+}}, double 1.000000e+00 monotonic, align 8
 #pragma omp atomic
   ++dv;
 // CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic, align 1
@@ -192,20 +192,7 @@
 #pragma omp atomic
   ullx |= ullv;
 // CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[OLD:%.+]] = load atomic i32, i32* bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[EXPECTED:%.+]] = phi i32 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
-// CHECK: [[BITCAST:%.+]] = bitcast float* [[TEMP:%.+]] to i32*
-// CHECK: [[OLD:%.+]] = bitcast i32 [[EXPECTED]] to float
-// CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
-// CHECK: store float [[ADD]], float* [[TEMP]],
-// CHECK: [[DESIRED:%.+]] = load i32, i32* [[BITCAST]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
-// CHECK: [[PREV:%.+]] = extractvalue { i32, i1 } [[RES]], 0
-// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
-// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
-// CHECK: [[EXIT]]
+// CHECK: atomicrmw fadd float* @{{.+}}, float [[EXPR]] monotonic, align 4
 #pragma omp atomic update
   fx = fx + fv;
 // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp
--- a/clang/test/OpenMP/for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen.cpp
@@ -698,11 +698,9 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[_TMP22:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP25:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP35:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[_TMP36:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[_TMP31:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 @@ -843,77 +841,59 @@ // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP44:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP0]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP45]] monotonic, align 4 -// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP46:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP54:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP46]] to float -// CHECK1-NEXT: store float [[TMP48]], float* [[_TMP22]], align 4 -// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[_TMP22]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[ADD23:%.*]] = fadd float [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store float [[ADD23]], float* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP47]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast 
float* [[TMP0]] to i32* -// CHECK1-NEXT: [[TMP53:%.*]] = cmpxchg i32* [[TMP52]], i32 [[TMP46]], i32 [[TMP51]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP54]] = extractvalue { i32, i1 } [[TMP53]], 0 -// CHECK1-NEXT: [[TMP55:%.*]] = extractvalue { i32, i1 } [[TMP53]], 1 -// CHECK1-NEXT: br i1 [[TMP55]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK1: atomic_exit: +// CHECK1-NEXT: [[TMP45:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP44]] monotonic, align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL24:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S* [[CALL24]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP7]] to i8* +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[CALL22]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL26:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull 
align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL27:%.*]] = fcmp une float [[CALL26]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL27]], label [[LAND_RHS28:%.*]], label [[LAND_END31:%.*]] -// CHECK1: land.rhs28: -// CHECK1-NEXT: [[CALL29:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL30:%.*]] = fcmp une float [[CALL29]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END31]] -// CHECK1: land.end31: -// CHECK1-NEXT: [[TMP58:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL30]], [[LAND_RHS28]] ] -// CHECK1-NEXT: [[CONV32:%.*]] = uitofp i1 [[TMP58]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]], float noundef [[CONV32]]) -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP60:%.*]] = bitcast %struct.S* [[REF_TMP25]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) +// CHECK1-NEXT: [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]] +// CHECK1: land.rhs26: +// CHECK1-NEXT: [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) +// CHECK1-NEXT: [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END29]] +// CHECK1: land.end29: +// CHECK1-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ] +// CHECK1-NEXT: [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* 
noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]]) +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP2]] to i8* +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[REF_TMP23]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]] // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP61:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD33:%.*]] = load atomic i32, i32* [[TMP62]] monotonic, align 4 -// CHECK1-NEXT: br label [[ATOMIC_CONT34:%.*]] -// CHECK1: atomic_cont34: -// CHECK1-NEXT: [[TMP63:%.*]] = phi i32 [ [[ATOMIC_LOAD33]], [[LAND_END31]] ], [ [[TMP73:%.*]], [[COND_END40:%.*]] ] -// CHECK1-NEXT: [[TMP64:%.*]] = bitcast float* [[ATOMIC_TEMP35]] to i32* -// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP63]] to float -// CHECK1-NEXT: store float [[TMP65]], float* [[_TMP36]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load float, float* [[_TMP36]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP37:%.*]] = fcmp olt float [[TMP66]], [[TMP67]] -// CHECK1-NEXT: br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE39:%.*]] -// CHECK1: cond.true38: -// CHECK1-NEXT: [[TMP68:%.*]] = load float, float* [[_TMP36]], align 4 -// CHECK1-NEXT: br label [[COND_END40]] -// CHECK1: cond.false39: -// CHECK1-NEXT: [[TMP69:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END40]] -// CHECK1: cond.end40: -// CHECK1-NEXT: [[COND41:%.*]] = phi float [ [[TMP68]], [[COND_TRUE38]] ], [ [[TMP69]], [[COND_FALSE39]] ] -// CHECK1-NEXT: store float [[COND41]], float* 
[[ATOMIC_TEMP35]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[TMP72:%.*]] = cmpxchg i32* [[TMP71]], i32 [[TMP63]], i32 [[TMP70]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP73]] = extractvalue { i32, i1 } [[TMP72]], 0 -// CHECK1-NEXT: [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1 -// CHECK1-NEXT: br i1 [[TMP74]], label [[ATOMIC_EXIT42:%.*]], label [[ATOMIC_CONT34]] -// CHECK1: atomic_exit42: +// CHECK1-NEXT: [[TMP51:%.*]] = load float, float* [[T_VAR17]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP52]] monotonic, align 4 +// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] +// CHECK1: atomic_cont: +// CHECK1-NEXT: [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ] +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float +// CHECK1-NEXT: store float [[TMP55]], float* [[_TMP31]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP31]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR17]], align 4 +// CHECK1-NEXT: [[CMP32:%.*]] = fcmp olt float [[TMP56]], [[TMP57]] +// CHECK1-NEXT: br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]] +// CHECK1: cond.true33: +// CHECK1-NEXT: [[TMP58:%.*]] = load float, float* [[_TMP31]], align 4 +// CHECK1-NEXT: br label [[COND_END35]] +// CHECK1: cond.false34: +// CHECK1-NEXT: [[TMP59:%.*]] = load float, float* [[T_VAR17]], align 4 +// CHECK1-NEXT: br label [[COND_END35]] +// CHECK1: cond.end35: +// CHECK1-NEXT: [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ] +// CHECK1-NEXT: store float [[COND36]], float* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP54]], align 4 +// 
CHECK1-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK1-NEXT: [[TMP62:%.*]] = cmpxchg i32* [[TMP61]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0 +// CHECK1-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1 +// CHECK1-NEXT: br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1: atomic_exit: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -4633,11 +4613,9 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK2-NEXT: [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 -// CHECK2-NEXT: [[_TMP22:%.*]] = alloca float, align 4 -// CHECK2-NEXT: [[REF_TMP25:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK2-NEXT: [[ATOMIC_TEMP35:%.*]] = alloca float, align 4 -// CHECK2-NEXT: [[_TMP36:%.*]] = alloca float, align 4 +// CHECK2-NEXT: [[_TMP31:%.*]] = alloca float, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 @@ -4778,77 +4756,59 @@ // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP44:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP0]] to i32* -// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP45]] monotonic, align 4 -// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK2: atomic_cont: -// CHECK2-NEXT: [[TMP46:%.*]] = phi i32 [ [[ATOMIC_LOAD]], 
[[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP54:%.*]], [[ATOMIC_CONT]] ] -// CHECK2-NEXT: [[TMP47:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK2-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP46]] to float -// CHECK2-NEXT: store float [[TMP48]], float* [[_TMP22]], align 4 -// CHECK2-NEXT: [[TMP49:%.*]] = load float, float* [[_TMP22]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK2-NEXT: [[ADD23:%.*]] = fadd float [[TMP49]], [[TMP50]] -// CHECK2-NEXT: store float [[ADD23]], float* [[ATOMIC_TEMP]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP47]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP0]] to i32* -// CHECK2-NEXT: [[TMP53:%.*]] = cmpxchg i32* [[TMP52]], i32 [[TMP46]], i32 [[TMP51]] monotonic monotonic, align 4 -// CHECK2-NEXT: [[TMP54]] = extractvalue { i32, i1 } [[TMP53]], 0 -// CHECK2-NEXT: [[TMP55:%.*]] = extractvalue { i32, i1 } [[TMP53]], 1 -// CHECK2-NEXT: br i1 [[TMP55]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK2: atomic_exit: +// CHECK2-NEXT: [[TMP45:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP44]] monotonic, align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[CALL24:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK2-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK2-NEXT: [[TMP57:%.*]] = bitcast %struct.S* [[CALL24]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false) +// CHECK2-NEXT: [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 
dereferenceable(4) [[VAR4]]) +// CHECK2-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP7]] to i8* +// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[CALL22]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[CALL26:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK2-NEXT: [[TOBOOL27:%.*]] = fcmp une float [[CALL26]], 0.000000e+00 -// CHECK2-NEXT: br i1 [[TOBOOL27]], label [[LAND_RHS28:%.*]], label [[LAND_END31:%.*]] -// CHECK2: land.rhs28: -// CHECK2-NEXT: [[CALL29:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK2-NEXT: [[TOBOOL30:%.*]] = fcmp une float [[CALL29]], 0.000000e+00 -// CHECK2-NEXT: br label [[LAND_END31]] -// CHECK2: land.end31: -// CHECK2-NEXT: [[TMP58:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL30]], [[LAND_RHS28]] ] -// CHECK2-NEXT: [[CONV32:%.*]] = uitofp i1 [[TMP58]] to float -// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]], float noundef [[CONV32]]) -// CHECK2-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK2-NEXT: [[TMP60:%.*]] = bitcast %struct.S* [[REF_TMP25]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false) -// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) +// CHECK2-NEXT: 
[[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00 +// CHECK2-NEXT: br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]] +// CHECK2: land.rhs26: +// CHECK2-NEXT: [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) +// CHECK2-NEXT: [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00 +// CHECK2-NEXT: br label [[LAND_END29]] +// CHECK2: land.end29: +// CHECK2-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ] +// CHECK2-NEXT: [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float +// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]]) +// CHECK2-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP2]] to i8* +// CHECK2-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[REF_TMP23]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false) +// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]] // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP61:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK2-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK2-NEXT: [[ATOMIC_LOAD33:%.*]] = load atomic i32, i32* [[TMP62]] monotonic, align 4 -// CHECK2-NEXT: br label [[ATOMIC_CONT34:%.*]] -// CHECK2: atomic_cont34: -// CHECK2-NEXT: [[TMP63:%.*]] = phi i32 [ [[ATOMIC_LOAD33]], [[LAND_END31]] ], [ [[TMP73:%.*]], [[COND_END40:%.*]] ] -// CHECK2-NEXT: [[TMP64:%.*]] = bitcast float* [[ATOMIC_TEMP35]] to i32* -// CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP63]] to float -// CHECK2-NEXT: store float [[TMP65]], float* [[_TMP36]], align 4 -// CHECK2-NEXT: [[TMP66:%.*]] = load float, float* [[_TMP36]], align 4 -// CHECK2-NEXT: 
[[TMP67:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK2-NEXT: [[CMP37:%.*]] = fcmp olt float [[TMP66]], [[TMP67]] -// CHECK2-NEXT: br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE39:%.*]] -// CHECK2: cond.true38: -// CHECK2-NEXT: [[TMP68:%.*]] = load float, float* [[_TMP36]], align 4 -// CHECK2-NEXT: br label [[COND_END40]] -// CHECK2: cond.false39: -// CHECK2-NEXT: [[TMP69:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK2-NEXT: br label [[COND_END40]] -// CHECK2: cond.end40: -// CHECK2-NEXT: [[COND41:%.*]] = phi float [ [[TMP68]], [[COND_TRUE38]] ], [ [[TMP69]], [[COND_FALSE39]] ] -// CHECK2-NEXT: store float [[COND41]], float* [[ATOMIC_TEMP35]], align 4 -// CHECK2-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP64]], align 4 -// CHECK2-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK2-NEXT: [[TMP72:%.*]] = cmpxchg i32* [[TMP71]], i32 [[TMP63]], i32 [[TMP70]] monotonic monotonic, align 4 -// CHECK2-NEXT: [[TMP73]] = extractvalue { i32, i1 } [[TMP72]], 0 -// CHECK2-NEXT: [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1 -// CHECK2-NEXT: br i1 [[TMP74]], label [[ATOMIC_EXIT42:%.*]], label [[ATOMIC_CONT34]] -// CHECK2: atomic_exit42: +// CHECK2-NEXT: [[TMP51:%.*]] = load float, float* [[T_VAR17]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP52]] monotonic, align 4 +// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] +// CHECK2: atomic_cont: +// CHECK2-NEXT: [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ] +// CHECK2-NEXT: [[TMP54:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK2-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float +// CHECK2-NEXT: store float [[TMP55]], float* [[_TMP31]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP31]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR17]], align 4 +// CHECK2-NEXT: [[CMP32:%.*]] = fcmp 
olt float [[TMP56]], [[TMP57]] +// CHECK2-NEXT: br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]] +// CHECK2: cond.true33: +// CHECK2-NEXT: [[TMP58:%.*]] = load float, float* [[_TMP31]], align 4 +// CHECK2-NEXT: br label [[COND_END35]] +// CHECK2: cond.false34: +// CHECK2-NEXT: [[TMP59:%.*]] = load float, float* [[T_VAR17]], align 4 +// CHECK2-NEXT: br label [[COND_END35]] +// CHECK2: cond.end35: +// CHECK2-NEXT: [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ] +// CHECK2-NEXT: store float [[COND36]], float* [[ATOMIC_TEMP]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK2-NEXT: [[TMP62:%.*]] = cmpxchg i32* [[TMP61]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4 +// CHECK2-NEXT: [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0 +// CHECK2-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1 +// CHECK2-NEXT: br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK2: atomic_exit: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: @@ -8425,10 +8385,6 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[ATOMIC_TEMP11:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP12:%.*]] = alloca double, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, 
double** @g1, align 8 @@ -8514,43 +8470,9 @@ // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: // CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8 -// CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP26:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP33:%.*]], [[ATOMIC_CONT]] ] -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64* -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i64 [[TMP26]] to double -// CHECK3-NEXT: store double [[TMP28]], double* [[_TMP7]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = load double, double* [[_TMP7]], align 8 -// CHECK3-NEXT: [[TMP30:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[TMP29]], [[TMP30]] -// CHECK3-NEXT: store double [[ADD8]], double* [[ATOMIC_TEMP]], align 8 -// CHECK3-NEXT: [[TMP31:%.*]] = load i64, i64* [[TMP27]], align 8 -// CHECK3-NEXT: [[TMP32:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP26]], i64 [[TMP31]] monotonic monotonic, align 8 -// CHECK3-NEXT: [[TMP33]] = extractvalue { i64, i1 } [[TMP32]], 0 -// CHECK3-NEXT: [[TMP34:%.*]] = extractvalue { i64, i1 } [[TMP32]], 1 -// CHECK3-NEXT: br i1 [[TMP34]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK3: atomic_exit: -// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[G1]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP1]] to i64* -// CHECK3-NEXT: [[ATOMIC_LOAD9:%.*]] = load atomic i64, i64* [[TMP36]] monotonic, align 8 -// CHECK3-NEXT: br label [[ATOMIC_CONT10:%.*]] -// CHECK3: atomic_cont10: -// CHECK3-NEXT: [[TMP37:%.*]] = phi i64 [ [[ATOMIC_LOAD9]], [[ATOMIC_EXIT]] ], [ [[TMP45:%.*]], [[ATOMIC_CONT10]] ] -// CHECK3-NEXT: [[TMP38:%.*]] = bitcast double* [[ATOMIC_TEMP11]] to i64* -// CHECK3-NEXT: [[TMP39:%.*]] = bitcast i64 [[TMP37]] to double 
-// CHECK3-NEXT: store double [[TMP39]], double* [[_TMP12]], align 8 -// CHECK3-NEXT: [[TMP40:%.*]] = load double, double* [[_TMP12]], align 8 -// CHECK3-NEXT: [[TMP41:%.*]] = load double, double* [[G1]], align 8 -// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP40]], [[TMP41]] -// CHECK3-NEXT: store double [[ADD13]], double* [[ATOMIC_TEMP11]], align 8 -// CHECK3-NEXT: [[TMP42:%.*]] = load i64, i64* [[TMP38]], align 8 -// CHECK3-NEXT: [[TMP43:%.*]] = bitcast double* [[TMP1]] to i64* -// CHECK3-NEXT: [[TMP44:%.*]] = cmpxchg i64* [[TMP43]], i64 [[TMP37]], i64 [[TMP42]] monotonic monotonic, align 8 -// CHECK3-NEXT: [[TMP45]] = extractvalue { i64, i1 } [[TMP44]], 0 -// CHECK3-NEXT: [[TMP46:%.*]] = extractvalue { i64, i1 } [[TMP44]], 1 -// CHECK3-NEXT: br i1 [[TMP46]], label [[ATOMIC_EXIT14:%.*]], label [[ATOMIC_CONT10]] -// CHECK3: atomic_exit14: +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw fadd double* @g, double [[TMP25]] monotonic, align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[G1]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP27]] monotonic, align 8 // CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -8633,10 +8555,6 @@ // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, align 8 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK4-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8 -// CHECK4-NEXT: [[_TMP8:%.*]] = alloca double, align 8 -// CHECK4-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca double, align 8 -// CHECK4-NEXT: [[_TMP13:%.*]] = alloca double, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP0:%.*]] = load double*, double** @g1, align 8 @@ -8739,43 +8657,9 @@ // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: // CHECK4-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8 -// CHECK4-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK4: atomic_cont: -// CHECK4-NEXT: [[TMP30:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP37:%.*]], [[ATOMIC_CONT]] ] -// CHECK4-NEXT: [[TMP31:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64* -// CHECK4-NEXT: [[TMP32:%.*]] = bitcast i64 [[TMP30]] to double -// CHECK4-NEXT: store double [[TMP32]], double* [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP33:%.*]] = load double, double* [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP34:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[ADD9:%.*]] = fadd double [[TMP33]], [[TMP34]] -// CHECK4-NEXT: store double [[ADD9]], double* [[ATOMIC_TEMP]], align 8 -// CHECK4-NEXT: [[TMP35:%.*]] = load i64, i64* [[TMP31]], align 8 -// CHECK4-NEXT: [[TMP36:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP30]], i64 [[TMP35]] monotonic monotonic, align 8 -// CHECK4-NEXT: [[TMP37]] = extractvalue { i64, i1 } [[TMP36]], 0 -// CHECK4-NEXT: [[TMP38:%.*]] = extractvalue { i64, i1 } [[TMP36]], 1 -// CHECK4-NEXT: br i1 [[TMP38]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK4: atomic_exit: -// CHECK4-NEXT: [[TMP39:%.*]] = load double, double* [[G1]], align 8 -// CHECK4-NEXT: [[TMP40:%.*]] = bitcast double* [[TMP1]] to i64* -// CHECK4-NEXT: [[ATOMIC_LOAD10:%.*]] = load atomic i64, i64* [[TMP40]] monotonic, align 8 -// CHECK4-NEXT: br label [[ATOMIC_CONT11:%.*]] -// CHECK4: atomic_cont11: -// CHECK4-NEXT: [[TMP41:%.*]] = phi i64 [ [[ATOMIC_LOAD10]], [[ATOMIC_EXIT]] ], [ [[TMP49:%.*]], [[ATOMIC_CONT11]] ] -// CHECK4-NEXT: [[TMP42:%.*]] = bitcast double* 
[[ATOMIC_TEMP12]] to i64* -// CHECK4-NEXT: [[TMP43:%.*]] = bitcast i64 [[TMP41]] to double -// CHECK4-NEXT: store double [[TMP43]], double* [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP44:%.*]] = load double, double* [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP45:%.*]] = load double, double* [[G1]], align 8 -// CHECK4-NEXT: [[ADD14:%.*]] = fadd double [[TMP44]], [[TMP45]] -// CHECK4-NEXT: store double [[ADD14]], double* [[ATOMIC_TEMP12]], align 8 -// CHECK4-NEXT: [[TMP46:%.*]] = load i64, i64* [[TMP42]], align 8 -// CHECK4-NEXT: [[TMP47:%.*]] = bitcast double* [[TMP1]] to i64* -// CHECK4-NEXT: [[TMP48:%.*]] = cmpxchg i64* [[TMP47]], i64 [[TMP41]], i64 [[TMP46]] monotonic monotonic, align 8 -// CHECK4-NEXT: [[TMP49]] = extractvalue { i64, i1 } [[TMP48]], 0 -// CHECK4-NEXT: [[TMP50:%.*]] = extractvalue { i64, i1 } [[TMP48]], 1 -// CHECK4-NEXT: br i1 [[TMP50]], label [[ATOMIC_EXIT15:%.*]], label [[ATOMIC_CONT11]] -// CHECK4: atomic_exit15: +// CHECK4-NEXT: [[TMP30:%.*]] = atomicrmw fadd double* @g, double [[TMP29]] monotonic, align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = load double, double* [[G1]], align 8 +// CHECK4-NEXT: [[TMP32:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP31]] monotonic, align 8 // CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp --- a/clang/test/OpenMP/parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp @@ -605,11 +605,9 @@ // CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, 
align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP23:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 @@ -697,77 +695,59 @@ // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP32:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP1]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP33]] monotonic, align 4 -// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP42:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP34]] to float -// CHECK1-NEXT: store float [[TMP36]], float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = fadd float [[TMP37]], [[TMP38]] -// CHECK1-NEXT: store float [[ADD11]], float* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP1]] to i32* -// CHECK1-NEXT: [[TMP41:%.*]] = cmpxchg i32* [[TMP40]], i32 [[TMP34]], i32 [[TMP39]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP42]] = extractvalue { i32, i1 } [[TMP41]], 0 -// CHECK1-NEXT: [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP41]], 1 -// CHECK1-NEXT: br i1 [[TMP43]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK1: atomic_exit: +// 
CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw fadd float* [[TMP1]], float [[TMP32]] monotonic, align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[CALL12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]] -// CHECK1: land.rhs16: -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: 
[[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END19]] -// CHECK1: land.end19: -// CHECK1-NEXT: [[TMP46:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ] -// CHECK1-NEXT: [[CONV20:%.*]] = uitofp i1 [[TMP46]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], float noundef [[CONV20]]) -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP4]] to i8* -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[REF_TMP13]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) +// CHECK1-NEXT: [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]] +// CHECK1: land.rhs15: +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) +// CHECK1-NEXT: [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END18]] +// CHECK1: land.end18: +// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ] +// CHECK1-NEXT: [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]]) +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP12]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) +// CHECK1-NEXT: call void 
@_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]] // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = bitcast float* [[TMP5]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD21:%.*]] = load atomic i32, i32* [[TMP50]] monotonic, align 4 -// CHECK1-NEXT: br label [[ATOMIC_CONT22:%.*]] -// CHECK1: atomic_cont22: -// CHECK1-NEXT: [[TMP51:%.*]] = phi i32 [ [[ATOMIC_LOAD21]], [[LAND_END19]] ], [ [[TMP61:%.*]], [[COND_END28:%.*]] ] -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast float* [[ATOMIC_TEMP23]] to i32* -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i32 [[TMP51]] to float -// CHECK1-NEXT: store float [[TMP53]], float* [[_TMP24]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load float, float* [[_TMP24]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP25:%.*]] = fcmp olt float [[TMP54]], [[TMP55]] -// CHECK1-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] -// CHECK1: cond.true26: -// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP24]], align 4 -// CHECK1-NEXT: br label [[COND_END28]] -// CHECK1: cond.false27: -// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END28]] -// CHECK1: cond.end28: -// CHECK1-NEXT: [[COND29:%.*]] = phi float [ [[TMP56]], [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ] -// CHECK1-NEXT: store float [[COND29]], float* [[ATOMIC_TEMP23]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP52]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP5]] to i32* -// CHECK1-NEXT: [[TMP60:%.*]] = cmpxchg i32* [[TMP59]], i32 [[TMP51]], i32 [[TMP58]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP61]] = extractvalue { i32, i1 } [[TMP60]], 0 -// CHECK1-NEXT: [[TMP62:%.*]] 
= extractvalue { i32, i1 } [[TMP60]], 1 -// CHECK1-NEXT: br i1 [[TMP62]], label [[ATOMIC_EXIT30:%.*]], label [[ATOMIC_CONT22]] -// CHECK1: atomic_exit30: +// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP5]] to i32* +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP40]] monotonic, align 4 +// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] +// CHECK1: atomic_cont: +// CHECK1-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ] +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float +// CHECK1-NEXT: store float [[TMP43]], float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]] +// CHECK1-NEXT: br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]] +// CHECK1: cond.true21: +// CHECK1-NEXT: [[TMP46:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END23]] +// CHECK1: cond.false22: +// CHECK1-NEXT: [[TMP47:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: br label [[COND_END23]] +// CHECK1: cond.end23: +// CHECK1-NEXT: [[COND24:%.*]] = phi float [ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ] +// CHECK1-NEXT: store float [[COND24]], float* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP5]] to i32* +// CHECK1-NEXT: [[TMP50:%.*]] = cmpxchg i32* [[TMP49]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0 +// CHECK1-NEXT: [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1 +// CHECK1-NEXT: br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label 
[[ATOMIC_CONT]] +// CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] @@ -2005,11 +1985,9 @@ // CHECK2-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK2-NEXT: [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca float, align 4 -// CHECK2-NEXT: [[REF_TMP13:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK2-NEXT: [[ATOMIC_TEMP23:%.*]] = alloca float, align 4 -// CHECK2-NEXT: [[_TMP24:%.*]] = alloca float, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 @@ -2097,77 +2075,59 @@ // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP32:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP1]] to i32* -// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP33]] monotonic, align 4 -// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK2: atomic_cont: -// CHECK2-NEXT: [[TMP34:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP42:%.*]], [[ATOMIC_CONT]] ] -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP34]] to float -// CHECK2-NEXT: store float [[TMP36]], float* [[TMP]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load float, float* [[TMP]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = fadd float 
[[TMP37]], [[TMP38]] -// CHECK2-NEXT: store float [[ADD11]], float* [[ATOMIC_TEMP]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP1]] to i32* -// CHECK2-NEXT: [[TMP41:%.*]] = cmpxchg i32* [[TMP40]], i32 [[TMP34]], i32 [[TMP39]] monotonic monotonic, align 4 -// CHECK2-NEXT: [[TMP42]] = extractvalue { i32, i1 } [[TMP41]], 0 -// CHECK2-NEXT: [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP41]], 1 -// CHECK2-NEXT: br i1 [[TMP43]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK2: atomic_exit: +// CHECK2-NEXT: [[TMP33:%.*]] = atomicrmw fadd float* [[TMP1]], float [[TMP32]] monotonic, align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK2-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[CALL12]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) +// CHECK2-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) +// CHECK2-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8* +// CHECK2-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL11]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK2-NEXT: call void 
@__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK2-NEXT: [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00 -// CHECK2-NEXT: br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]] -// CHECK2: land.rhs16: -// CHECK2-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK2-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00 -// CHECK2-NEXT: br label [[LAND_END19]] -// CHECK2: land.end19: -// CHECK2-NEXT: [[TMP46:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ] -// CHECK2-NEXT: [[CONV20:%.*]] = uitofp i1 [[TMP46]] to float -// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], float noundef [[CONV20]]) -// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP4]] to i8* -// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[REF_TMP13]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) -// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) +// CHECK2-NEXT: [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 +// CHECK2-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]] +// CHECK2: land.rhs15: +// CHECK2-NEXT: [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) +// CHECK2-NEXT: [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00 +// CHECK2-NEXT: br label [[LAND_END18]] +// CHECK2: land.end18: +// CHECK2-NEXT: 
[[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ] +// CHECK2-NEXT: [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float +// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]]) +// CHECK2-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP4]] to i8* +// CHECK2-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP12]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) +// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]] // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = bitcast float* [[TMP5]] to i32* -// CHECK2-NEXT: [[ATOMIC_LOAD21:%.*]] = load atomic i32, i32* [[TMP50]] monotonic, align 4 -// CHECK2-NEXT: br label [[ATOMIC_CONT22:%.*]] -// CHECK2: atomic_cont22: -// CHECK2-NEXT: [[TMP51:%.*]] = phi i32 [ [[ATOMIC_LOAD21]], [[LAND_END19]] ], [ [[TMP61:%.*]], [[COND_END28:%.*]] ] -// CHECK2-NEXT: [[TMP52:%.*]] = bitcast float* [[ATOMIC_TEMP23]] to i32* -// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i32 [[TMP51]] to float -// CHECK2-NEXT: store float [[TMP53]], float* [[_TMP24]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load float, float* [[_TMP24]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK2-NEXT: [[CMP25:%.*]] = fcmp olt float [[TMP54]], [[TMP55]] -// CHECK2-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] -// CHECK2: cond.true26: -// CHECK2-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP24]], align 4 -// CHECK2-NEXT: br label [[COND_END28]] -// CHECK2: cond.false27: -// CHECK2-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR15]], align 4 -// 
CHECK2-NEXT: br label [[COND_END28]] -// CHECK2: cond.end28: -// CHECK2-NEXT: [[COND29:%.*]] = phi float [ [[TMP56]], [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ] -// CHECK2-NEXT: store float [[COND29]], float* [[ATOMIC_TEMP23]], align 4 -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP52]], align 4 -// CHECK2-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP5]] to i32* -// CHECK2-NEXT: [[TMP60:%.*]] = cmpxchg i32* [[TMP59]], i32 [[TMP51]], i32 [[TMP58]] monotonic monotonic, align 4 -// CHECK2-NEXT: [[TMP61]] = extractvalue { i32, i1 } [[TMP60]], 0 -// CHECK2-NEXT: [[TMP62:%.*]] = extractvalue { i32, i1 } [[TMP60]], 1 -// CHECK2-NEXT: br i1 [[TMP62]], label [[ATOMIC_EXIT30:%.*]], label [[ATOMIC_CONT22]] -// CHECK2: atomic_exit30: +// CHECK2-NEXT: [[TMP39:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP5]] to i32* +// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP40]] monotonic, align 4 +// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] +// CHECK2: atomic_cont: +// CHECK2-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ] +// CHECK2-NEXT: [[TMP42:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK2-NEXT: [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float +// CHECK2-NEXT: store float [[TMP43]], float* [[TMP]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load float, float* [[TMP]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK2-NEXT: [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]] +// CHECK2-NEXT: br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]] +// CHECK2: cond.true21: +// CHECK2-NEXT: [[TMP46:%.*]] = load float, float* [[TMP]], align 4 +// CHECK2-NEXT: br label [[COND_END23]] +// CHECK2: cond.false22: +// CHECK2-NEXT: [[TMP47:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK2-NEXT: br label [[COND_END23]] +// CHECK2: cond.end23: +// CHECK2-NEXT: [[COND24:%.*]] = phi float 
[ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ] +// CHECK2-NEXT: store float [[COND24]], float* [[ATOMIC_TEMP]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP5]] to i32* +// CHECK2-NEXT: [[TMP50:%.*]] = cmpxchg i32* [[TMP49]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4 +// CHECK2-NEXT: [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0 +// CHECK2-NEXT: [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1 +// CHECK2-NEXT: br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK2: atomic_exit: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -502,8 +502,6 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP30:%.*]] = alloca double, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8 @@ -668,39 +666,21 @@ // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]] // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP59]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]] 
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]] // CHECK1: omp.arraycpy.body27: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ] // CHECK1-NEXT: [[TMP60:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64* -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* [[TMP61]] monotonic, align 8 -// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP62:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY27]] ], [ [[TMP70:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64* -// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i64 [[TMP62]] to double -// CHECK1-NEXT: store double [[TMP64]], double* [[_TMP30]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load double, double* [[_TMP30]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 -// CHECK1-NEXT: [[ADD31:%.*]] = fadd double [[TMP65]], [[TMP66]] -// CHECK1-NEXT: store double [[ADD31]], double* [[ATOMIC_TEMP]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load i64, i64* [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64* 
-// CHECK1-NEXT: [[TMP69:%.*]] = cmpxchg i64* [[TMP68]], i64 [[TMP62]], i64 [[TMP67]] monotonic monotonic, align 8 -// CHECK1-NEXT: [[TMP70]] = extractvalue { i64, i1 } [[TMP69]], 0 -// CHECK1-NEXT: [[TMP71:%.*]] = extractvalue { i64, i1 } [[TMP69]], 1 -// CHECK1-NEXT: br i1 [[TMP71]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] -// CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP59]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY27]] -// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: [[TMP61:%.*]] = atomicrmw fadd double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP60]] monotonic, align 8 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP59]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]] +// CHECK1: omp.arraycpy.done33: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP72:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP72]]) +// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP62]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp --- 
a/clang/test/OpenMP/sections_reduction_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_codegen.cpp @@ -268,11 +268,9 @@ // CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP17:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP27:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 @@ -401,77 +399,59 @@ // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP43:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP0]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP44]] monotonic, align 4 -// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP45:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP53:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP45]] to float -// CHECK1-NEXT: store float [[TMP47]], float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load float, float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = fadd float [[TMP48]], [[TMP49]] -// CHECK1-NEXT: store float [[ADD15]], float* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP46]], 
align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP0]] to i32* -// CHECK1-NEXT: [[TMP52:%.*]] = cmpxchg i32* [[TMP51]], i32 [[TMP45]], i32 [[TMP50]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP53]] = extractvalue { i32, i1 } [[TMP52]], 0 -// CHECK1-NEXT: [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1 -// CHECK1-NEXT: br i1 [[TMP54]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK1: atomic_exit: +// CHECK1-NEXT: [[TMP44:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP43]] monotonic, align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %struct.S* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[CALL16]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP1]] to i8* +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL18:%.*]] = call noundef 
float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL19:%.*]] = fcmp une float [[CALL18]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL19]], label [[LAND_RHS20:%.*]], label [[LAND_END23:%.*]] -// CHECK1: land.rhs20: -// CHECK1-NEXT: [[CALL21:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL22:%.*]] = fcmp une float [[CALL21]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END23]] -// CHECK1: land.end23: -// CHECK1-NEXT: [[TMP57:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL22]], [[LAND_RHS20]] ] -// CHECK1-NEXT: [[CONV24:%.*]] = uitofp i1 [[TMP57]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]], float noundef [[CONV24]]) -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[REF_TMP17]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP58]], i8* align 4 [[TMP59]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]]) #[[ATTR4]] +// CHECK1-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) +// CHECK1-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]] +// CHECK1: land.rhs19: +// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) +// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END22]] +// CHECK1: land.end22: +// CHECK1-NEXT: [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ] +// CHECK1-NEXT: [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float +// 
CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]]) +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP2]] to i8* +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[REF_TMP16]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP60:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD25:%.*]] = load atomic i32, i32* [[TMP61]] monotonic, align 4 -// CHECK1-NEXT: br label [[ATOMIC_CONT26:%.*]] -// CHECK1: atomic_cont26: -// CHECK1-NEXT: [[TMP62:%.*]] = phi i32 [ [[ATOMIC_LOAD25]], [[LAND_END23]] ], [ [[TMP72:%.*]], [[COND_END32:%.*]] ] -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast float* [[ATOMIC_TEMP27]] to i32* -// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32 [[TMP62]] to float -// CHECK1-NEXT: store float [[TMP64]], float* [[_TMP28]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = load float, float* [[_TMP28]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP29:%.*]] = fcmp olt float [[TMP65]], [[TMP66]] -// CHECK1-NEXT: br i1 [[CMP29]], label [[COND_TRUE30:%.*]], label [[COND_FALSE31:%.*]] -// CHECK1: cond.true30: -// CHECK1-NEXT: [[TMP67:%.*]] = load float, float* [[_TMP28]], align 4 -// CHECK1-NEXT: br label [[COND_END32]] -// CHECK1: cond.false31: -// CHECK1-NEXT: [[TMP68:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END32]] -// CHECK1: cond.end32: -// CHECK1-NEXT: [[COND33:%.*]] = phi float [ [[TMP67]], [[COND_TRUE30]] ], [ [[TMP68]], [[COND_FALSE31]] ] -// 
CHECK1-NEXT: store float [[COND33]], float* [[ATOMIC_TEMP27]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, i32* [[TMP63]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[TMP71:%.*]] = cmpxchg i32* [[TMP70]], i32 [[TMP62]], i32 [[TMP69]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP72]] = extractvalue { i32, i1 } [[TMP71]], 0 -// CHECK1-NEXT: [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP71]], 1 -// CHECK1-NEXT: br i1 [[TMP73]], label [[ATOMIC_EXIT34:%.*]], label [[ATOMIC_CONT26]] -// CHECK1: atomic_exit34: +// CHECK1-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP51]] monotonic, align 4 +// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] +// CHECK1: atomic_cont: +// CHECK1-NEXT: [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ] +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float +// CHECK1-NEXT: store float [[TMP54]], float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: [[CMP24:%.*]] = fcmp olt float [[TMP55]], [[TMP56]] +// CHECK1-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]] +// CHECK1: cond.true25: +// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END27]] +// CHECK1: cond.false26: +// CHECK1-NEXT: [[TMP58:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: br label [[COND_END27]] +// CHECK1: cond.end27: +// CHECK1-NEXT: [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ] +// CHECK1-NEXT: store float [[COND28]], float* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = 
load i32, i32* [[TMP53]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK1-NEXT: [[TMP61:%.*]] = cmpxchg i32* [[TMP60]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0 +// CHECK1-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1 +// CHECK1-NEXT: br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1: atomic_exit: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -1100,11 +1080,9 @@ // CHECK2-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK2-NEXT: [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca float, align 4 -// CHECK2-NEXT: [[REF_TMP17:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK2-NEXT: [[ATOMIC_TEMP27:%.*]] = alloca float, align 4 -// CHECK2-NEXT: [[_TMP28:%.*]] = alloca float, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 @@ -1233,77 +1211,59 @@ // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP43:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP0]] to i32* -// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP44]] monotonic, align 4 -// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK2: atomic_cont: -// CHECK2-NEXT: [[TMP45:%.*]] = phi i32 [ [[ATOMIC_LOAD]], 
[[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP53:%.*]], [[ATOMIC_CONT]] ] -// CHECK2-NEXT: [[TMP46:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK2-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP45]] to float -// CHECK2-NEXT: store float [[TMP47]], float* [[TMP]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = load float, float* [[TMP]], align 4 -// CHECK2-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = fadd float [[TMP48]], [[TMP49]] -// CHECK2-NEXT: store float [[ADD15]], float* [[ATOMIC_TEMP]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP0]] to i32* -// CHECK2-NEXT: [[TMP52:%.*]] = cmpxchg i32* [[TMP51]], i32 [[TMP45]], i32 [[TMP50]] monotonic monotonic, align 4 -// CHECK2-NEXT: [[TMP53]] = extractvalue { i32, i1 } [[TMP52]], 0 -// CHECK2-NEXT: [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1 -// CHECK2-NEXT: br i1 [[TMP54]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK2: atomic_exit: +// CHECK2-NEXT: [[TMP44:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP43]] monotonic, align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK2-NEXT: [[TMP55:%.*]] = bitcast %struct.S* [[TMP1]] to i8* -// CHECK2-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[CALL16]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i64 4, i1 false) +// CHECK2-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) 
[[VAR3]]) +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP1]] to i8* +// CHECK2-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[CALL18:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK2-NEXT: [[TOBOOL19:%.*]] = fcmp une float [[CALL18]], 0.000000e+00 -// CHECK2-NEXT: br i1 [[TOBOOL19]], label [[LAND_RHS20:%.*]], label [[LAND_END23:%.*]] -// CHECK2: land.rhs20: -// CHECK2-NEXT: [[CALL21:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK2-NEXT: [[TOBOOL22:%.*]] = fcmp une float [[CALL21]], 0.000000e+00 -// CHECK2-NEXT: br label [[LAND_END23]] -// CHECK2: land.end23: -// CHECK2-NEXT: [[TMP57:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL22]], [[LAND_RHS20]] ] -// CHECK2-NEXT: [[CONV24:%.*]] = uitofp i1 [[TMP57]] to float -// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]], float noundef [[CONV24]]) -// CHECK2-NEXT: [[TMP58:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK2-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[REF_TMP17]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP58]], i8* align 4 [[TMP59]], i64 4, i1 false) -// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]]) #[[ATTR4]] +// CHECK2-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) +// CHECK2-NEXT: [[TOBOOL18:%.*]] = fcmp 
une float [[CALL17]], 0.000000e+00 +// CHECK2-NEXT: br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]] +// CHECK2: land.rhs19: +// CHECK2-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) +// CHECK2-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 +// CHECK2-NEXT: br label [[LAND_END22]] +// CHECK2: land.end22: +// CHECK2-NEXT: [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ] +// CHECK2-NEXT: [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float +// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]]) +// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP2]] to i8* +// CHECK2-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[REF_TMP16]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false) +// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP60:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK2-NEXT: [[ATOMIC_LOAD25:%.*]] = load atomic i32, i32* [[TMP61]] monotonic, align 4 -// CHECK2-NEXT: br label [[ATOMIC_CONT26:%.*]] -// CHECK2: atomic_cont26: -// CHECK2-NEXT: [[TMP62:%.*]] = phi i32 [ [[ATOMIC_LOAD25]], [[LAND_END23]] ], [ [[TMP72:%.*]], [[COND_END32:%.*]] ] -// CHECK2-NEXT: [[TMP63:%.*]] = bitcast float* [[ATOMIC_TEMP27]] to i32* -// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32 [[TMP62]] to float -// CHECK2-NEXT: store float [[TMP64]], float* [[_TMP28]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = load float, float* [[_TMP28]], align 4 -// CHECK2-NEXT: [[TMP66:%.*]] = load 
float, float* [[T_VAR15]], align 4 -// CHECK2-NEXT: [[CMP29:%.*]] = fcmp olt float [[TMP65]], [[TMP66]] -// CHECK2-NEXT: br i1 [[CMP29]], label [[COND_TRUE30:%.*]], label [[COND_FALSE31:%.*]] -// CHECK2: cond.true30: -// CHECK2-NEXT: [[TMP67:%.*]] = load float, float* [[_TMP28]], align 4 -// CHECK2-NEXT: br label [[COND_END32]] -// CHECK2: cond.false31: -// CHECK2-NEXT: [[TMP68:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK2-NEXT: br label [[COND_END32]] -// CHECK2: cond.end32: -// CHECK2-NEXT: [[COND33:%.*]] = phi float [ [[TMP67]], [[COND_TRUE30]] ], [ [[TMP68]], [[COND_FALSE31]] ] -// CHECK2-NEXT: store float [[COND33]], float* [[ATOMIC_TEMP27]], align 4 -// CHECK2-NEXT: [[TMP69:%.*]] = load i32, i32* [[TMP63]], align 4 -// CHECK2-NEXT: [[TMP70:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK2-NEXT: [[TMP71:%.*]] = cmpxchg i32* [[TMP70]], i32 [[TMP62]], i32 [[TMP69]] monotonic monotonic, align 4 -// CHECK2-NEXT: [[TMP72]] = extractvalue { i32, i1 } [[TMP71]], 0 -// CHECK2-NEXT: [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP71]], 1 -// CHECK2-NEXT: br i1 [[TMP73]], label [[ATOMIC_EXIT34:%.*]], label [[ATOMIC_CONT26]] -// CHECK2: atomic_exit34: +// CHECK2-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP51]] monotonic, align 4 +// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] +// CHECK2: atomic_cont: +// CHECK2-NEXT: [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ] +// CHECK2-NEXT: [[TMP53:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float +// CHECK2-NEXT: store float [[TMP54]], float* [[TMP]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load float, float* [[TMP]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK2-NEXT: [[CMP24:%.*]] = fcmp olt float [[TMP55]], 
[[TMP56]] +// CHECK2-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]] +// CHECK2: cond.true25: +// CHECK2-NEXT: [[TMP57:%.*]] = load float, float* [[TMP]], align 4 +// CHECK2-NEXT: br label [[COND_END27]] +// CHECK2: cond.false26: +// CHECK2-NEXT: [[TMP58:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK2-NEXT: br label [[COND_END27]] +// CHECK2: cond.end27: +// CHECK2-NEXT: [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ] +// CHECK2-NEXT: store float [[COND28]], float* [[ATOMIC_TEMP]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP53]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = bitcast float* [[TMP3]] to i32* +// CHECK2-NEXT: [[TMP61:%.*]] = cmpxchg i32* [[TMP60]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4 +// CHECK2-NEXT: [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0 +// CHECK2-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1 +// CHECK2-NEXT: br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK2: atomic_exit: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: @@ -1868,8 +1828,6 @@ // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[TMP:%.*]] = alloca double, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 @@ -1933,23 +1891,7 @@ // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: // CHECK3-NEXT: 
[[TMP17:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8 -// CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP18:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[ATOMIC_CONT]] ] -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64* -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i64 [[TMP18]] to double -// CHECK3-NEXT: store double [[TMP20]], double* [[TMP]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[TMP]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP21]], [[TMP22]] -// CHECK3-NEXT: store double [[ADD2]], double* [[ATOMIC_TEMP]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP18]], i64 [[TMP23]] monotonic monotonic, align 8 -// CHECK3-NEXT: [[TMP25]] = extractvalue { i64, i1 } [[TMP24]], 0 -// CHECK3-NEXT: [[TMP26:%.*]] = extractvalue { i64, i1 } [[TMP24]], 1 -// CHECK3-NEXT: br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK3: atomic_exit: +// CHECK3-NEXT: [[TMP18:%.*]] = atomicrmw fadd double* @g, double [[TMP17]] monotonic, align 8 // CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -2017,8 +1959,6 @@ // CHECK4-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>, align 8 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK4-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8 -// CHECK4-NEXT: [[TMP:%.*]] = alloca double, align 8 // CHECK4-NEXT: 
store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 @@ -2099,23 +2039,7 @@ // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: // CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8 -// CHECK4-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK4: atomic_cont: -// CHECK4-NEXT: [[TMP23:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[ATOMIC_CONT]] ] -// CHECK4-NEXT: [[TMP24:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64* -// CHECK4-NEXT: [[TMP25:%.*]] = bitcast i64 [[TMP23]] to double -// CHECK4-NEXT: store double [[TMP25]], double* [[TMP]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* [[TMP]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[ADD2:%.*]] = fadd double [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store double [[ADD2]], double* [[ATOMIC_TEMP]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP24]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP23]], i64 [[TMP28]] monotonic monotonic, align 8 -// CHECK4-NEXT: [[TMP30]] = extractvalue { i64, i1 } [[TMP29]], 0 -// CHECK4-NEXT: [[TMP31:%.*]] = extractvalue { i64, i1 } [[TMP29]], 1 -// CHECK4-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK4: atomic_exit: +// CHECK4-NEXT: [[TMP23:%.*]] = atomicrmw fadd double* @g, double [[TMP22]] monotonic, align 8 // CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: