diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5327,6 +5327,8 @@ IntegerLiteral *Zero = IntegerLiteral::Create( Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 0), LogicalTy, {}); + IntegerLiteral *One = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 1), LogicalTy, {}); Expr *Dist; if (Rel == BO_NE) { // When using a != comparison, the increment can be +1 or -1. This can be @@ -5381,18 +5383,25 @@ if (Rel == BO_LE || Rel == BO_GE) { // Add one to the range if the relational operator is inclusive. - Range = AssertSuccess(Actions.BuildBinOp( - nullptr, {}, BO_Add, Range, - Actions.ActOnIntegerConstant(SourceLocation(), 1).get())); + Range = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Add, Range, One)); } - // Divide by the absolute step amount. + // Divide by the absolute step amount. If the range is not a multiple of + // the step size, rounding-up the effective upper bound ensures that the + // last iteration is included. + // Note that the rounding-up may cause an overflow in a temporry that + // could be avoided, but would have occured in a C-style for-loop as well. Expr *Divisor = BuildVarRef(NewStep); if (Rel == BO_GE || Rel == BO_GT) Divisor = AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, Divisor)); + Expr *DivisorMinusOne = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Sub, Divisor, One)); + Expr *RangeRoundUp = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Add, Range, DivisorMinusOne)); Dist = AssertSuccess( - Actions.BuildBinOp(nullptr, {}, BO_Div, Range, Divisor)); + Actions.BuildBinOp(nullptr, {}, BO_Div, RangeRoundUp, Divisor)); // If there is not at least one iteration, the range contains garbage. Fix // to zero in this case. diff --git a/clang/test/OpenMP/irbuilder_for_unsigned.c b/clang/test/OpenMP/irbuilder_for_unsigned.c --- a/clang/test/OpenMP/irbuilder_for_unsigned.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned.c @@ -123,14 +123,17 @@ // CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTSTART]], align 4 // CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] // CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTSTEP]], align 4 -// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], [[TMP8]] +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 [[COND]], i32* [[TMP9]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], i32* [[TMP10]], align 4 // CHECK-NEXT: ret void // // diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_for_unsigned_down.c copy from clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c copy to clang/test/OpenMP/irbuilder_for_unsigned_down.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_down.c @@ -1,25 +1,23 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic( +// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_down( // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8 // CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 // CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 // CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 // CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 +// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 // CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8 -// CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8 -// CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8 -// CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, i32* %[[I]], align 4 +// CHECK-NEXT: store i32 32000000, i32* %[[I]], align 4 // CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0 // CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0 @@ -30,6 +28,16 @@ // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: +// CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[TMP3:.+]] = sub i32 %[[DOTCOUNT]], 1 +// CHECK-NEXT: store i32 %[[TMP3]], i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4 +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4 +// CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4 +// CHECK-NEXT: %[[TMP6:.+]] = sub i32 %[[TMP5]], %[[TMP4]] +// CHECK-NEXT: %[[TMP7:.+]] = add i32 %[[TMP6]], 1 // CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_HEADER]]: @@ -37,53 +45,48 @@ // CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] +// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[TMP7]] // CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, i32* %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4 +// CHECK-NEXT: %[[TMP8:.+]] = add i32 %[[OMP_LOOP_IV]], %[[TMP4]] +// CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP8]], %struct.anon.0* %[[AGG_CAPTURED1]]) +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[CONV:.+]] = uitofp i32 %[[TMP9]] to float +// CHECK-NEXT: %[[TMP10:.+]] = load float*, float** %[[A_ADDR]], align 8 +// CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[I]], align 4 +// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP11]] to i64 +// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP10]], i64 %[[IDXPROM]] +// CHECK-NEXT: store float %[[CONV]], float* %[[ARRAYIDX]], align 4 // CHECK-NEXT: br label %[[OMP_LOOP_INC]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_INC]]: // CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] +// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_EXIT]]: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM2:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM2]]) // CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_AFTER]]: // CHECK-NEXT: ret void // CHECK-NEXT: } -void unroll_partial_heuristic(float *a, float *b, float *c, float *d) { -#pragma omp unroll partial - for (int i = 0; i < 2; i++) { - a[i] = b[i] * c[i] * d[i]; + +extern "C" void workshareloop_unsigned_down(float *a) { +#pragma omp for + for (unsigned i = 32000000; i > 33; i -= 7) { + a[i] = i; } } #endif // HEADER +// +// +// +// +// // CHECK-LABEL: define {{.*}}@__captured_stmt( // CHECK-NEXT: [[ENTRY:.*]]: @@ -99,19 +102,24 @@ // CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: store i32 33, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 -7, i32* %[[DOTSTEP]], align 4 // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] +// CHECK-NEXT: %[[CMP:.+]] = icmp ugt i32 %[[TMP4]], %[[TMP5]] // CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] +// CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTSTART]], align 4 +// CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTOP]], align 4 +// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub nsw i32 0, %[[TMP8]] +// CHECK-NEXT: %[[SUB2:.+]] = sub i32 %[[SUB1]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB2]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i32 0, %[[TMP9]] +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[SUB3]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -119,8 +127,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } @@ -137,7 +145,7 @@ // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] +// CHECK-NEXT: %[[MUL:.+]] = mul i32 -7, %[[TMP3]] // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] // CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4 @@ -146,7 +154,5 @@ // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} // CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} diff --git a/clang/test/OpenMP/irbuilder_unroll_full.c b/clang/test/OpenMP/irbuilder_unroll_full.c --- a/clang/test/OpenMP/irbuilder_unroll_full.c +++ b/clang/test/OpenMP/irbuilder_unroll_full.c @@ -111,7 +111,10 @@ // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -119,8 +122,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_heuristic.c --- a/clang/test/OpenMP/irbuilder_unroll_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_heuristic.c @@ -112,7 +112,10 @@ // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -120,8 +123,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor.c @@ -111,7 +111,10 @@ // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -119,8 +122,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -180,7 +180,10 @@ // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] // CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP11]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -188,8 +191,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 +// CHECK-NEXT: %[[TMP13:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP13]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c @@ -111,7 +111,10 @@ // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -119,8 +122,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -201,7 +201,10 @@ // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -209,8 +212,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -206,7 +206,10 @@ // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] // CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP11]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -214,8 +217,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP12]], align 4 +// CHECK-NEXT: %[[TMP13:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP13]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -173,7 +173,10 @@ // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -181,8 +184,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c @@ -154,7 +154,10 @@ // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]] +// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 +// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] +// CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4 +// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] // CHECK-NEXT: br label %[[COND_END:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[COND_FALSE]]: @@ -162,8 +165,8 @@ // CHECK-EMPTY: // CHECK-NEXT: [[COND_END]]: // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP9]], align 4 +// CHECK-NEXT: %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP10]], align 4 // CHECK-NEXT: ret void // CHECK-NEXT: }