diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4668,11 +4668,12 @@ CaptureExpr->getExprLoc()); } -static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref) { +static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref, + StringRef Name) { CaptureExpr = S.DefaultLvalueConversion(CaptureExpr).get(); if (!Ref) { OMPCapturedExprDecl *CD = buildCaptureDecl( - S, &S.getASTContext().Idents.get(".capture_expr."), CaptureExpr, + S, &S.getASTContext().Idents.get(Name), CaptureExpr, /*WithInit=*/true, S.CurContext, /*AsExpression=*/true); Ref = buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(), CaptureExpr->getExprLoc()); @@ -8441,7 +8442,8 @@ static ExprResult tryBuildCapture(Sema &SemaRef, Expr *Capture, - llvm::MapVector<const Expr *, DeclRefExpr *> &Captures) { + llvm::MapVector<const Expr *, DeclRefExpr *> &Captures, + StringRef Name = ".capture_expr.") { if (SemaRef.CurContext->isDependentContext() || Capture->containsErrors()) return Capture; if (Capture->isEvaluatable(SemaRef.Context, Expr::SE_AllowSideEffects)) @@ -8450,9 +8452,9 @@ /*AllowExplicit=*/true); auto I = Captures.find(Capture); if (I != Captures.end()) - return buildCapture(SemaRef, Capture, I->second); + return buildCapture(SemaRef, Capture, I->second, Name); DeclRefExpr *Ref = nullptr; - ExprResult Res = buildCapture(SemaRef, Capture, Ref); + ExprResult Res = buildCapture(SemaRef, Capture, Ref, Name); Captures[Capture] = Ref; return Res; } @@ -8464,7 +8466,7 @@ Expr *Lower, Expr *Upper, Expr *Step, QualType LCTy, bool TestIsStrictOp, bool RoundToStep, llvm::MapVector<const Expr *, DeclRefExpr *> &Captures) { - ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures); + ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures, ".new_step"); if (!NewStep.isUsable()) return nullptr; llvm::APSInt LRes, SRes; @@ -8640,8 +8642,8 @@ return nullptr; Expr *LBVal = LB; Expr *UBVal = UB; - // LB = TestIsLessOp.getValue() ? 
min(LB(MinVal), LB(MaxVal)) : - // max(LB(MinVal), LB(MaxVal)) + // OuterVar = (LB = TestIsLessOp.getValue() ? min(LB(MinVal), LB(MaxVal)) : + // max(LB(MinVal), LB(MaxVal))) if (InitDependOnLC) { const LoopIterationSpace &IS = ResultIterSpaces[*InitDependOnLC - 1]; if (!IS.MinValue || !IS.MaxValue) @@ -8686,8 +8688,10 @@ if (!LBMaxVal.isUsable()) return nullptr; - Expr *LBMin = tryBuildCapture(SemaRef, LBMinVal.get(), Captures).get(); - Expr *LBMax = tryBuildCapture(SemaRef, LBMaxVal.get(), Captures).get(); + Expr *LBMin = + tryBuildCapture(SemaRef, LBMinVal.get(), Captures, ".lb_min").get(); + Expr *LBMax = + tryBuildCapture(SemaRef, LBMaxVal.get(), Captures, ".lb_max").get(); if (!LBMin || !LBMax) return nullptr; // LB(MinVal) < LB(MaxVal) @@ -8696,7 +8700,8 @@ if (!MinLessMaxRes.isUsable()) return nullptr; Expr *MinLessMax = - tryBuildCapture(SemaRef, MinLessMaxRes.get(), Captures).get(); + tryBuildCapture(SemaRef, MinLessMaxRes.get(), Captures, ".min_less_max") + .get(); if (!MinLessMax) return nullptr; if (*TestIsLessOp) { @@ -8716,6 +8721,12 @@ return nullptr; LBVal = MaxLB.get(); } + // OuterVar = LB + LBMinVal = + SemaRef.BuildBinOp(S, DefaultLoc, BO_Assign, IS.CounterVar, LBVal); + if (!LBMinVal.isUsable()) + return nullptr; + LBVal = LBMinVal.get(); } // UB = TestIsLessOp.getValue() ? 
max(UB(MinVal), UB(MaxVal)) : // min(UB(MinVal), UB(MaxVal)) @@ -8763,8 +8774,10 @@ if (!UBMaxVal.isUsable()) return nullptr; - Expr *UBMin = tryBuildCapture(SemaRef, UBMinVal.get(), Captures).get(); - Expr *UBMax = tryBuildCapture(SemaRef, UBMaxVal.get(), Captures).get(); + Expr *UBMin = + tryBuildCapture(SemaRef, UBMinVal.get(), Captures, ".ub_min").get(); + Expr *UBMax = + tryBuildCapture(SemaRef, UBMaxVal.get(), Captures, ".ub_max").get(); if (!UBMin || !UBMax) return nullptr; // UB(MinVal) > UB(MaxVal) @@ -8772,8 +8785,9 @@ SemaRef.BuildBinOp(S, DefaultLoc, BO_GT, UBMin, UBMax); if (!MinGreaterMaxRes.isUsable()) return nullptr; - Expr *MinGreaterMax = - tryBuildCapture(SemaRef, MinGreaterMaxRes.get(), Captures).get(); + Expr *MinGreaterMax = tryBuildCapture(SemaRef, MinGreaterMaxRes.get(), + Captures, ".min_greater_max") + .get(); if (!MinGreaterMax) return nullptr; if (*TestIsLessOp) { @@ -8796,8 +8810,8 @@ } Expr *UBExpr = *TestIsLessOp ? UBVal : LBVal; Expr *LBExpr = *TestIsLessOp ? 
LBVal : UBVal; - Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures).get(); - Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures).get(); + Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures, ".upper").get(); + Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures, ".lower").get(); if (!Upper || !Lower) return nullptr; @@ -8891,7 +8905,7 @@ if (!Diff.isUsable()) return std::make_pair(nullptr, nullptr); - ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures); + ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures, ".new_step"); if (!NewStep.isUsable()) return std::make_pair(nullptr, nullptr); Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Mul, Diff.get(), NewStep.get()); diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -78,6 +78,7 @@ // CHECK: br label %[[EXIT]] // CHECK: [[EXIT]]: // CHECK: [[J_LB_VAL:%.+]] = phi i32 [ [[J_LB_MIN_VAL]], %[[TRUE]] ], [ [[J_LB_MAX_VAL]], %[[FALSE]] ] + // CHECK: store i32 [[J_LB_VAL]], ptr [[I_TMP]], // CHECK: store i32 [[J_LB_VAL]], ptr [[J_LB:%.+]], // CHECK: [[J_UB_VAL:%.+]] = load i32, ptr [[J_UB]], // CHECK: [[J_LB_VAL:%.+]] = load i32, ptr [[J_LB]], diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp --- a/clang/test/OpenMP/tile_codegen.cpp +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -179,8 +179,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 @@ -191,56 
+191,56 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[END_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTNEW_STEP]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], [[TMP5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP5]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP6]] -// CHECK1-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] -// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END18:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: 
[[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END17:%.*]] // CHECK1: for.body: // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND7:%.*]] -// CHECK1: for.cond7: +// CHECK1-NEXT: br label [[FOR_COND6:%.*]] +// CHECK1: for.cond6: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP12]], 5 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[ADD8]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP13]], 1 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CMP13:%.*]] = icmp ult i32 
[[TMP10]], [[COND]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]] -// CHECK1: for.body14: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP10]], [[COND]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[FOR_BODY13:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body13: // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP15]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[I]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add i32 [[TMP15]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP18]]) // CHECK1-NEXT: br label [[FOR_INC:%.*]] @@ -248,15 +248,15 @@ // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: for.end: -// CHECK1-NEXT: br label [[FOR_INC16:%.*]] -// CHECK1: for.inc16: +// CHECK1-NEXT: br label [[FOR_INC15:%.*]] +// CHECK1: for.inc15: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 5 -// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5 +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] -// CHECK1: for.end18: +// CHECK1: for.end17: // CHECK1-NEXT: ret void // // @@ -1173,8 +1173,8 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 @@ -1185,56 +1185,56 @@ // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[END_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTNEW_STEP]], align 
4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], [[TMP5]] -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP5]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP6]] -// CHECK2-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] -// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END18:%.*]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END17:%.*]] // CHECK2: for.body: // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND7:%.*]] -// CHECK2: for.cond7: 
+// CHECK2-NEXT: br label [[FOR_COND6:%.*]] +// CHECK2: for.cond6: // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP12]], 5 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i32 [[ADD8]], [[ADD9]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP13]], 1 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CMP13:%.*]] = icmp ult i32 [[TMP10]], [[COND]] -// CHECK2-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]] -// CHECK2: for.body14: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP10]], [[COND]] +// CHECK2-NEXT: br i1 [[CMP12]], label [[FOR_BODY13:%.*]], label 
[[FOR_END:%.*]] +// CHECK2: for.body13: // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP15]], [[MUL]] -// CHECK2-NEXT: store i32 [[ADD15]], ptr [[I]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add i32 [[TMP15]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 // CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP18]]) // CHECK2-NEXT: br label [[FOR_INC:%.*]] @@ -1242,15 +1242,15 @@ // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: br label [[FOR_INC16:%.*]] -// CHECK2: for.inc16: +// CHECK2-NEXT: br label [[FOR_INC15:%.*]] +// CHECK2: for.inc15: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 5 -// CHECK2-NEXT: store i32 [[ADD17]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5 +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] -// CHECK2: for.end18: +// CHECK2: for.end17: // CHECK2-NEXT: ret void // // diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_non_rectangular.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_non_rectangular.c new file mode 100644 --- /dev/null +++ 
b/openmp/runtime/test/worksharing/for/omp_for_collapse_non_rectangular.c @@ -0,0 +1,22 @@ +// RUN: %libomp-compile-and-run + +#include <stdio.h> + +#define N 3 + +int arr[N][N][N]; +int main() { +#pragma omp for collapse(3) + for (unsigned int i = 0; i < N; ++i) + for (unsigned int j = i; j < N; ++j) + for (unsigned int k = j; k < N; ++k) + arr[i][j][k] = 1; + int num_failed = 0; + for (unsigned int i = 0; i < N; ++i) + for (unsigned int j = 0; j < N; ++j) + for (unsigned int k = 0; k < N; ++k) + if (arr[i][j][k] == (j >= i && k >= j) ? 0 : 1) + ++num_failed; + + return num_failed; +}