diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -157,7 +157,6 @@ namespace {

-typedef SmallSet SCEVSet;
 typedef DenseMap<const SCEV *, int64_t> SCEVConstValPairMap;

 /// A helper class to do the following SCEV expression conversions.
@@ -165,13 +164,14 @@
 /// 2) "SOME_CONSTANT_VALUE smax %val" to "%val"
 class SCEVExprConverter {
 public:
+  Loop *CurLoop;
   ScalarEvolution &SE;
   SCEVConstValPairMap CheckSltMap;

   SCEVExprConverter(ScalarEvolution &SE) : SE(SE) {}

-  const SCEV *convertSCEV(const SCEV *Expr);
+  const SCEV *convertSCEV(const SCEV *Expr, bool AddRuntimeCheck);
 };

 class LoopIdiomRecognize {
@@ -349,7 +349,8 @@ } // end anonymous namespace

 /// Implementation of SCEVExprConverter.
-const SCEV *SCEVExprConverter::convertSCEV(const SCEV *Expr) {
+/// Tries to fold the SCEV with regard to the loop guards of CurLoop.
+const SCEV *SCEVExprConverter::convertSCEV(const SCEV *Expr,
+                                           bool AddRuntimeCheck) {
   switch (Expr->getSCEVType()) {
   case scConstant:
   case scUnknown:
@@ -358,98 +359,108 @@
   case scTruncate: {
     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Expr);
     Type *Ty = Trunc->getType();
-    const SCEV *NewTrunc = convertSCEV(Trunc->getOperand());
+    const SCEV *NewTrunc = convertSCEV(Trunc->getOperand(), AddRuntimeCheck);
     return SE.getTruncateExpr(NewTrunc, Ty);
   }
   case scZeroExtend: {
     const SCEVZeroExtendExpr *Zext = cast<SCEVZeroExtendExpr>(Expr);
     Type *Ty = Zext->getType();
-    const SCEV *NewZext = convertSCEV(Zext->getOperand());
+    const SCEV *NewZext = convertSCEV(Zext->getOperand(), AddRuntimeCheck);
     return SE.getZeroExtendExpr(NewZext, Ty);
   }
   case scSignExtend: {
-    // Record the original SCEV sext expression, and
-    // convert it to zext.
+    // If the loop entry does not guard the expression to be non-negative and
+    // we may not add a runtime check, return the original SCEV. Otherwise
+    // convert the sext to a zext and, when the guard is missing, record the
+    // runtime check that justifies the conversion.
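+    // For example (illustrative only; the exact SCEVs depend on the input
+    // IR), with a loop-entry guard proving %m >= 0:
+    //   (sext i32 %m to i64)  ==>  (zext i32 %m to i64)   // no check needed
+    // Without such a guard and with AddRuntimeCheck set, the same conversion
+    // is made and "%m slt 0" is recorded in CheckSltMap, so the versioned
+    // loop can fall back to the original body when the check fires.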
     const SCEVSignExtendExpr *Sext = cast<SCEVSignExtendExpr>(Expr);
-    if (CheckSltMap[Sext] < 0)
-      CheckSltMap[Sext] = 0;
-    Type *Ty = Sext->getType();
-    const SCEV *NewZext = convertSCEV(Sext->getOperand());
-    return SE.getZeroExtendExpr(NewZext, Ty);
+    if (!SE.isLoopEntryGuardedByCond(CurLoop, ICmpInst::ICMP_SGE, Sext,
+                                     SE.getZero(Sext->getType()))) {
+      if (!AddRuntimeCheck)
+        return Sext;
+      if (CheckSltMap[Sext] < 0)
+        CheckSltMap[Sext] = 0;
+    }
+    const SCEV *NewZext = convertSCEV(Sext->getOperand(), AddRuntimeCheck);
+    return SE.getZeroExtendExpr(NewZext, Sext->getType());
   }
   case scAddExpr: {
     const SCEVAddExpr *Add = cast<SCEVAddExpr>(Expr);
-    const SCEV *NewAdd = convertSCEV(Add->getOperand(0));
+    const SCEV *NewAdd = convertSCEV(Add->getOperand(0), AddRuntimeCheck);
     for (int I = 1, E = Add->getNumOperands(); I != E; ++I) {
-      NewAdd = SE.getAddExpr(NewAdd, convertSCEV(Add->getOperand(I)));
+      NewAdd = SE.getAddExpr(
+          NewAdd, convertSCEV(Add->getOperand(I), AddRuntimeCheck));
     }
     return NewAdd;
   }
   case scMulExpr: {
     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Expr);
-    const SCEV *NewMul = convertSCEV(Mul->getOperand(0));
+    const SCEV *NewMul = convertSCEV(Mul->getOperand(0), AddRuntimeCheck);
     for (int I = 1, E = Mul->getNumOperands(); I != E; ++I) {
-      NewMul = SE.getMulExpr(NewMul, convertSCEV(Mul->getOperand(I)));
+      NewMul = SE.getMulExpr(
+          NewMul, convertSCEV(Mul->getOperand(I), AddRuntimeCheck));
     }
     return NewMul;
   }
   case scUDivExpr: {
     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(Expr);
-    const SCEV *NewLHS = convertSCEV(UDiv->getLHS());
-    const SCEV *NewRHS = convertSCEV(UDiv->getRHS());
+    const SCEV *NewLHS = convertSCEV(UDiv->getLHS(), AddRuntimeCheck);
+    const SCEV *NewRHS = convertSCEV(UDiv->getRHS(), AddRuntimeCheck);
     return SE.getUDivExpr(NewLHS, NewRHS);
   }
   case scAddRecExpr:
     assert(false && "Do not expect AddRec here!");
   case scUMaxExpr: {
     const SCEVUMaxExpr *UMax = cast<SCEVUMaxExpr>(Expr);
-    const SCEV *NewUMax = convertSCEV(UMax->getOperand(0));
+    const SCEV *NewUMax = convertSCEV(UMax->getOperand(0), AddRuntimeCheck);
     for (int I = 1, E = UMax->getNumOperands(); I != E; ++I) {
-      NewUMax = SE.getUMaxExpr(NewUMax, convertSCEV(UMax->getOperand(I)));
+      NewUMax = SE.getUMaxExpr(
+          NewUMax, convertSCEV(UMax->getOperand(I), AddRuntimeCheck));
     }
     return NewUMax;
  }
  case scSMaxExpr: {
+    // If the smax is not folded away by a loop-entry guard and we may not add
+    // a runtime check, return the original SCEV. Otherwise fold the constant
+    // operand away and, for every operand that is not covered by a guard,
+    // record the runtime check that justifies the fold.
     const SCEVSMaxExpr *SMax = cast<SCEVSMaxExpr>(Expr);
     const int NumOfOps = SMax->getNumOperands();
     bool Fold = false;
-    // If an operand is constant zero, it will be the first operand.
+    // If an operand is constant, it will be the first operand.
     const SCEV *SMaxOp0 = SMax->getOperand(0);
-    const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(SMaxOp0);
+    const SCEVConstant *Cst = dyn_cast<SCEVConstant>(SMaxOp0);

-    if (LHSC) {
-      // fold the constant with the other operands. there will be
-      // runtime-check to check our assumption for folding the smax is
-      // feasible.
+    if (Cst) {
+      // Check whether each operand is already guarded to be at least the
+      // constant; if not, either give up or record a runtime check.
       Fold = true;
       for (int I = 1, E = NumOfOps; I != E; ++I) {
         auto Ev = SMax->getOperand(I);
-        auto Cst = LHSC->getAPInt().roundToDouble();
-        if (CheckSltMap[Ev] < Cst)
-          CheckSltMap[Ev] = Cst;
+        if (!SE.isLoopEntryGuardedByCond(CurLoop, ICmpInst::ICMP_SGE, Ev,
+                                         Cst)) {
+          if (!AddRuntimeCheck)
+            return SMax;
+          int64_t CstValue = Cst->getAPInt().getSExtValue();
+          if (CheckSltMap[SMax] < CstValue)
+            CheckSltMap[SMax] = CstValue;
+        }
       }
     }
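+    // For example (illustrative): "(1 smax %len)" folds to "%len". With an
+    // entry guard proving %len >= 1 no check is needed; otherwise the pair
+    // ((1 smax %len), 1) is recorded in CheckSltMap so an "slt 1" runtime
+    // check can send execution through the original, unmodified loop.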
     const int StartIdx = Fold ? 1 : 0;
-    const SCEV *NewSMax = convertSCEV(SMax->getOperand(StartIdx));
+    const SCEV *NewSMax = convertSCEV(SMax->getOperand(StartIdx),
+                                      AddRuntimeCheck);
     for (int I = StartIdx + 1, E = NumOfOps; I != E; ++I) {
-      NewSMax = SE.getSMaxExpr(NewSMax, convertSCEV(SMax->getOperand(I)));
+      NewSMax = SE.getSMaxExpr(
+          NewSMax, convertSCEV(SMax->getOperand(I), AddRuntimeCheck));
     }
     return NewSMax;
   }
   case scUMinExpr: {
     const SCEVUMinExpr *UMin = cast<SCEVUMinExpr>(Expr);
-    const SCEV *NewUMin = convertSCEV(UMin->getOperand(0));
+    const SCEV *NewUMin = convertSCEV(UMin->getOperand(0), AddRuntimeCheck);
     for (int I = 1, E = UMin->getNumOperands(); I != E; ++I) {
-      NewUMin = SE.getUMinExpr(NewUMin, convertSCEV(UMin->getOperand(I)));
+      NewUMin = SE.getUMinExpr(
+          NewUMin, convertSCEV(UMin->getOperand(I), AddRuntimeCheck));
     }
     return NewUMin;
   }
   case scSMinExpr: {
     const SCEVSMinExpr *SMin = cast<SCEVSMinExpr>(Expr);
-    const SCEV *NewSMin = convertSCEV(SMin->getOperand(0));
+    const SCEV *NewSMin = convertSCEV(SMin->getOperand(0), AddRuntimeCheck);
     for (int I = 1, E = SMin->getNumOperands(); I != E; ++I) {
-      NewSMin = SE.getSMinExpr(NewSMin, convertSCEV(SMin->getOperand(I)));
+      NewSMin = SE.getSMinExpr(
+          NewSMin, convertSCEV(SMin->getOperand(I), AddRuntimeCheck));
     }
     return NewSMin;
   }
@@ -1196,10 +1207,11 @@
   // if they are equal. If they match, then we know that every byte is
   // touched in the loop. We only handle memset length and stride that
   // are invariant for the top level loop.
+  // To be conservative, we do not version loops whose pointer is outside
+  // address space zero.
   LLVM_DEBUG(dbgs() << "  memset size is non-constant\n");
-  if (LN == nullptr) {
-    LLVM_DEBUG(dbgs() << "  need to call LNIR for non-constant memset"
-                      << "optimization\n");
+  if (Pointer->getType()->getPointerAddressSpace() != 0) {
+    LLVM_DEBUG(dbgs() << "  pointer is not in address space zero\n");
     return false;
   }
   if (!SE->isLoopInvariant(MemsetSizeSCEV, TopLoop) ||
@@ -1218,23 +1230,37 @@
                     << "\n");

   if (PositiveStrideSCEV != MemsetSizeSCEV) {
-    // We will convert the SCEV expressions, and compare again.
-    // required conversion to SCEV will be saved inside Converter.
-    // if this function returns true, which means the optimization does happen,
-    // the pair will be added when we return to processLoopMemIntrinsic.
+    // If the original StrideSCEV and MemsetSizeSCEV do not match, fold the
+    // subexpressions that are covered by the guards at the loop entry, then
+    // compare again and proceed only if the folded expressions are equal.
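+    // For example (illustrative), with an entry guard proving %m >= 0:
+    //   MemsetSizeSCEV:     (4 * (sext i32 %m to i64))
+    //   PositiveStrideSCEV: (4 * (zext i32 %m to i64))
+    // both fold to (4 * (zext i32 %m to i64)), so the two sides match.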
     Converter.CheckSltMap.clear();
-    const SCEV *PositiveStrideSCEVConv =
-        Converter.convertSCEV(PositiveStrideSCEV);
-    const SCEV *MemsetSizeSCEVConv =
-        Converter.convertSCEV(MemsetSizeSCEV);
-    LLVM_DEBUG(dbgs() << "  Try to convert SCEV expression and compare again\n"
-                      << "    MemsetSCEVConv: " << *MemsetSizeSCEVConv << "\n"
-                      << "    PositiveStrideSCEVConv: "
-                      << *PositiveStrideSCEVConv << "\n");
-
-    if (PositiveStrideSCEVConv != MemsetSizeSCEVConv) {
-      LLVM_DEBUG(dbgs() << "  Converted SCEV still inequal, abort\n");
-      return false;
+    Converter.CurLoop = CurLoop;
+    const SCEV *FoldedPositiveStride =
+        Converter.convertSCEV(PositiveStrideSCEV, /*AddRuntimeCheck=*/false);
+    const SCEV *FoldedMemsetSize =
+        Converter.convertSCEV(MemsetSizeSCEV, /*AddRuntimeCheck=*/false);
+    LLVM_DEBUG(dbgs() << "  Try to fold SCEV expressions covered by the loop "
+                      << "guards\n"
+                      << "    FoldedMemsetSCEV: " << *FoldedMemsetSize << "\n"
+                      << "    FoldedPositiveStrideSCEV: "
+                      << *FoldedPositiveStride << "\n");
+
+    if (FoldedPositiveStride != FoldedMemsetSize) {
+      if (LN == nullptr || ForceNoLoopVersion) {
+        LLVM_DEBUG(dbgs() << "  unable to do loop versioning here, abort\n");
+        return false;
+      }
+      const SCEV *ConvertedPositiveStride =
+          Converter.convertSCEV(FoldedPositiveStride, /*AddRuntimeCheck=*/true);
+      const SCEV *ConvertedMemsetSize =
+          Converter.convertSCEV(FoldedMemsetSize, /*AddRuntimeCheck=*/true);
+      LLVM_DEBUG(dbgs() << "  Try to convert SCEV expressions, adding the "
+                        << "runtime checks the conversion requires\n"
+                        << "    ConvertedMemsetSCEV: " << *ConvertedMemsetSize
+                        << "\n"
+                        << "    ConvertedPositiveStrideSCEV: "
+                        << *ConvertedPositiveStride << "\n");
+      if (ConvertedPositiveStride != ConvertedMemsetSize) {
+        LLVM_DEBUG(dbgs() << "  Converted SCEVs are still unequal, abort\n");
+        return false;
+      }
     }
   }
 }
@@ -1253,7 +1279,7 @@

   // if we have successfully changed with processLoopStridedStore
   // add the required runtime-check information into the list.
-  if (Changed) {
+  if (Changed && isTopLoopVersioned()) {
     for (auto Pair : Converter.CheckSltMap) {
       auto Ev = Pair.first;
       auto Cst = Pair.second;
@@ -1494,16 +1520,18 @@
   // NumBytes = TripCount * StoreSize
   const SCEV *TripCountS = getTripCount(BECount, IntIdxTy, CurLoop, DL, SE);

-  // This check is possible only for LoopNestIdiomRecognize, since we are
-  // trying to version on the top-level loop.
-  // Give up if the store size is not constant and the trip count SCEV
-  // expression is variant to the top level loop. In this sense versioning is
-  // needed and compile option enforces not to.
-  if (LN != nullptr && !SE->isLoopInvariant(TripCountS, TopLoop)) {
-    const bool IsConstantSize = isa<SCEVConstant>(StoreSizeSCEV);
-    if (IsLoopMemset && !IsConstantSize && ForceNoLoopVersion) {
-      LLVM_DEBUG(dbgs() << "requires versioning but abort becuase "
-                        << "ForceNoLoopVersion is set to true\n");
+  // If the store size is not constant and runtime checks are needed for the
+  // optimization to proceed, then versioning is required.
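+  // The versioned structure is (illustrative):
+  //   if (<any recorded "slt" check fires>)   // e.g. %m slt 0
+  //     ... original loop nest ...
+  //   else
+  //     ... loop nest with the memset hoisted to the preheader ...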
+  const bool IsConstantSize = isa<SCEVConstant>(StoreSizeSCEV);
+  if (IsLoopMemset && !IsConstantSize && Converter.CheckSltMap.size()) {
+    if (LN == nullptr) {
+      LLVM_DEBUG(dbgs() << "requires versioning but running "
+                        << "LoopIdiomRecognize, abort (run "
+                        << "LoopNestIdiomRecognize, which supports "
+                        << "versioning, instead)\n");
+      return Changed;
+    }
+    if (!SE->isLoopInvariant(TripCountS, TopLoop) || ForceNoLoopVersion) {
+      LLVM_DEBUG(dbgs() << "abort because TripCount is not invariant to the "
+                        << "top loop or ForceNoLoopVersion is set\n");
       return Changed;
     }
   }
@@ -1540,18 +1568,16 @@
   // Here we check whether the top-level clone has been created yet, and
   // create it if it hasn't. The initial runtime check is set to false and
   // the conditions are updated after we process all the loops.
-  const bool IsConstantSize = isa<SCEVConstant>(StoreSizeSCEV);
-  if (LN != nullptr && IsLoopMemset && !IsConstantSize && !ForceNoLoopVersion) {
-    if (!isTopLoopVersioned() && Converter.CheckSltMap.size()) {
-      LLVM_DEBUG(dbgs() << "  Create versioning for top loop because SCEV folding is needed\n");
-      versionTopLoop();
-
-      // If current loop is the top loop, versioning would change the loop's
-      // preheader to RuntimeCheckBB, so we need to reset the insert point.
-      if (CurLoop == TopLoop) {
-        Preheader = CurLoop->getLoopPreheader();
-        Builder.SetInsertPoint(Preheader->getTerminator());
-      }
+  if (LN != nullptr && IsLoopMemset && !IsConstantSize && !ForceNoLoopVersion &&
+      !isTopLoopVersioned() && Converter.CheckSltMap.size()) {
+    LLVM_DEBUG(dbgs() << "  Create versioning for the top loop because a "
+                      << "runtime check for the SCEV conversion is needed\n");
+    versionTopLoop();
+
+    // If the current loop is the top loop, versioning changes the loop's
+    // preheader to RuntimeCheckBB, so we need to reset the insert point.
+    if (CurLoop == TopLoop) {
+      Preheader = CurLoop->getLoopPreheader();
+      Builder.SetInsertPoint(Preheader->getTerminator());
     }
   }

diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-lir.ll
rename from llvm/test/Transforms/LoopIdiom/memset-runtime.ll
rename to llvm/test/Transforms/LoopIdiom/memset-runtime-lir.ll
--- a/llvm/test/Transforms/LoopIdiom/memset-runtime.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-lir.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="function(loop(loop-nest-idiom,loop-deletion),simplifycfg)" < %s -S | FileCheck %s
+; RUN: opt -passes="function(loop(loop-idiom,loop-deletion),simplifycfg)" < %s -S | FileCheck %s
 ; The C code to generate this testcase:
 ; void test(int ar[][m], long n, long m)
 ; {
@@ -12,10 +12,7 @@
 ; The optimized IR should be similar to the following:
 ; void test(int ar[][m], long n, long m)
 ; {
-;   if (n < 0 || m < 0 || (n >> 32) != 0 || (4 * m >> 32) != 0)
-;     /* optimization result identical to LoopIdiomRecognize */
-;   else
-;     /* hoists memset to loop-preheader */
+;   memset(ar, 0, m * n * sizeof(int));
 ; }
 define void @test_simple(i32* nocapture %ar, i64 %n, i64 %m) {
 ; CHECK-LABEL: @test_simple(
@@ -65,8 +62,7 @@
 ; }
 define void @test_nested_do_while(i32 %n, i32 %m, i32 %o, i32* nocapture %ar){
 ; CHECK-LABEL: @test_nested_do_while(
-; CHECK-NEXT:  do.body.lver.check:
-; CHECK-NEXT:    [[AR2:%.*]] = bitcast i32* [[AR:%.*]] to i8*
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[M:%.*]] to i64
 ; CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[O:%.*]] to i64
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV2]], [[CONV]]
@@ -77,37 +73,18 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[CONV2]], [[CONV]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[SMAX]], [[CONV2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 2
-; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[SMAX27]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i64 [[CONV]], 1
-; CHECK-NEXT:    [[TMP7:%.*]] = or i1 false, [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i64 [[CONV2]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    br i1 [[TMP9]], label [[DO_BODY_LVER_ORIG:%.*]], label [[DO_BODY_PH:%.*]]
-; CHECK:       do.body.lver.orig:
-; CHECK-NEXT:    [[I_0_LVER_ORIG:%.*]] = phi i64 [ [[INC11_LVER_ORIG:%.*]], [[DO_END_LVER_ORIG:%.*]] ], [ 0, [[DO_BODY_LVER_CHECK:%.*]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP1]], [[I_0_LVER_ORIG]]
-; CHECK-NEXT:    [[SCEVGEP_LVER_ORIG:%.*]] = getelementptr i32, i32* [[AR]], i64 [[TMP10]]
-; CHECK-NEXT:    [[SCEVGEP1_LVER_ORIG:%.*]] = bitcast i32* [[SCEVGEP_LVER_ORIG]] to i8*
-; CHECK-NEXT:    [[MUL3_LVER_ORIG:%.*]] = mul i64 [[MUL]], [[I_0_LVER_ORIG]]
-; CHECK-NEXT:    [[ADD_PTR_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[AR]], i64 [[MUL3_LVER_ORIG]]
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[SCEVGEP1_LVER_ORIG]], i8 0, i64 [[TMP3]], i1 false)
-; CHECK-NEXT:    br label [[DO_BODY1_LVER_ORIG:%.*]]
-; CHECK:       do.body1.lver.orig:
-; CHECK-NEXT:    [[J_0_LVER_ORIG:%.*]] = phi i64 [ 0, [[DO_BODY_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[DO_BODY1_LVER_ORIG]] ]
-; CHECK-NEXT:    [[MUL5_LVER_ORIG:%.*]] = mul nsw i64 [[J_0_LVER_ORIG]], [[CONV2]]
-; CHECK-NEXT:    [[ADD_PTR6_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR_LVER_ORIG]], i64 [[MUL5_LVER_ORIG]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ADD_PTR6_LVER_ORIG]] to i8*
-; CHECK-NEXT:    [[INC_LVER_ORIG]] = add nuw nsw i64 [[J_0_LVER_ORIG]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[SMAX]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT_LVER_ORIG]], label [[DO_END_LVER_ORIG]], label [[DO_BODY1_LVER_ORIG]]
-; CHECK:       do.end.lver.orig:
-; CHECK-NEXT:    [[INC11_LVER_ORIG]] = add nuw nsw i64 [[I_0_LVER_ORIG]], 1
-; CHECK-NEXT:    [[EXITCOND28_NOT_LVER_ORIG:%.*]] = icmp eq i64 [[INC11_LVER_ORIG]], [[SMAX27]]
-; CHECK-NEXT:    br i1 [[EXITCOND28_NOT_LVER_ORIG]], label [[DO_END16:%.*]], label [[DO_BODY_LVER_ORIG]]
-; CHECK:       do.body.ph:
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[AR2]], i8 0, i64 [[TMP5]], i1 false)
-; CHECK-NEXT:    br label [[DO_END16]]
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC11:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP1]], [[I_0]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[AR:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
+; CHECK-NEXT:    [[MUL3:%.*]] = mul i64 [[MUL]], [[I_0]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[AR]], i64 [[MUL3]]
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[SCEVGEP1]], i8 0, i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    [[INC11]] = add nuw nsw i64 [[I_0]], 1
+; CHECK-NEXT:    [[EXITCOND28_NOT:%.*]] = icmp eq i64 [[INC11]], [[SMAX27]]
+; CHECK-NEXT:    br i1 [[EXITCOND28_NOT]], label [[DO_END16:%.*]], label [[DO_BODY]]
 ; CHECK:       do.end16:
 ; CHECK-NEXT:    ret void
 ;
@@ -151,7 +128,7 @@
 ; for (int i=0; i<n; i++)
diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-lnir.ll
copy from llvm/test/Transforms/LoopIdiom/memset-runtime.ll
copy to llvm/test/Transforms/LoopIdiom/memset-runtime-lnir.ll
--- a/llvm/test/Transforms/LoopIdiom/memset-runtime.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-lnir.ll
@@ -12,10 +12,7 @@
 ; The optimized IR should be similar to the following:
 ; void test(int ar[][m], long n, long m)
 ; {
-;   if (n < 0 || m < 0 || (n >> 32) != 0 || (4 * m >> 32) != 0)
-;     /* optimization result identical to LoopIdiomRecognize */
-;   else
-;     /* hoists memset to loop-preheader */
+;   memset(ar, 0, m * n * sizeof(int));
 ; }
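+; Note (illustrative): under loop-nest-idiom the hoisted memset stays guarded
+; by runtime checks; the .lver.check block below ORs together the "slt"
+; conditions recorded during SCEV conversion and branches to the original
+; loop nest if any of them fires.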
 define void @test_simple(i32* nocapture %ar, i64 %n, i64 %m) {
 ; CHECK-LABEL: @test_simple(
@@ -79,15 +76,17 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[SMAX27]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i64 [[CONV]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i64 [[CONV]], 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i1 false, [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i64 [[CONV2]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    br i1 [[TMP9]], label [[DO_BODY_LVER_ORIG:%.*]], label [[DO_BODY_PH:%.*]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i64 [[SMAX]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[DO_BODY_LVER_ORIG:%.*]], label [[DO_BODY_PH:%.*]]
 ; CHECK:       do.body.lver.orig:
 ; CHECK-NEXT:    [[I_0_LVER_ORIG:%.*]] = phi i64 [ [[INC11_LVER_ORIG:%.*]], [[DO_END_LVER_ORIG:%.*]] ], [ 0, [[DO_BODY_LVER_CHECK:%.*]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP1]], [[I_0_LVER_ORIG]]
-; CHECK-NEXT:    [[SCEVGEP_LVER_ORIG:%.*]] = getelementptr i32, i32* [[AR]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP1]], [[I_0_LVER_ORIG]]
+; CHECK-NEXT:    [[SCEVGEP_LVER_ORIG:%.*]] = getelementptr i32, i32* [[AR]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[SCEVGEP1_LVER_ORIG:%.*]] = bitcast i32* [[SCEVGEP_LVER_ORIG]] to i8*
 ; CHECK-NEXT:    [[MUL3_LVER_ORIG:%.*]] = mul i64 [[MUL]], [[I_0_LVER_ORIG]]
 ; CHECK-NEXT:    [[ADD_PTR_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[AR]], i64 [[MUL3_LVER_ORIG]]
@@ -97,7 +96,7 @@
 ; CHECK-NEXT:    [[J_0_LVER_ORIG:%.*]] = phi i64 [ 0, [[DO_BODY_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[DO_BODY1_LVER_ORIG]] ]
 ; CHECK-NEXT:    [[MUL5_LVER_ORIG:%.*]] = mul nsw i64 [[J_0_LVER_ORIG]], [[CONV2]]
 ; CHECK-NEXT:    [[ADD_PTR6_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR_LVER_ORIG]], i64 [[MUL5_LVER_ORIG]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ADD_PTR6_LVER_ORIG]] to i8*
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[ADD_PTR6_LVER_ORIG]] to i8*
 ; CHECK-NEXT:    [[INC_LVER_ORIG]] = add nuw nsw i64 [[J_0_LVER_ORIG]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT_LVER_ORIG]], label [[DO_END_LVER_ORIG]], label [[DO_BODY1_LVER_ORIG]]
@@ -151,7 +150,7 @@
 ; for (int i=0; i<n; i++)