Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -880,7 +880,6 @@
   // Parameters
   PHINode *OrigPhi;
   Type *WideType;
-  bool IsSigned;
 
   // Context
   LoopInfo *LI;
@@ -897,13 +896,19 @@
   SmallPtrSet<Instruction *, 16> Widened;
   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
 
+  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
+  // A map tracking the kind of extension used to widen each narrow IV
+  // and narrow IV user.
+  // Key: pointer to a narrow IV or IV user.
+  // Value: the kind of extension used to widen this Instruction.
+  DenseMap<Instruction *, ExtendKind> ExtendKindMap;
+
 public:
   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
           DominatorTree *DTree, SmallVectorImpl<WeakVH> &DI) :
     OrigPhi(WI.NarrowIV),
     WideType(WI.WidestNativeType),
-    IsSigned(WI.IsSigned),
     LI(LInfo),
     L(LI->getLoopFor(OrigPhi->getParent())),
     SE(SEv),
@@ -913,6 +918,7 @@
     WideIncExpr(nullptr),
     DeadInsts(DI) {
     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
   }
 
   PHINode *createWideIV(SCEVExpander &Rewriter);
@@ -926,9 +932,11 @@
                               const SCEVAddRecExpr *WideAR);
   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
 
-  const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
+  typedef std::pair<const SCEVAddRecExpr *, ExtendKind> WidenedRecTy;
+
+  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
 
-  const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
+  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
 
   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                               unsigned OpCode) const;
@@ -937,6 +945,10 @@
 
   bool widenLoopCompare(NarrowIVDefUse DU);
 
+  bool canWidenBySExt(NarrowIVDefUse DU);
+
+  bool canWidenByZExt(NarrowIVDefUse DU);
+
   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
 };
 } // anonymous namespace
@@ -1002,6 +1014,7 @@
   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
   // invariant and will be folded or hoisted. If it actually comes from a
   // widened IV, it should be removed during a future call to widenIVUse.
+  bool IsSigned = ExtendKindMap[NarrowDef] == SignExtended;
   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
     createExtendInst(NarrowUse->getOperand(0), WideType,
                      IsSigned, NarrowUse);
@@ -1086,7 +1099,7 @@
     return WideUse == WideAR;
   };
 
-  bool SignExtend = IsSigned;
+  bool SignExtend = ExtendKindMap[NarrowDef] == SignExtended;
   if (!GuessNonIVOperand(SignExtend)) {
     SignExtend = !SignExtend;
     if (!GuessNonIVOperand(SignExtend))
@@ -1127,15 +1140,16 @@
 /// No-wrap operations can transfer sign extension of their result to their
 /// operands. Generate the SCEV value for the widened operation without
 /// actually modifying the IR yet. If the expression after extending the
-/// operands is an AddRec for this loop, return it.
-const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
+/// operands is an AddRec for this loop, return the AddRec and the kind of
+/// extension used.
+WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
   // Handle the common case of add
   const unsigned OpCode = DU.NarrowUse->getOpcode();
   // Only Add/Sub/Mul instructions supported yet.
   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
       OpCode != Instruction::Mul)
-    return nullptr;
+    return std::make_pair(nullptr, Unknown);
 
   // One operand (NarrowDef) has already been extended to WideDef. Now determine
   // if extending the other will lead to a recurrence.
@@ -1146,14 +1160,15 @@
   const SCEV *ExtendOperExpr = nullptr;
   const OverflowingBinaryOperator *OBO =
     cast<OverflowingBinaryOperator>(DU.NarrowUse);
-  if (IsSigned && OBO->hasNoSignedWrap())
+  ExtendKind ExtKind = ExtendKindMap[DU.NarrowDef];
+  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
     ExtendOperExpr = SE->getSignExtendExpr(
       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
-  else if(!IsSigned && OBO->hasNoUnsignedWrap())
+  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
     ExtendOperExpr = SE->getZeroExtendExpr(
       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
   else
-    return nullptr;
+    return std::make_pair(nullptr, Unknown);
 
   // When creating this SCEV expr, don't apply the current operations NSW or NUW
   // flags. This instruction may be guarded by control flow that the no-wrap
@@ -1171,33 +1186,49 @@
     dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
   if (!AddRec || AddRec->getLoop() != L)
-    return nullptr;
-  return AddRec;
+    return std::make_pair(nullptr, Unknown);
+
+  return std::make_pair(AddRec, ExtKind);
 }
 
 /// Is this instruction potentially interesting for further simplification after
 /// widening it's type? In other words, can the extend be safely hoisted out of
 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
-/// so, return the sign or zero extended recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
-  if (!SE->isSCEVable(NarrowUse->getType()))
-    return nullptr;
+/// so, return the extended recurrence and the kind of extension used. Otherwise
+/// return {nullptr, Unknown}.
+WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
+  if (!SE->isSCEVable(DU.NarrowUse->getType()))
+    return std::make_pair(nullptr, SignExtended);
 
-  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
       SE->getTypeSizeInBits(WideType)) {
     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
     // index. So don't follow this use.
-    return nullptr;
+    return std::make_pair(nullptr, Unknown);
   }
 
-  const SCEV *WideExpr = IsSigned ?
-    SE->getSignExtendExpr(NarrowExpr, WideType) :
-    SE->getZeroExtendExpr(NarrowExpr, WideType);
+  const SCEV *WideExpr;
+  ExtendKind ExtKind;
+  if (DU.NeverNegative) {
+    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+    if (isa<SCEVAddRecExpr>(WideExpr))
+      ExtKind = SignExtended;
+    else {
+      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+      ExtKind = ZeroExtended;
+    }
+  } else if (ExtendKindMap[DU.NarrowDef] == SignExtended) {
+    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+    ExtKind = SignExtended;
+  } else {
+    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+    ExtKind = ZeroExtended;
+  }
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
   if (!AddRec || AddRec->getLoop() != L)
-    return nullptr;
-  return AddRec;
+    return std::make_pair(nullptr, Unknown);
+  return std::make_pair(AddRec, ExtKind);
 }
 
 /// This IV user cannot be widen. Replace this use of the original narrow IV
@@ -1233,7 +1264,7 @@
   //
   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
   //          == icmp slt i32 zext(%narrow), sext(%val)
-
+  bool IsSigned = ExtendKindMap[DU.NarrowDef] == SignExtended;
   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
     return false;
 
@@ -1258,6 +1289,8 @@
 
 /// Determine whether an individual user of the narrow IV can be widened. If so,
 /// return the wide clone of the user.
 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+  assert(ExtendKindMap.count(DU.NarrowDef) &&
+         "Should already know the kind of extension used to widen NarrowDef");
 
   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
@@ -1289,7 +1322,8 @@
     }
   }
   // Our raison d'etre! Eliminate sign and zero extension.
-  if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
+  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt(DU)) ||
+      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt(DU))) {
     Value *NewDef = DU.WideDef;
     if (DU.NarrowUse->getType() != WideType) {
       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
@@ -1327,11 +1361,11 @@
   }
 
   // Does this user itself evaluate to a recurrence after widening?
-  const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
-  if (!WideAddRec)
+  WidenedRecTy WideAddRec = getWideRecurrence(DU);
+  if (!WideAddRec.first)
     WideAddRec = getExtendedOperandRecurrence(DU);
-  if (!WideAddRec) {
+  if (!WideAddRec.first) {
     // If use is a loop condition, try to promote the condition instead of
     // truncating the IV first.
     if (widenLoopCompare(DU))
@@ -1351,10 +1385,11 @@
 
   // Reuse the IV increment that SCEVExpander created as long as it dominates
   // NarrowUse.
   Instruction *WideUse = nullptr;
-  if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+  if (WideAddRec.first == WideIncExpr &&
+      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
     WideUse = WideInc;
   else {
-    WideUse = cloneIVUser(DU, WideAddRec);
+    WideUse = cloneIVUser(DU, WideAddRec.first);
     if (!WideUse)
       return nullptr;
   }
@@ -1363,17 +1398,28 @@
   // evaluates to the same expression as the extended narrow use, but doesn't
   // absolutely guarantee it. Hence the following failsafe check. In rare cases
   // where it fails, we simply throw away the newly created wide use.
-  if (WideAddRec != SE->getSCEV(WideUse)) {
+  if (WideAddRec.first != SE->getSCEV(WideUse)) {
     DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
-          << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
+          << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n");
     DeadInsts.emplace_back(WideUse);
     return nullptr;
   }
+  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
 
   // Returning WideUse pushes it on the worklist.
   return WideUse;
 }
 
+/// Return true if we can widen this narrow use by a sext.
+bool WidenIV::canWidenBySExt(NarrowIVDefUse DU) {
+  return DU.NeverNegative || ExtendKindMap[DU.NarrowDef] == SignExtended;
+}
+
+/// Return true if we can widen this narrow use by a zext.
+bool WidenIV::canWidenByZExt(NarrowIVDefUse DU) {
+  return DU.NeverNegative || ExtendKindMap[DU.NarrowDef] == ZeroExtended;
+}
+
 /// Add eligible users of NarrowDef to NarrowIVUsers.
 ///
 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
@@ -1408,9 +1454,9 @@
     return nullptr;
 
   // Widen the induction variable expression.
-  const SCEV *WideIVExpr = IsSigned ?
-    SE->getSignExtendExpr(AddRec, WideType) :
-    SE->getZeroExtendExpr(AddRec, WideType);
+  const SCEV *WideIVExpr = ExtendKindMap[OrigPhi] == SignExtended ?
+      SE->getSignExtendExpr(AddRec, WideType)
+      : SE->getZeroExtendExpr(AddRec, WideType);
   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
          "Expect the new IV expression to preserve its type");
Index: test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
===================================================================
--- test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
+++ test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-p:64:64:64-n8:16:32:64-S128"
+
+; When widening IV and its users, trunc and zext/sext are not needed
+; if the original 32-bit user is known to be non-negative, whether
+; the IV is considered signed or unsigned.
+define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, %N
+; CHECK-NEXT:    br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    br label %for.body
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label %for.inc
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    br label %for.end
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp1 = icmp slt i32 0, %N
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.02, 2
+  %idxprom1 = zext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %0, %1
+  %idxprom4 = zext i32 %i.02 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %N
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
+; CHECK-LABEL: @foo1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, %N
+; CHECK-NEXT:    br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    br label %for.body
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label %for.inc
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    br label %for.end
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp1 = icmp slt i32 0, %N
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = zext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.02, 2
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %0, %1
+  %idxprom4 = sext i32 %i.02 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %N
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+
+@a = common global [100 x i32] zeroinitializer, align 16
+@b = common global [100 x i32] zeroinitializer, align 16
+
+define i32 @foo2(i32 %M) {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, %M
+; CHECK-NEXT:    br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 %M to i64
+; CHECK-NEXT:    br label %for.body
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[TMP3]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label %for.inc
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    br label %for.end
+; CHECK:       for.end:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %cmp1 = icmp slt i32 0, %M
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = zext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %0, %1
+  %add3 = add nsw i32 %i.02, %M
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom4
+  store i32 %add, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %M
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %call = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @dummy(i32*, i32*)
+
+; A case where zext should not be eliminated when its operands could only be extended by sext.
+define i32 @foo3(i32 %M) {
+; CHECK-LABEL: @foo3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, %M
+; CHECK-NEXT:    br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 %M to i64
+; CHECK-NEXT:    br label %for.body
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label %for.inc
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    br label %for.end
+; CHECK:       for.end:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %cmp1 = icmp slt i32 0, %M
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %0, %1
+  %add3 = add nsw i32 %i.02, %M
+  %idxprom4 = zext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom4
+  store i32 %add, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %M
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %call = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
+  ret i32 0
+}
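
For reference, the two interesting tests above correspond roughly to the following C++ loops. This is an illustrative sketch only (the function names are invented and the authoritative inputs are the IR functions @foo2 and @foo3 in the patch):

  extern int a[100], b[100];

  // @foo2-like: the store index in the IR is sext(i + M).  After widening, i + M
  // is rebuilt as a 64-bit nsw add of the wide IV and sext(M), so no trunc or
  // sext of i + M remains in the CHECK lines.
  void foo2_like(int M) {
    for (int i = 0; i < M; ++i)
      a[i + M] = a[i] + b[i];
  }

  // @foo3-like: the store index in the IR is zext(i + M).  The widened value of
  // i + M is produced with sign extension and the zext user is not known to be
  // non-negative here, so the zext cannot reuse it; the trunc/zext pair stays in
  // the CHECK lines.
  void foo3_like(int M) {
    for (int i = 0; i < M; ++i)
      a[(unsigned)(i + M)] = a[i] + b[i];
  }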