This is an archive of the discontinued LLVM Phabricator instance.

LoopVectorize: handle casted indvars in iv-select-cmp
AbandonedPublic

Authored by artagnon on Aug 14 2023, 6:06 AM.

Download Raw Diff

Details

Reviewers

Mel-Chen
fhahn
Ayal
shiva0217

Summary

As a follow-up to D150851, handle casted indvars in cases where a
runtime-check isn't necessary, hence vectorizing:

int test(int *a, int n) {
  int rdx = 331;
  for (int i = 0; i < n; i++) {
    if (a[i] > 3)
      rdx = i;
  }
  return rdx;
}

D150851 looks for the nsw flag on the increment of the indvar, and
concludes that the indvar can't wrap, and hence can't hit the sentinel
value:

%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

The issue with vectorizing the above example is that IndVarSimplify
comes along and truncates the indvar as shown below:

%1 = trunc i64 %indvars.iv to i32
%spec.select = select i1 %cmp1, i32 %1, i32 %rdx.06

Now, the loop bounds are still on i64, so this truncated indvar may
still overflow, hitting the sentinel value:

%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

However, the exit condition of the loop, %wide.trip.count, has been
widened from an i32 %n by IndVarSimplify:

%wide.trip.count = zext i32 %n to i64
br label %for.body

This tells us that %n was originally an i32, but not whether it is
signed. The loop guard, however, compares %n as an i32 using a signed
predicate, so we know that it is signed:

%cmp5 = icmp sgt i32 %n, 0
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

This patch pattern-matches a cast in the select, and an icmp in the loop
guard, which could perhaps be introduced by IndVarSimplify, and
determines when a truncated indvar can't overflow.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

artagnon created this revision.Aug 14 2023, 6:06 AM

Herald added a project: Restricted Project. · View Herald TranscriptAug 14 2023, 6:06 AM

Herald added subscribers: StephenFan, hiraditya. · View Herald Transcript

artagnon requested review of this revision.Aug 14 2023, 6:06 AM

Herald added a project: Restricted Project. · View Herald TranscriptAug 14 2023, 6:06 AM

Herald added subscribers: llvm-commits, wangpc. · View Herald Transcript

artagnon added parent revisions: D157861: LoopVectorize: vectorize finding first IV in select-cmp, D156124: LoopVectorize/iv-select-cmp: add tests for truncated IV.Aug 14 2023, 6:06 AM

Harbormaster completed remote builds in B252328: Diff 549900.Aug 14 2023, 6:07 AM

Will migrate to GitHub PRs once D150851 is migrated.

Herald added a subscriber: sunshaoce. · View Herald TranscriptSep 25 2023, 5:02 AM

Revision Contents

Path

Size

llvm/

lib/

Analysis/

IVDescriptors.cpp

99 lines

test/

Transforms/

LoopVectorize/

iv-select-cmp.ll

402 lines

Diff 549900

llvm/lib/Analysis/IVDescriptors.cpp

Show First 20 Lines • Show All 613 Lines • ▼ Show 20 Lines	RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
IsOrdered, CastInsts, MinWidthCastToRecurrenceType);		IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
RedDes = RD;		RedDes = RD;

return true;		return true;
}		}

enum class LoopInductionVariable { None, Increasing, Decreasing };		enum class LoopInductionVariable { None, Increasing, Decreasing };

// We are looking for loops that do something like this:		/// Handles the case where \p V is a casted induction variable (due to
// The reduction value (r) only has two states, in this example 0 or 3.		/// IndVarSimplify, for instance). In cases where \p V is truncated, we check
		/// that the original final IV value in the loop guard is the same type as the
		/// target of the truncation, and that the loop guard icmp is signed, in which
		/// case we don't need a runtime-check, and can directly say that the truncated
		/// IV will not hit the sentinel value.
		/// TODO: Only signed IVs are currently supported.
		static PHINode *handleCastedIV(Value *V, ScalarEvolution *SE, Loop *Loop) {
		if (auto *Cast = dyn_cast<CastInst>(V)) {
		auto *Phi = dyn_cast<PHINode>(Cast->getOperand(0));
		Type *CastDest = Cast->getDestTy();
		if (Cast->getSrcTy()->getScalarSizeInBits() <
		Cast->getDestTy()->getScalarSizeInBits())
		return Phi;
		else if (BranchInst *Guard = Loop->getLoopGuardBranch()) {
		if (auto *GuardCmp = dyn_cast<ICmpInst>(Guard->getCondition())) {
		if (GuardCmp->getOperand(0)->getType() == CastDest &&
		GuardCmp->isSigned())
		return Phi;
		}
		} else if (auto Bounds = Loop->getBounds(*SE))
		if (auto *ConstFinal = dyn_cast<ConstantInt>(&Bounds->getFinalIVValue()))
		if (ConstantInt::isValueValidForType(
		CastDest, ConstFinal->getValue().getSExtValue()))
		return Phi;
		return nullptr;
		}
		return dyn_cast<PHINode>(V);
		}

		/// Checks that \p V is a loop induction Phi that we can handle, for the
		/// purposes of SelectCmp with a non loop-invariant value in the select. We can
		/// currently handle indvars in loops that have a constant start value that's
		/// not INT_MIN (in the case of LoopInductionVariable::Increasing) or INT_MAX
		/// (in the case of LoopInductionVariable::Decreasing), and doesn't wrap to
		/// potentially hit these values, since these values are the sentinel values of
		/// the reductions.
		static LoopInductionVariable getLoopInduction(Value *V, ScalarEvolution *SE,
		Loop *Loop) {
		if (!SE)
		return LoopInductionVariable::None;

		if (auto *Phi = handleCastedIV(V, SE, Loop)) {
		InductionDescriptor ID;
		if (InductionDescriptor::isInductionPHI(Phi, Loop, SE, ID)) {
		const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
		if (!AR->hasNoSignedWrap())
		return LoopInductionVariable::None;

		if (const auto *IVStartValue =
		dyn_cast<ConstantInt>(ID.getStartValue())) {
		const SCEV *Step = ID.getStep();
		if (SE->isKnownPositive(Step) &&
		!IVStartValue->isMinValue(/* Signed */ true))
		return LoopInductionVariable::Increasing;
		else if (SE->isKnownNegative(Step) &&
		!IVStartValue->isMaxValue(/* Signed */ true))
		return LoopInductionVariable::Decreasing;
		}
		}
		}

		return LoopInductionVariable::None;
		}

		// We are looking for the following two types of loops:
		// 1) The reduction value (r) only has two states, in this example 0 or 3.
// int r = 0;		// int r = 0;
// for (int i = 0; i < n; i++) {		// for (int i = 0; i < n; i++) {
// if (src[i] > 3)		// if (src[i] > 3)
// r = 3;		// r = 3;
// }		// }
// The generated LLVM IR for this type of loop will be like this:		// The generated LLVM IR for this type of loop will be like this:
// for.body:		// for.body:
// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]		// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines	RecurrenceDescriptor::isFindFirstLastIVPattern(Loop Loop, PHINode OrigPhi,

if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))		if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
NonRdxPhi = SI->getFalseValue();		NonRdxPhi = SI->getFalseValue();
else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))		else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
NonRdxPhi = SI->getTrueValue();		NonRdxPhi = SI->getTrueValue();
else		else
return InstDesc(false, I);		return InstDesc(false, I);

auto GetLoopInduction = [&SE, &Loop](Value *V) {
auto *Phi = dyn_cast<PHINode>(V);
if (!SE || !Phi)
return LoopInductionVariable::None;

InductionDescriptor ID;
if (!InductionDescriptor::isInductionPHI(Phi, Loop, SE, ID))
return LoopInductionVariable::None;

const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(V));
if (!AR->hasNoSignedWrap())
return LoopInductionVariable::None;

const auto *IVStartValue = dyn_cast<ConstantInt>(ID.getStartValue());
if (!IVStartValue)
return LoopInductionVariable::None;

const SCEV *Step = ID.getStep();
if (SE->isKnownPositive(Step) &&
!IVStartValue->isMinValue(/* Signed */ true))
return LoopInductionVariable::Increasing;
else if (SE->isKnownNegative(Step) &&
!IVStartValue->isMaxValue(/* Signed */ true))
return LoopInductionVariable::Decreasing;
else
return LoopInductionVariable::None;
};

// We are looking for selects of the form:		// We are looking for selects of the form:
// select(cmp(), phi, loop_induction) or		// select(cmp(), phi, loop_induction) or
// select(cmp(), loop_induction, phi)		// select(cmp(), loop_induction, phi)
switch (GetLoopInduction(NonRdxPhi)) {		switch (getLoopInduction(NonRdxPhi, SE, Loop)) {
case LoopInductionVariable::Increasing:		case LoopInductionVariable::Increasing:
return InstDesc(I, isa<ICmpInst>(I->getOperand(0))		return InstDesc(I, isa<ICmpInst>(I->getOperand(0))
? RecurKind::IFindLastIV		? RecurKind::IFindLastIV
: RecurKind::FFindLastIV);		: RecurKind::FFindLastIV);
case LoopInductionVariable::Decreasing:		case LoopInductionVariable::Decreasing:
return InstDesc(I, isa<ICmpInst>(I->getOperand(0))		return InstDesc(I, isa<ICmpInst>(I->getOperand(0))
? RecurKind::IFindFirstIV		? RecurKind::IFindFirstIV
: RecurKind::FFindFirstIV);		: RecurKind::FFindFirstIV);
▲ Show 20 Lines • Show All 898 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll

Show First 20 Lines • Show All 1,949 Lines • ▼ Show 20 Lines	for.body: ; preds = %entry, %for.body
%dec = add nsw i64 %iv, -1		%dec = add nsw i64 %iv, -1
%cmp.not = icmp eq i64 %iv, 0		%cmp.not = icmp eq i64 %iv, 0
br i1 %cmp.not, label %exit, label %for.body		br i1 %cmp.not, label %exit, label %for.body

exit: ; preds = %for.body		exit: ; preds = %for.body
ret i64 %spec.select		ret i64 %spec.select
}		}

; Negative tests		define i32 @select_icmp_const_truncated_iv_widened_exit(ptr nocapture readonly %a, i32 %n) {
		; CHECK-VF4IC1-LABEL: @select_icmp_const_truncated_iv_widened_exit(
define i32 @not_vectorized_select_icmp_const_truncated_iv_widened_exit(ptr nocapture readonly %a, i32 %n) {		; CHECK-VF4IC1-NEXT: entry:
; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_widened_exit		; CHECK-VF4IC1-NEXT: [[CMP_SGT:%.]] = icmp sgt i32 [[N:%.]], 0
; CHECK-NOT: vector.body:		; CHECK-VF4IC1-NEXT: br i1 [[CMP_SGT]], label [[FOR_BODY_PREHEADER:%.]], label [[EXIT:%.]]
		; CHECK-VF4IC1: for.body.preheader:
		; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
		; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
		; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
		; CHECK-VF4IC1: vector.ph:
		; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
		; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
		; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]]
		; CHECK-VF4IC1: vector.body:
		; CHECK-VF4IC1-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP4:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[VEC_IND:%.]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
		; CHECK-VF4IC1-NEXT: [[TMP1:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP0]]
		; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
		; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
		; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
		; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
		; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
		; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
		; CHECK-VF4IC1: middle.block:
		; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
		; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
		; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331
		; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
		; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
		; CHECK-VF4IC1: scalar.ph:
		; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
		; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC1-NEXT: br label [[FOR_BODY:%.*]]
		; CHECK-VF4IC1: for.body:
		; CHECK-VF4IC1-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[RDX:%.]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.]], [[FOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
		; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
		; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3
		; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
		; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
		; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
		; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
		; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
		; CHECK-VF4IC1: exit.loopexit:
		; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC1-NEXT: br label [[EXIT]]
		; CHECK-VF4IC1: exit:
		; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.]] = phi i32 [ 331, [[ENTRY:%.]] ], [ [[SPEC_SELECT_LCSSA]], [[EXIT_LOOPEXIT]] ]
		; CHECK-VF4IC1-NEXT: ret i32 [[RDX_LCSSA]]
		;
		; CHECK-VF4IC4-LABEL: @select_icmp_const_truncated_iv_widened_exit(
		; CHECK-VF4IC4-NEXT: entry:
		; CHECK-VF4IC4-NEXT: [[CMP_SGT:%.]] = icmp sgt i32 [[N:%.]], 0
		; CHECK-VF4IC4-NEXT: br i1 [[CMP_SGT]], label [[FOR_BODY_PREHEADER:%.]], label [[EXIT:%.]]
		; CHECK-VF4IC4: for.body.preheader:
		; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
		; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
		; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
		; CHECK-VF4IC4: vector.ph:
		; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
		; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
		; CHECK-VF4IC4-NEXT: br label [[VECTOR_BODY:%.*]]
		; CHECK-VF4IC4: vector.body:
		; CHECK-VF4IC4-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP16:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP17:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP18:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP19:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_IND:%.]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[STEP_ADD5:%.*]] = add <4 x i32> [[STEP_ADD4]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
		; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
		; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
		; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
		; CHECK-VF4IC4-NEXT: [[TMP4:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP0]]
		; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
		; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
		; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
		; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD7]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD8]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD9]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
		; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
		; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i32> [[STEP_ADD4]], <4 x i32> [[VEC_PHI2]]
		; CHECK-VF4IC4-NEXT: [[TMP19]] = select <4 x i1> [[TMP15]], <4 x i32> [[STEP_ADD5]], <4 x i32> [[VEC_PHI3]]
		; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
		; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD5]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
		; CHECK-VF4IC4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
		; CHECK-VF4IC4: middle.block:
		; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP16]], <4 x i32> [[TMP17]])
		; CHECK-VF4IC4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP18]])
		; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX10]], <4 x i32> [[TMP19]])
		; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX11]])
		; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP21]], -2147483648
		; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP21]], i32 331
		; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
		; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
		; CHECK-VF4IC4: scalar.ph:
		; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
		; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC4-NEXT: br label [[FOR_BODY:%.*]]
		; CHECK-VF4IC4: for.body:
		; CHECK-VF4IC4-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[RDX:%.]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.]], [[FOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
		; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
		; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP22]], 3
		; CHECK-VF4IC4-NEXT: [[TMP23:%.*]] = trunc i64 [[IV]] to i32
		; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP23]], i32 [[RDX]]
		; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
		; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
		; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
		; CHECK-VF4IC4: exit.loopexit:
		; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC4-NEXT: br label [[EXIT]]
		; CHECK-VF4IC4: exit:
		; CHECK-VF4IC4-NEXT: [[RDX_LCSSA:%.]] = phi i32 [ 331, [[ENTRY:%.]] ], [ [[SPEC_SELECT_LCSSA]], [[EXIT_LOOPEXIT]] ]
		; CHECK-VF4IC4-NEXT: ret i32 [[RDX_LCSSA]]
		;
		; CHECK-VF1IC4-LABEL: @select_icmp_const_truncated_iv_widened_exit(
		; CHECK-VF1IC4-NEXT: entry:
		; CHECK-VF1IC4-NEXT: [[CMP_SGT:%.]] = icmp sgt i32 [[N:%.]], 0
		; CHECK-VF1IC4-NEXT: br i1 [[CMP_SGT]], label [[FOR_BODY_PREHEADER:%.]], label [[EXIT:%.]]
		; CHECK-VF1IC4: for.body.preheader:
		; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
		; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
		; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
		; CHECK-VF1IC4: vector.ph:
		; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
		; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
		; CHECK-VF1IC4-NEXT: br label [[VECTOR_BODY:%.*]]
		; CHECK-VF1IC4: vector.body:
		; CHECK-VF1IC4-NEXT: [[OFFSET_IDX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP21:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP22:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP23:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP24:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
		; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
		; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
		; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2
		; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3
		; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
		; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1
		; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2
		; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 3
		; CHECK-VF1IC4-NEXT: [[TMP9:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP5]]
		; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
		; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
		; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
		; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3
		; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP14]], 3
		; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3
		; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3
		; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP1]], i32 [[VEC_PHI]]
		; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP2]], i32 [[VEC_PHI1]]
		; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP3]], i32 [[VEC_PHI2]]
		; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP4]], i32 [[VEC_PHI3]]
		; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4
		; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
		; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
		; CHECK-VF1IC4: middle.block:
		; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
		; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
		; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
		; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
		; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331
		; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
		; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
		; CHECK-VF1IC4: scalar.ph:
		; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
		; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF1IC4-NEXT: br label [[FOR_BODY:%.*]]
		; CHECK-VF1IC4: for.body:
		; CHECK-VF1IC4-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[RDX:%.]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.]], [[FOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
		; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
		; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3
		; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32
		; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
		; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
		; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
		; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
		; CHECK-VF1IC4: exit.loopexit:
		; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF1IC4-NEXT: br label [[EXIT]]
		; CHECK-VF1IC4: exit:
		; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.]] = phi i32 [ 331, [[ENTRY:%.]] ], [ [[SPEC_SELECT_LCSSA]], [[EXIT_LOOPEXIT]] ]
		; CHECK-VF1IC4-NEXT: ret i32 [[RDX_LCSSA]]
;		;
entry:		entry:
%cmp.sgt = icmp sgt i32 %n, 0		%cmp.sgt = icmp sgt i32 %n, 0
br i1 %cmp.sgt, label %for.body.preheader, label %exit		br i1 %cmp.sgt, label %for.body.preheader, label %exit

for.body.preheader: ; preds = %entry		for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %n to i64		%wide.trip.count = zext i32 %n to i64
br label %for.body		br label %for.body
Show All 10 Lines	for.body: ; preds = %for.body.preheader, %for.body
%exitcond.not = icmp eq i64 %inc, %wide.trip.count		%exitcond.not = icmp eq i64 %inc, %wide.trip.count
br i1 %exitcond.not, label %exit, label %for.body		br i1 %exitcond.not, label %exit, label %for.body

exit: ; preds = %for.body, %entry		exit: ; preds = %for.body, %entry
%rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]		%rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
ret i32 %rdx.lcssa		ret i32 %rdx.lcssa
}		}

define i32 @not_vectorized_select_icmp_const_truncated_iv_const_exit(ptr nocapture readonly %a) {		define i32 @select_icmp_const_truncated_iv_const_exit(ptr nocapture readonly %a) {
; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_const_exit		; CHECK-VF4IC1-LABEL: @select_icmp_const_truncated_iv_const_exit(
; CHECK-NOT: vector.body:		; CHECK-VF4IC1-NEXT: entry:
		; CHECK-VF4IC1-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
		; CHECK-VF4IC1: vector.ph:
		; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]]
		; CHECK-VF4IC1: vector.body:
		; CHECK-VF4IC1-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP4:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[VEC_IND:%.]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
		; CHECK-VF4IC1-NEXT: [[TMP1:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP0]]
		; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
		; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
		; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
		; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
		; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
		; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
		; CHECK-VF4IC1: middle.block:
		; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
		; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
		; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331
		; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 20000, 20000
		; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
		; CHECK-VF4IC1: scalar.ph:
		; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ 20000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
		; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC1-NEXT: br label [[FOR_BODY:%.*]]
		; CHECK-VF4IC1: for.body:
		; CHECK-VF4IC1-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[RDX:%.]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.]], [[FOR_BODY]] ]
		; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
		; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
		; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3
		; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
		; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
		; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
		; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
		; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
		; CHECK-VF4IC1: exit:
		; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
		;
		; CHECK-VF4IC4-LABEL: @select_icmp_const_truncated_iv_const_exit(
		; CHECK-VF4IC4-NEXT: entry:
		; CHECK-VF4IC4-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
		; CHECK-VF4IC4: vector.ph:
		; CHECK-VF4IC4-NEXT: br label [[VECTOR_BODY:%.*]]
		; CHECK-VF4IC4: vector.body:
		; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[STEP_ADD5:%.*]] = add <4 x i32> [[STEP_ADD4]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
		; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
		; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
		; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
		; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
		; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
		; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
		; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
		; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12
		; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8
		; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD7]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD8]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD9]], <i64 3, i64 3, i64 3, i64 3>
		; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
		; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
		; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i32> [[STEP_ADD4]], <4 x i32> [[VEC_PHI2]]
		; CHECK-VF4IC4-NEXT: [[TMP19]] = select <4 x i1> [[TMP15]], <4 x i32> [[STEP_ADD5]], <4 x i32> [[VEC_PHI3]]
		; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
		; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD5]], <i32 4, i32 4, i32 4, i32 4>
		; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
		; CHECK-VF4IC4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
		; CHECK-VF4IC4: middle.block:
		; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP16]], <4 x i32> [[TMP17]])
		; CHECK-VF4IC4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP18]])
		; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX10]], <4 x i32> [[TMP19]])
		; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX11]])
		; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP21]], -2147483648
		; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP21]], i32 331
		; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 20000, 20000
		; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
		; CHECK-VF4IC4: scalar.ph:
		; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
		; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC4-NEXT: br label [[FOR_BODY:%.*]]
		; CHECK-VF4IC4: for.body:
		; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], [[FOR_BODY]] ]
		; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
		; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
		; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP22]], 3
		; CHECK-VF4IC4-NEXT: [[TMP23:%.*]] = trunc i64 [[IV]] to i32
		; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP23]], i32 [[RDX]]
		; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
		; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
		; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
		; CHECK-VF4IC4: exit:
		; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
		;
		; CHECK-VF1IC4-LABEL: @select_icmp_const_truncated_iv_const_exit(
		; CHECK-VF1IC4-NEXT: entry:
		; CHECK-VF1IC4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
		; CHECK-VF1IC4: vector.ph:
		; CHECK-VF1IC4-NEXT: br label [[VECTOR_BODY:%.*]]
		; CHECK-VF1IC4: vector.body:
		; CHECK-VF1IC4-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
		; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
		; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
		; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2
		; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3
		; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
		; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1
		; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2
		; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 3
		; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
		; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
		; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
		; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
		; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
		; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3
		; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP14]], 3
		; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3
		; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3
		; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP1]], i32 [[VEC_PHI]]
		; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP2]], i32 [[VEC_PHI1]]
		; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP3]], i32 [[VEC_PHI2]]
		; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP4]], i32 [[VEC_PHI3]]
		; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4
		; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
		; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
		; CHECK-VF1IC4: middle.block:
		; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
		; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
		; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
		; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
		; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331
		; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 20000, 20000
		; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
		; CHECK-VF1IC4: scalar.ph:
		; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
		; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF1IC4-NEXT: br label [[FOR_BODY:%.*]]
		; CHECK-VF1IC4: for.body:
		; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], [[FOR_BODY]] ]
		; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
		; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
		; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3
		; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32
		; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
		; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
		; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
		; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
		; CHECK-VF1IC4: exit:
		; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
		; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
;		;
entry:		entry:
br label %for.body		br label %for.body

for.body: ; preds = %entry, %for.body		for.body: ; preds = %entry, %for.body
%iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]		%iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]		%rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv		%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%0 = load i64, ptr %arrayidx, align 8		%0 = load i64, ptr %arrayidx, align 8
%cmp = icmp sgt i64 %0, 3		%cmp = icmp sgt i64 %0, 3
%1 = trunc i64 %iv to i32		%1 = trunc i64 %iv to i32
%spec.select = select i1 %cmp, i32 %1, i32 %rdx		%spec.select = select i1 %cmp, i32 %1, i32 %rdx
%inc = add nuw nsw i64 %iv, 1		%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, 20000		%exitcond.not = icmp eq i64 %inc, 20000
br i1 %exitcond.not, label %exit, label %for.body		br i1 %exitcond.not, label %exit, label %for.body

exit: ; preds = %for.body		exit: ; preds = %for.body
ret i32 %spec.select		ret i32 %spec.select
}		}

		; Negative tests

define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr nocapture readonly %a, i64 %n) {		define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr nocapture readonly %a, i64 %n) {
; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit		; CHECK-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit
; CHECK-NOT: vector.body:		; CHECK-NOT: vector.body:
;		;
entry:		entry:
%cmp.sgt = icmp sgt i64 %n, 0		%cmp.sgt = icmp sgt i64 %n, 0
br i1 %cmp.sgt, label %for.body, label %exit		br i1 %cmp.sgt, label %for.body, label %exit

▲ Show 20 Lines • Show All 168 Lines • Show Last 20 Lines