Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4879,12 +4879,15 @@ // induction variable. Notice that we can only optimize the 'trunc' case // because (a) FP conversions lose precision, (b) sext/zext may wrap, and // (c) other casts depend on pointer size. - auto ID = Legal->getInductionVars()->lookup(OldInduction); - if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction && - ID.getConstIntStepValue()) { - widenIntInduction(OldInduction, cast<TruncInst>(CI)); - break; - } + if (auto *Trunc = dyn_cast<TruncInst>(CI)) + if (auto *Phi = dyn_cast<PHINode>(Trunc->getOperand(0))) { + auto II = Legal->getInductionVars()->find(Phi); + if (II != Legal->getInductionVars()->end()) + if (II->second.getConstIntStepValue()) { + widenIntInduction(Phi, Trunc); + break; + } + } /// Vectorize casts. Type *DestTy = @@ -7224,12 +7227,17 @@ case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - // We optimize the truncation of induction variable. - // The cost of these is the same as the scalar operation. - if (I->getOpcode() == Instruction::Trunc && - Legal->isInductionVariable(I->getOperand(0))) - return TTI.getCastInstrCost(I->getOpcode(), I->getType(), - I->getOperand(0)->getType()); + // We optimize the truncation of induction variables having constant + // integer steps. The cost of these truncations is the same as the scalar + // operation. 
+ if (auto *Trunc = dyn_cast<TruncInst>(I)) + if (auto *Phi = dyn_cast<PHINode>(Trunc->getOperand(0))) { + auto II = Legal->getInductionVars()->find(Phi); + if (II != Legal->getInductionVars()->end()) + if (II->second.getConstIntStepValue()) + return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(), + Trunc->getSrcTy()); + } Type *SrcScalarTy = I->getOperand(0)->getType(); Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF); Index: llvm/trunk/test/Transforms/LoopVectorize/induction.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/induction.ll +++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll @@ -773,3 +773,34 @@ exit: ret void } + +; CHECK-LABEL: @non_primary_iv_trunc( +; CHECK: vector.body: +; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; CHECK: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ] +; CHECK: [[TMP3:%.*]] = add i64 %index, 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %a, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: %index.next = add i64 %index, 2 +; CHECK: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4> +; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body +define void @non_primary_iv_trunc(i32* %a, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] + %j = phi i64 [ %j.next, %for.body ], [ 0, %entry ] + %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i + %tmp1 = trunc i64 %j to i32 + store i32 %tmp1, i32* %tmp0, align 4 + %i.next = add nuw nsw i64 %i, 1 + %j.next = add nuw nsw i64 %j, 2 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} Index: 
llvm/trunk/test/Transforms/LoopVectorize/reverse_iter.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/reverse_iter.ll +++ llvm/trunk/test/Transforms/LoopVectorize/reverse_iter.ll @@ -2,7 +2,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -; Make sure that the reverse iterators are calculated using 64bit arithmetic, not 32. +; PR15882: This test ensures that we do not produce wrapping arithmetic when +; creating constant reverse step vectors. ; ; int foo(int n, int *A) { ; int sum; @@ -13,7 +14,7 @@ ; ;CHECK-LABEL: @foo( -;CHECK: <i64 0, i64 -1, i64 -2, i64 -3> +;CHECK: <i32 0, i32 -1, i32 -2, i32 -3> ;CHECK: ret define i32 @foo(i32 %n, i32* nocapture %A) { %1 = icmp sgt i32 %n, 0