Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -573,7 +573,7 @@ /// Note that \p EntryVal doesn't have to be an induction variable (e.g., it /// can be a truncate instruction). void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal, - const InductionDescriptor &ID); + const InductionDescriptor &ID, bool IsNewFromTrunc); /// Create a vector induction phi node based on an existing scalar one. \p /// EntryVal is the value from the original loop that maps to the vector phi @@ -2594,7 +2594,7 @@ // in the loop in the common case prior to InstCombine. We will be trading // one vector extract for each scalar step. if (NeedsScalarIV) - buildScalarSteps(ScalarIV, Step, EntryVal, ID); + buildScalarSteps(ScalarIV, Step, EntryVal, ID, Trunc); } Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step, @@ -2655,7 +2655,8 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal, - const InductionDescriptor &ID) { + const InductionDescriptor &ID, + bool IsNewFromTrunc) { // We shouldn't have to build scalar steps if we aren't vectorizing. 
assert(VF > 1 && "VF should be greater than one"); @@ -2689,7 +2690,8 @@ auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add); - recordVectorLoopValueForInductionCast(ID, Add, Part, Lane); + if (!IsNewFromTrunc) + recordVectorLoopValueForInductionCast(ID, Add, Part, Lane); } } } Index: llvm/test/Transforms/LoopVectorize/X86/pr36524.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -scev-version-unknown -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s +
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +
+define void @foo() { +; CHECK-LABEL: @foo( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 2, [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[OFFSET_IDX1]] to i32 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP11]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 
[[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80 +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; +entry: + br label %loop +
+loop: + %0 = phi i64 [ 2, %entry ], [ %3, %loop ] + %1 = and i64 %0, 4294967295 + %2 = trunc i64 %0 to i32 + %3 = add nuw nsw i64 %1, 1 + %4 = icmp sgt i32 %2, 80 + br i1 %4, label %exit, label %loop +
+exit: + ret void +}