Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -30,6 +30,7 @@ class Loop; class LoopInfo; class Pass; +class PredicatedScalarEvolution; class PredIteratorCache; class ScalarEvolution; class TargetLibraryInfo; @@ -289,7 +290,11 @@ ConstantInt *getStepValue() const { return StepValue; } static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE, - InductionDescriptor &D); + InductionDescriptor &D, + const SCEV *Expr = nullptr); + + static bool isInductionPHI(PHINode *Phi, PredicatedScalarEvolution &PSE, + InductionDescriptor &D, bool Assume = false); private: /// Private constructor - used by \c isInductionPHI. Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -698,16 +698,43 @@ llvm_unreachable("invalid enum"); } -bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, - InductionDescriptor &D) { +bool InductionDescriptor::isInductionPHI(PHINode *Phi, + PredicatedScalarEvolution &PSE, + InductionDescriptor &D, + bool Assume) { + Type *PhiTy = Phi->getType(); + // We only handle integer and pointer inductions variables. + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) + return false; + + const SCEV *PhiScev = PSE.getSCEV(Phi); + const auto *AR = dyn_cast(PhiScev); + + // We need this expression to be an AddRecExpr. + if (Assume && !AR) + AR = PSE.getAsAddRec(Phi); + + if (!AR) { + DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + return false; + } + + return isInductionPHI(Phi, PSE.getSE(), D, AR); +} + +bool InductionDescriptor::isInductionPHI(PHINode *Phi, + ScalarEvolution *SE, + InductionDescriptor &D, + const SCEV *Expr) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) return false; // Check that the PHI is consecutive. - const SCEV *PhiScev = SE->getSCEV(Phi); + const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast(PhiScev); + if (!AR) { DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return false; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4581,8 +4581,17 @@ return false; } + RecurrenceDescriptor RedDes; + if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) { + if (RedDes.hasUnsafeAlgebra()) + Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); + AllowedExit.insert(RedDes.getLoopExitInstr()); + Reductions[Phi] = RedDes; + continue; + } + InductionDescriptor ID; - if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) { + auto ValidateInductionPhi = [&]() { Inductions[Phi] = ID; // Get the widest type. if (!WidestIndTy) @@ -4614,23 +4623,30 @@ return false; } - continue; - } + return true; + }; - RecurrenceDescriptor RedDes; - if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) { - if (RedDes.hasUnsafeAlgebra()) - Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); - AllowedExit.insert(RedDes.getLoopExitInstr()); - Reductions[Phi] = RedDes; - continue; + if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) { + if (ValidateInductionPhi()) + continue; + return false; } + if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, DT)) { FirstOrderRecurrences.insert(Phi); continue; } + // As a last resort, coerce the PHI to a AddRec expression + // and re-try classifying it a an induction PHI. + if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) { + if (ValidateInductionPhi()) + continue; + return false; + } + + emitAnalysis(VectorizationReport(&*it) << "value that could not be identified as " "reduction is used outside the loop"); Index: test/Transforms/LoopVectorize/induction.ll =================================================================== --- test/Transforms/LoopVectorize/induction.ll +++ test/Transforms/LoopVectorize/induction.ll @@ -166,3 +166,78 @@ loopexit: ret i32 %and.i } + +; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32) +; In order to recognize it as an induction PHI and vectorize this loop, +; we need to convert the SCEV expression into an AddRecExpr. +; The expression gets converted to {zext i8 %t to i32,+,1}. + +; CHECK-LABEL: wrappingindvars1 +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +; CHECK: add <2 x i32> {{%[^ ]*}}, +define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { + entry: + %st = zext i8 %t to i16 + %ext = zext i8 %t to i32 + %ecmp = icmp ult i16 %st, 42 + br i1 %ecmp, label %loop, label %exit + + loop: + + %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] + %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] + %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop] + + %ptr = getelementptr inbounds i32, i32* %A, i8 %idx + store i32 %sphi, i32* %ptr + + %idx.inc = add i8 %idx, 1 + %idx.inc.ext = zext i8 %idx.inc to i32 + %idx.b.inc = add nuw nsw i32 %idx.b, 1 + + %c = icmp ult i32 %idx.b, %len + br i1 %c, label %loop, label %exit + + exit: + ret void +} + +; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32)) +; In order to recognize it as an induction PHI and vectorize this loop, +; we need to convert the SCEV expression into an AddRecExpr. +; The expression gets converted to ({4 * (zext %t to i32),+,4}). +; CHECK-LABEL: wrappingindvars2 +; CHECK-LABEL: vector.scevcheck +; CHECK-LABEL: vector.body +; CHECK: add <2 x i32> {{%[^ ]*}}, +define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { + +entry: + %st = zext i8 %t to i16 + %ext = zext i8 %t to i32 + %ext.mul = mul i32 %ext, 4 + + %ecmp = icmp ult i16 %st, 42 + br i1 %ecmp, label %loop, label %exit + + loop: + + %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] + %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop] + %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] + + %ptr = getelementptr inbounds i32, i32* %A, i8 %idx + store i32 %sphi, i32* %ptr + + %idx.inc = add i8 %idx, 1 + %idx.inc.ext = zext i8 %idx.inc to i32 + %mul = mul i32 %idx.inc.ext, 4 + %idx.b.inc = add nuw nsw i32 %idx.b, 1 + + %c = icmp ult i32 %idx.b, %len + br i1 %c, label %loop, label %exit + + exit: + ret void +}