Index: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h @@ -267,18 +267,10 @@ /// -1 - consecutive and decreasing. int getConsecutiveDirection() const; - /// Compute the transformed value of Index at offset StartValue using step - /// StepValue. - /// For integer induction, returns StartValue + Index * StepValue. - /// For pointer induction, returns StartValue[Index * StepValue]. - /// FIXME: The newly created binary instructions should contain nsw/nuw - /// flags, which can be found from the original scalar operations. - Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, - const DataLayout& DL) const; - Value *getStartValue() const { return StartValue; } InductionKind getKind() const { return IK; } const SCEV *getStep() const { return Step; } + BinaryOperator *getInductionBinOp() const { return InductionBinOp; } ConstantInt *getConstIntStepValue() const; /// Returns true if \p Phi is an induction in the loop \p L. 
If \p Phi is an Index: llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp @@ -759,74 +759,6 @@ return nullptr; } -Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, - ScalarEvolution *SE, - const DataLayout& DL) const { - - SCEVExpander Exp(*SE, DL, "induction"); - assert(Index->getType() == Step->getType() && - "Index type does not match StepValue type"); - switch (IK) { - case IK_IntInduction: { - assert(Index->getType() == StartValue->getType() && - "Index type does not match StartValue type"); - - // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution - // and calculate (Start + Index * Step) for all cases, without - // special handling for "isOne" and "isMinusOne". - // But in the real life the result code getting worse. We mix SCEV - // expressions and ADD/SUB operations and receive redundant - // intermediate values being calculated in different ways and - // Instcombine is unable to reduce them all. 
- - if (getConstIntStepValue() && - getConstIntStepValue()->isMinusOne()) - return B.CreateSub(StartValue, Index); - if (getConstIntStepValue() && - getConstIntStepValue()->isOne()) - return B.CreateAdd(StartValue, Index); - const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), - SE->getMulExpr(Step, SE->getSCEV(Index))); - return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); - } - case IK_PtrInduction: { - assert(isa<SCEVConstant>(Step) && - "Expected constant step for pointer induction"); - const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); - Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); - return B.CreateGEP(nullptr, StartValue, Index); - } - case IK_FpInduction: { - assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); - assert(InductionBinOp && - (InductionBinOp->getOpcode() == Instruction::FAdd || - InductionBinOp->getOpcode() == Instruction::FSub) && - "Original bin op should be defined for FP induction"); - - Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); - - // Floating point operations had to be 'fast' to enable the induction. - FastMathFlags Flags; - Flags.setFast(); - - Value *MulExp = B.CreateFMul(StepValue, Index); - if (isa<Instruction>(MulExp)) - // We have to check, the MulExp may be a constant. 
- cast<Instruction>(MulExp)->setFastMathFlags(Flags); - - Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, - MulExp, "induction"); - if (isa<Instruction>(BOp)) - cast<Instruction>(BOp)->setFastMathFlags(Flags); - - return BOp; - } - case IK_NoInduction: - return nullptr; - } - llvm_unreachable("invalid enum"); -} - bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, InductionDescriptor &D) { Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -584,6 +584,16 @@ /// Emit bypass checks to check any memory assumptions we may have made. void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); + /// Compute the transformed value of Index at offset StartValue using step + /// StepValue. + /// For integer induction, returns StartValue + Index * StepValue. + /// For pointer induction, returns StartValue[Index * StepValue]. + /// FIXME: The newly created binary instructions should contain nsw/nuw + /// flags, which can be found from the original scalar operations. + Value *emitTransformedIndex(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, + const DataLayout &DL, + const InductionDescriptor &ID) const; + /// Add additional metadata to \p To that was not present on \p Orig. /// /// Currently this is used to add the noalias annotations based on the @@ -1971,7 +1981,7 @@ ? 
Builder.CreateSExtOrTrunc(Induction, IV->getType()) : Builder.CreateCast(Instruction::SIToFP, Induction, IV->getType()); - ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL); + ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID); ScalarIV->setName("offset.idx"); } if (Trunc) { @@ -2810,6 +2820,75 @@ LVer->prepareNoAliasMetadata(); } +Value *InnerLoopVectorizer::emitTransformedIndex( + IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL, + const InductionDescriptor &ID) const { + + SCEVExpander Exp(*SE, DL, "induction"); + auto Step = ID.getStep(); + auto StartValue = ID.getStartValue(); + assert(Index->getType() == Step->getType() && + "Index type does not match StepValue type"); + switch (ID.getKind()) { + case InductionDescriptor::IK_IntInduction: { + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + + // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution + // and calculate (Start + Index * Step) for all cases, without + // special handling for "isOne" and "isMinusOne". + // But in the real life the result code getting worse. We mix SCEV + // expressions and ADD/SUB operations and receive redundant + // intermediate values being calculated in different ways and + // Instcombine is unable to reduce them all. 
+ + if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne()) + return B.CreateAdd(StartValue, Index); + const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), + SE->getMulExpr(Step, SE->getSCEV(Index))); + return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); + } + case InductionDescriptor::IK_PtrInduction: { + assert(isa<SCEVConstant>(Step) && + "Expected constant step for pointer induction"); + const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); + Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); + return B.CreateGEP(nullptr, StartValue, Index); + } + case InductionDescriptor::IK_FpInduction: { + assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); + auto InductionBinOp = ID.getInductionBinOp(); + assert(InductionBinOp && + (InductionBinOp->getOpcode() == Instruction::FAdd || + InductionBinOp->getOpcode() == Instruction::FSub) && + "Original bin op should be defined for FP induction"); + + Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); + + // Floating point operations had to be 'fast' to enable the induction. + FastMathFlags Flags; + Flags.setFast(); + + Value *MulExp = B.CreateFMul(StepValue, Index); + if (isa<Instruction>(MulExp)) + // We have to check, the MulExp may be a constant. + cast<Instruction>(MulExp)->setFastMathFlags(Flags); + + Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp, + "induction"); + if (isa<Instruction>(BOp)) + cast<Instruction>(BOp)->setFastMathFlags(Flags); + + return BOp; + } + case InductionDescriptor::IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} + BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { /* In this function we generate a new loop. 
The new loop will contain @@ -2948,7 +3027,7 @@ CastInst::getCastOpcode(CountRoundDown, true, StepType, true); Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd"); const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); - EndValue = II.transform(B, CRD, PSE.getSE(), DL); + EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II); EndValue->setName("ind.end"); } @@ -3044,7 +3123,7 @@ II.getStep()->getType()) : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType()); CMO->setName("cast.cmo"); - Value *Escape = II.transform(B, CMO, PSE.getSE(), DL); + Value *Escape = emitTransformedIndex(B, CMO, PSE.getSE(), DL, II); Escape->setName("ind.escape"); MissingVals[UI] = Escape; } @@ -3879,7 +3958,8 @@ for (unsigned Lane = 0; Lane < Lanes; ++Lane) { Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL); + Value *SclrGep = + emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); SclrGep->setName("next.gep"); VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep); }