Index: ../include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- ../include/llvm/Analysis/ScalarEvolution.h
+++ ../include/llvm/Analysis/ScalarEvolution.h
@@ -1187,8 +1187,12 @@
   const SCEV *getConstant(ConstantInt *V);
   const SCEV *getConstant(const APInt& Val);
   const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
+  const SCEV *getFpConstant(ConstantFP *V);
+  const SCEV *getFpConstant(const APFloat& Val);
+  const SCEV *getFpConstant(Type *Ty, double V);
   const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
   const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty);
+  const SCEV *getSIToFPExpr(const SCEV *Op, Type *Ty);
   const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
   const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
   const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -1203,6 +1207,15 @@
     SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
     return getAddExpr(Ops, Flags);
   }
+  const SCEV *getFAddExpr(SmallVectorImpl<const SCEV *> &Ops);
+  const SCEV *getFAddExpr(const SCEV *LHS, const SCEV *RHS) {
+    SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+    return getFAddExpr(Ops);
+  }
+  const SCEV *getFAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2) {
+    SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
+    return getFAddExpr(Ops);
+  }
   const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
   const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS,
@@ -1215,6 +1228,15 @@
     SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
     return getMulExpr(Ops, Flags);
   }
+  const SCEV *getFMulExpr(SmallVectorImpl<const SCEV *> &Ops);
+  const SCEV *getFMulExpr(const SCEV *LHS, const SCEV *RHS) {
+    SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+    return getFMulExpr(Ops);
+  }
+  const SCEV *getFMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2) {
+    SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
+    return getFMulExpr(Ops);
+  }
   const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS);
   const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS);
   const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step,
@@ -1226,6 +1248,15 @@
     SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
     return getAddRecExpr(NewOp, L, Flags);
   }
+  const SCEV *getFAddRecExpr(const SCEV *Start, const SCEV *Step,
+                             const Loop *L);
+  const SCEV *getFAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+                             const Loop *L);
+  const SCEV *getFAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands,
+                             const Loop *L) {
+    SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
+    return getFAddRecExpr(NewOp, L);
+  }
   /// Returns an expression for a GEP
   ///
   /// \p PointeeType The type used as the basis for the pointer arithmetics
@@ -1263,6 +1294,10 @@
   const SCEV *getNegativeSCEV(const SCEV *V,
                               SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);

+  /// Return the FP SCEV object corresponding to -V.
+  ///
+  const SCEV *getNegativeFpSCEV(const SCEV *V);
+
   /// Return the SCEV object corresponding to ~V.
   ///
   const SCEV *getNotSCEV(const SCEV *V);
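Note: for a quick feel for the new entry points, this is how a client could build
the FP recurrence for "float x = 1.5f; for (;;) x += 0.5f;" by hand. A minimal
sketch only; it assumes a ScalarEvolution &SE, a Loop *L and an LLVMContext &Ctx
are in scope:

  Type *FloatTy = Type::getFloatTy(Ctx);
  const SCEV *Start = SE.getFpConstant(FloatTy, 1.5);    // 1.5f
  const SCEV *Step  = SE.getFpConstant(FloatTy, 0.5);    // 0.5f
  const SCEV *Rec   = SE.getFAddRecExpr(Start, Step, L); // {1.5,+,0.5}<L>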
Index: ../include/llvm/Analysis/ScalarEvolutionExpander.h
===================================================================
--- ../include/llvm/Analysis/ScalarEvolutionExpander.h
+++ ../include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -322,7 +322,7 @@
     /// \brief Determine the most "relevant" loop for the given SCEV.
     const Loop *getRelevantLoop(const SCEV *);

-    Value *visitConstant(const SCEVConstant *S) {
+    Value *visitConstant(const SCEVIntOrFpConstant *S) {
       return S->getValue();
     }

@@ -348,6 +348,14 @@
       return S->getValue();
     }

+    Value *visitFAddExpr(const SCEVFAddExpr *S);
+
+    Value *visitFMulExpr(const SCEVFMulExpr *S);
+
+    Value *visitSintToFpExpr(const SCEVSintToFpExpr *S);
+
+    Value *visitFAddRecExpr(const SCEVFAddRecExpr *S);
+
     void rememberInstruction(Value *I);

     bool isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
Index: ../include/llvm/Analysis/ScalarEvolutionExpressions.h
===================================================================
--- ../include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ ../include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -27,20 +27,40 @@
   enum SCEVTypes {
     // These should be ordered in terms of increasing complexity to make the
     // folders simpler.
-    scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
-    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
-    scUnknown, scCouldNotCompute
+    scConstant, scFpConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr,
+    scMulExpr, scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scSintToFp,
+    scFAddExpr, scFMulExpr, scFAddRecExpr, scUnknown, scCouldNotCompute
   };

+  /// This class represents a constant integer or FP value.
+  class SCEVIntOrFpConstant : public SCEV {
+    friend class ScalarEvolution;
+  protected:
+    Constant *V;
+    SCEVIntOrFpConstant(const FoldingSetNodeIDRef ID, SCEVTypes T, Constant *v) :
+      SCEV(ID, T), V(v) {
+    }
+  public:
+    Constant *getValue() const { return V; }
+
+    Type *getType() const { return V->getType(); }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scConstant ||
+             S->getSCEVType() == scFpConstant;
+    }
+  };
+
   /// This class represents a constant integer value.
-  class SCEVConstant : public SCEV {
+  class SCEVConstant : public SCEVIntOrFpConstant {
     friend class ScalarEvolution;

-    ConstantInt *V;
     SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) :
-      SCEV(ID, scConstant), V(v) {}
+      SCEVIntOrFpConstant(ID, scConstant, v) {
+    }
   public:
-    ConstantInt *getValue() const { return V; }
+    ConstantInt *getValue() const { return cast<ConstantInt>(V); }
     const APInt &getAPInt() const { return getValue()->getValue(); }

     Type *getType() const { return V->getType(); }
@@ -51,6 +71,25 @@
     }
   };

+  /// This class represents a constant floating point value.
+  class SCEVFpConstant : public SCEVIntOrFpConstant {
+    friend class ScalarEvolution;
+
+    SCEVFpConstant(const FoldingSetNodeIDRef ID, ConstantFP *v) :
+      SCEVIntOrFpConstant(ID, scFpConstant, v) {
+    }
+  public:
+    ConstantFP *getValue() const { return cast<ConstantFP>(V); }
+    const APFloat &getAPFloat() const { return getValue()->getValueAPF(); }
+
+    Type *getType() const { return V->getType(); }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scFpConstant;
+    }
+  };
+
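Note: the shared SCEVIntOrFpConstant base is meant to let clients handle integer
and FP constants uniformly where the distinction does not matter. A sketch of
the intended dispatch, assuming some const SCEV *S is at hand:

  if (const auto *C = dyn_cast<SCEVIntOrFpConstant>(S)) {
    Constant *V = C->getValue();              // ConstantInt or ConstantFP
    if (const auto *FC = dyn_cast<SCEVFpConstant>(C)) {
      const APFloat &FV = FC->getAPFloat();   // FP-only accessor
      (void)FV;
    }
    (void)V;
  }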
  /// This is the base class for unary cast operator classes.
  class SCEVCastExpr : public SCEV {
  protected:
@@ -68,7 +107,8 @@
    static inline bool classof(const SCEV *S) {
      return S->getSCEVType() == scTruncate ||
             S->getSCEVType() == scZeroExtend ||
-            S->getSCEVType() == scSignExtend;
+            S->getSCEVType() == scSignExtend ||
+            S->getSCEVType() == scSintToFp;
    }
  };

@@ -87,6 +127,20 @@
    }
  };

+  /// This class represents a signed int to FP conversion.
+  class SCEVSintToFpExpr : public SCEVCastExpr {
+    friend class ScalarEvolution;
+
+    SCEVSintToFpExpr(const FoldingSetNodeIDRef ID,
+                     const SCEV *op, Type *ty);
+
+  public:
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scSintToFp;
+    }
+  };
+
  /// This class represents a zero extension of a small integer value
  /// to a larger integer value.
  class SCEVZeroExtendExpr : public SCEVCastExpr {
@@ -172,7 +226,10 @@
             S->getSCEVType() == scMulExpr ||
             S->getSCEVType() == scSMaxExpr ||
             S->getSCEVType() == scUMaxExpr ||
-            S->getSCEVType() == scAddRecExpr;
+            S->getSCEVType() == scAddRecExpr ||
+            S->getSCEVType() == scFAddRecExpr ||
+            S->getSCEVType() == scFAddExpr ||
+            S->getSCEVType() == scFMulExpr;
    }
  };

@@ -189,7 +246,9 @@
      return S->getSCEVType() == scAddExpr ||
             S->getSCEVType() == scMulExpr ||
             S->getSCEVType() == scSMaxExpr ||
-            S->getSCEVType() == scUMaxExpr;
+            S->getSCEVType() == scUMaxExpr ||
+            S->getSCEVType() == scFAddExpr ||
+            S->getSCEVType() == scFMulExpr;
    }

    /// Set flags for a non-recurrence without clearing previously set flags.
@@ -222,6 +281,29 @@
    }
  };

+  /// This node represents an addition of some number of FP SCEVs.
+  class SCEVFAddExpr : public SCEVCommutativeExpr {
+    friend class ScalarEvolution;
+
+    SCEVFAddExpr(const FoldingSetNodeIDRef ID,
+                 const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scFAddExpr, O, N) {
+    }
+
+  public:
+    Type *getType() const {
+      // All operands of an FAdd have the same FP type, so use the type of
+      // the last operand.
+      return getOperand(getNumOperands() - 1)->getType();
+    }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scFAddExpr;
+    }
+  };
+
  /// This node represents multiplication of some number of SCEVs.
  class SCEVMulExpr : public SCEVCommutativeExpr {
@@ -239,6 +321,21 @@
    }
  };

+  /// This node represents multiplication of some number of FP SCEVs.
+  class SCEVFMulExpr : public SCEVCommutativeExpr {
+    friend class ScalarEvolution;
+
+    SCEVFMulExpr(const FoldingSetNodeIDRef ID,
+                 const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scFMulExpr, O, N) {
+    }
+
+  public:
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scFMulExpr;
+    }
+  };

  /// This class represents a binary unsigned division operation.
  class SCEVUDivExpr : public SCEV {
@@ -268,6 +365,34 @@
    }
  };

+  /// This node represents a polynomial recurrence.
+  /// All operands of a recurrence expression are required to be loop
+  /// invariant.
+  class SCEVRecExpr : public SCEVNAryExpr {
+    const Loop *L;
+
+  protected:
+    SCEVRecExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
+                const SCEV *const *O, size_t N, const Loop *l)
+      : SCEVNAryExpr(ID, T, O, N), L(l) {
+    }
+
+  public:
+    const SCEV *getStart() const { return Operands[0]; }
+    const Loop *getLoop() const { return L; }
+
+    /// Return true if this represents an expression
+    /// A + B*x where A and B are loop invariant values.
+    bool isAffine() const {
+      // We know that the start value is invariant. This expression is thus
+      // affine iff the step is also invariant.
+      return getNumOperands() == 2;
+    }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scAddRecExpr ||
+             S->getSCEVType() == scFAddRecExpr;
+    }
+  };

  /// This node represents a polynomial recurrence on the trip count
  /// of the specified loop.  This is the primary focus of the
@@ -277,18 +402,14 @@
  ///
  /// All operands of an AddRec are required to be loop invariant.
  ///
-  class SCEVAddRecExpr : public SCEVNAryExpr {
+  class SCEVAddRecExpr : public SCEVRecExpr {
    friend class ScalarEvolution;

-    const Loop *L;
-
    SCEVAddRecExpr(const FoldingSetNodeIDRef ID,
                   const SCEV *const *O, size_t N, const Loop *l)
-      : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {}
+      : SCEVRecExpr(ID, scAddRecExpr, O, N, l) {}

  public:
-    const SCEV *getStart() const { return Operands[0]; }
-    const Loop *getLoop() const { return L; }

    /// Constructs and returns the recurrence indicating how much this
    /// expression steps by.  If this is a polynomial of degree N, it
@@ -301,14 +422,6 @@
                              getLoop(), FlagAnyWrap);
    }

-    /// Return true if this represents an expression A + B*x where A
-    /// and B are loop invariant values.
-    bool isAffine() const {
-      // We know that the start value is invariant.  This expression is thus
-      // affine iff the step is also invariant.
-      return getNumOperands() == 2;
-    }
-
    /// Return true if this represents an expression A + B*x + C*x^2
    /// where A, B and C are loop invariant values.  This corresponds
    /// to an addrec of the form {L,+,M,+,N}
@@ -350,6 +463,34 @@
    }
  };

+  /// This node represents a polynomial recurrence of an FP induction
+  /// variable of the specified loop.
+  ///
+  /// All operands of an FAddRec are required to be loop invariant.
+  class SCEVFAddRecExpr : public SCEVRecExpr {
+    friend class ScalarEvolution;
+
+    SCEVFAddRecExpr(const FoldingSetNodeIDRef ID,
+                    const SCEV *const *O, size_t N, const Loop *l)
+      : SCEVRecExpr(ID, scFAddRecExpr, O, N, l) {
+    }
+
+  public:
+    const SCEV *getStepRecurrence(ScalarEvolution &SE) const {
+      if (isAffine()) return getOperand(1);
+      return SE.getFAddRecExpr(SmallVector<const SCEV *, 3>(op_begin() + 1,
                                                             op_end()),
+                               getLoop());
+    }
+
+    /// Return the value of this chain of recurrences at the specified
+    /// iteration number.
+    const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
+
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scFAddRecExpr;
+    }
+  };
+
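Note: since integer and FP recurrences now share the SCEVRecExpr base, code that
only needs the start value, the loop, or the affine check can be written once
for both kinds. A sketch under that assumption, for some const SCEV *S:

  if (const auto *Rec = dyn_cast<SCEVRecExpr>(S)) {
    if (Rec->isAffine()) {                 // {Start,+,Step}
      const SCEV *Start = Rec->getStart(); // works for AddRec and FAddRec
      const Loop *L = Rec->getLoop();
      (void)Start; (void)L;
    }
  }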
  /// This class represents a signed maximum selection.
  class SCEVSMaxExpr : public SCEVCommutativeExpr {
    friend class ScalarEvolution;
@@ -440,21 +581,30 @@
    RetVal visit(const SCEV *S) {
      switch (S->getSCEVType()) {
      case scConstant:
-        return ((SC*)this)->visitConstant((const SCEVConstant*)S);
+      case scFpConstant:
+        return ((SC*)this)->visitConstant((const SCEVIntOrFpConstant*)S);
      case scTruncate:
        return ((SC*)this)->visitTruncateExpr((const SCEVTruncateExpr*)S);
      case scZeroExtend:
        return ((SC*)this)->visitZeroExtendExpr((const SCEVZeroExtendExpr*)S);
      case scSignExtend:
        return ((SC*)this)->visitSignExtendExpr((const SCEVSignExtendExpr*)S);
+      case scSintToFp:
+        return ((SC*)this)->visitSintToFpExpr((const SCEVSintToFpExpr*)S);
      case scAddExpr:
        return ((SC*)this)->visitAddExpr((const SCEVAddExpr*)S);
+      case scFAddExpr:
+        return ((SC*)this)->visitFAddExpr((const SCEVFAddExpr*)S);
      case scMulExpr:
        return ((SC*)this)->visitMulExpr((const SCEVMulExpr*)S);
+      case scFMulExpr:
+        return ((SC*)this)->visitFMulExpr((const SCEVFMulExpr*)S);
      case scUDivExpr:
        return ((SC*)this)->visitUDivExpr((const SCEVUDivExpr*)S);
      case scAddRecExpr:
        return ((SC*)this)->visitAddRecExpr((const SCEVAddRecExpr*)S);
+      case scFAddRecExpr:
+        return ((SC*)this)->visitFAddRecExpr((const SCEVFAddRecExpr*)S);
      case scSMaxExpr:
        return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
      case scUMaxExpr:
@@ -500,20 +650,25 @@
      switch (S->getSCEVType()) {
      case scConstant:
+      case scFpConstant:
      case scUnknown:
        break;
      case scTruncate:
      case scZeroExtend:
      case scSignExtend:
+      case scSintToFp:
        push(cast<SCEVCastExpr>(S)->getOperand());
        break;
      case scAddExpr:
+      case scFAddExpr:
      case scMulExpr:
+      case scFMulExpr:
      case scSMaxExpr:
      case scUMaxExpr:
      case scAddRecExpr:
-        for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
-          push(Op);
+      case scFAddRecExpr:
+        for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
+          push(Op);
        break;
      case scUDivExpr: {
        const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
@@ -545,7 +700,7 @@
  public:
    SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {}

-    const SCEV *visitConstant(const SCEVConstant *Constant) {
+    const SCEV *visitConstant(const SCEVIntOrFpConstant *Constant) {
      return Constant;
    }

@@ -564,6 +719,11 @@
      return SE.getSignExtendExpr(Operand, Expr->getType());
    }

+    const SCEV *visitSintToFpExpr(const SCEVSintToFpExpr *Expr) {
+      const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
+      return SE.getSIToFPExpr(Operand, Expr->getType());
+    }
+
    const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
      SmallVector<const SCEV *, 2> Operands;
      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
@@ -571,6 +731,13 @@
        Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
      return SE.getAddExpr(Operands);
    }

+    const SCEV *visitFAddExpr(const SCEVFAddExpr *Expr) {
+      SmallVector<const SCEV *, 2> Operands;
+      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+        Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
+      return SE.getFAddExpr(Operands);
+    }
+
    const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
      SmallVector<const SCEV *, 2> Operands;
      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
@@ -578,6 +745,13 @@
        Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
      return SE.getMulExpr(Operands);
    }

+    const SCEV *visitFMulExpr(const SCEVFMulExpr *Expr) {
+      SmallVector<const SCEV *, 2> Operands;
+      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+        Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
+      return SE.getFMulExpr(Operands);
+    }
+
    const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
      return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()),
                            ((SC*)this)->visit(Expr->getRHS()));
@@ -591,6 +765,13 @@
                              Expr->getNoWrapFlags());
    }

+    const SCEV *visitFAddRecExpr(const SCEVFAddRecExpr *Expr) {
+      SmallVector<const SCEV *, 2> Operands;
+      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+        Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
+      return SE.getFAddRecExpr(Operands, Expr->getLoop());
+    }
+
    const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
      SmallVector<const SCEV *, 2> Operands;
      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
Index: ../include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- ../include/llvm/Transforms/Utils/LoopUtils.h
+++ ../include/llvm/Transforms/Utils/LoopUtils.h
@@ -263,7 +263,8 @@
  enum InductionKind {
    IK_NoInduction,  ///< Not an induction variable.
    IK_IntInduction, ///< Integer induction variable. Step = C.
-    IK_PtrInduction  ///< Pointer induction var. Step = C / sizeof(elem).
+    IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
+    IK_FpInduction   ///< Floating point induction variable.
  };

 public:
@@ -300,6 +301,12 @@
                             InductionDescriptor &D,
                             const SCEV *Expr = nullptr);

+  /// Returns true if \p Phi is a floating point induction.
+  /// If \p Phi is an induction, the induction descriptor \p D will contain
+  /// the data describing this induction.
+  static bool isFpInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+                               InductionDescriptor &D);
+
  /// Returns true if \p Phi is an induction, in the context associated with
  /// the run-time predicate of PSE. If \p Assume is true, this can add further
  /// SCEV predicates to \p PSE in order to prove that \p Phi is an induction.
Index: ../lib/Analysis/IVUsers.cpp
===================================================================
--- ../lib/Analysis/IVUsers.cpp
+++ ../lib/Analysis/IVUsers.cpp
@@ -124,7 +124,7 @@
  if (!Processed.insert(I).second)
    return true;    // Instruction already handled.

-  if (!SE->isSCEVable(I->getType()))
+  if (!SE->isSCEVable(I->getType()) || I->getType()->isFloatingPointTy())
    return false;   // Void and FP expressions cannot be reduced.

  // IVUsers is used by LSR which assumes that all SCEV expressions are safe to
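Note: the kind of source loop the new IK_FpInduction descriptor is meant to
capture, written at the C level for illustration (consume and the names are
placeholders):

  extern void consume(float);
  void fp_iv(float init, float step, int n) {
    float x = init;                // start value, from the loop preheader
    for (int i = 0; i < n; ++i) {
      consume(x);
      x += step;                   // loop-invariant FP step on the back edge
    }
  }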
Index: ../lib/Analysis/ScalarEvolution.cpp
===================================================================
--- ../lib/Analysis/ScalarEvolution.cpp
+++ ../lib/Analysis/ScalarEvolution.cpp
@@ -139,6 +139,9 @@
  case scConstant:
    cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
    return;
+  case scFpConstant:
+    cast<SCEVFpConstant>(this)->getValue()->printAsOperand(OS, false);
+    return;
  case scTruncate: {
    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
    const SCEV *Op = Trunc->getOperand();
@@ -160,6 +163,13 @@
       << *SExt->getType() << ")";
    return;
  }
+  case scSintToFp: {
+    const SCEVSintToFpExpr *SintToFp = cast<SCEVSintToFpExpr>(this);
+    const SCEV *Op = SintToFp->getOperand();
+    OS << "(sitofp " << *Op->getType() << " " << *Op << " to "
+       << *SintToFp->getType() << ")";
+    return;
+  }
  case scAddRecExpr: {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
    OS << "{" << *AR->getOperand(0);
@@ -177,14 +187,28 @@
    OS << ">";
    return;
  }
+  case scFAddRecExpr: {
+    const SCEVFAddRecExpr *AR = cast<SCEVFAddRecExpr>(this);
+    OS << "{" << *AR->getOperand(0);
+    for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+      OS << ",+," << *AR->getOperand(i);
+    OS << "}<";
+    AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+    OS << ">";
+    return;
+  }
  case scAddExpr:
  case scMulExpr:
+  case scFAddExpr:
+  case scFMulExpr:
  case scUMaxExpr:
  case scSMaxExpr: {
    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
    const char *OpStr = nullptr;
    switch (NAry->getSCEVType()) {
+    case scFAddExpr:
    case scAddExpr: OpStr = " + "; break;
+    case scFMulExpr:
    case scMulExpr: OpStr = " * "; break;
    case scUMaxExpr: OpStr = " umax "; break;
    case scSMaxExpr: OpStr = " smax "; break;
@@ -247,18 +271,24 @@
Type *SCEV::getType() const {
  switch (static_cast<SCEVTypes>(getSCEVType())) {
  case scConstant:
-    return cast<SCEVConstant>(this)->getType();
+  case scFpConstant:
+    return cast<SCEVIntOrFpConstant>(this)->getType();
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
+  case scSintToFp:
    return cast<SCEVCastExpr>(this)->getType();
+  case scFAddRecExpr:
  case scAddRecExpr:
  case scMulExpr:
+  case scFMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
    return cast<SCEVNAryExpr>(this)->getType();
  case scAddExpr:
    return cast<SCEVAddExpr>(this)->getType();
+  case scFAddExpr:
+    return cast<SCEVFAddExpr>(this)->getType();
  case scUDivExpr:
    return cast<SCEVUDivExpr>(this)->getType();
  case scUnknown:
@@ -306,6 +336,18 @@
  return S->getSCEVType() == scCouldNotCompute;
}

+const SCEV *ScalarEvolution::getFpConstant(ConstantFP *V) {
+  FoldingSetNodeID ID;
+  ID.AddInteger(scFpConstant);
+  ID.AddPointer(V);
+  void *IP = nullptr;
+  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+    return S;
+  SCEV *S = new (SCEVAllocator) SCEVFpConstant(ID.Intern(SCEVAllocator), V);
+  UniqueSCEVs.InsertNode(S, IP);
+  return S;
+}
+
const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
  FoldingSetNodeID ID;
  ID.AddInteger(scConstant);
@@ -327,10 +369,25 @@
  return getConstant(ConstantInt::get(ITy, V, isSigned));
}

+const SCEV *ScalarEvolution::getFpConstant(Type *Ty, double Value) {
+  return getFpConstant(cast<ConstantFP>(ConstantFP::get(Ty, Value)));
+}
+
+const SCEV *ScalarEvolution::getFpConstant(const APFloat &Val) {
+  return getFpConstant(ConstantFP::get(getContext(), Val));
+}
+
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                           unsigned SCEVTy, const SCEV *op, Type *ty)
  : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}

+SCEVSintToFpExpr::SCEVSintToFpExpr(const FoldingSetNodeIDRef ID,
+                                   const SCEV *op, Type *ty)
+  : SCEVCastExpr(ID, scSintToFp, op, ty) {
+  assert(Op->getType()->isIntegerTy() && ty->isFloatingPointTy() &&
+         "Unexpected type for sitofp operation");
+}
+
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                   const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scTruncate, op, ty) {
@@ -543,9 +600,14 @@
      return LA.ult(RA) ? -1 : 1;
    }

+    case scFpConstant:
+      // Return 1 because analysis of FP expressions is not implemented yet.
+      return 1;
+
+    case scFAddRecExpr:
    case scAddRecExpr: {
-      const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
-      const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+      const SCEVRecExpr *LA = cast<SCEVRecExpr>(LHS);
+      const SCEVRecExpr *RA = cast<SCEVRecExpr>(RHS);

      // Compare addrec loop depths.
      const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
@@ -570,11 +632,12 @@
      return 0;
    }

    case scAddExpr:
    case scMulExpr:
    case scSMaxExpr:
-    case scUMaxExpr: {
+    case scUMaxExpr:
+    case scFAddExpr:
+    case scFMulExpr: {
      const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
      const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
@@ -606,7 +669,8 @@
    case scTruncate:
    case scZeroExtend:
-    case scSignExtend: {
+    case scSignExtend:
+    case scSintToFp: {
      const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
      const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
@@ -759,8 +823,15 @@
    void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
    void visitUnknown(const SCEVUnknown *Numerator) {}
    void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
-
-    void visitConstant(const SCEVConstant *Numerator) {
+    void visitFMulExpr(const SCEVFMulExpr *Numerator) {}
+    void visitFAddExpr(const SCEVFAddExpr *Numerator) {}
+    void visitFAddRecExpr(const SCEVFAddRecExpr *Numerator) {}
+    void visitSintToFpExpr(const SCEVSintToFpExpr *Numerator) {}
+
+    void visitConstant(const SCEVIntOrFpConstant *ConstNumerator) {
+      const SCEVConstant *Numerator = dyn_cast<SCEVConstant>(ConstNumerator);
+      if (!Numerator)
+        return;
      if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
        APInt NumeratorVal = Numerator->getAPInt();
        APInt DenominatorVal = D->getAPInt();
@@ -1060,6 +1131,16 @@
  return Result;
}

+const SCEV *SCEVFAddRecExpr::evaluateAtIteration(const SCEV *It,
+                                                 ScalarEvolution &SE) const {
+  const SCEV *Result = getStart();
+  for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+    Result = SE.getFAddExpr(Result, SE.getFMulExpr(getOperand(i),
+                            SE.getSIToFPExpr(It, Result->getType())));
+  }
+  return Result;
+}
+
//===----------------------------------------------------------------------===//
//                      SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
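Note: as a sanity check of evaluateAtIteration on an affine FP recurrence,
{1.5,+,0.5}<L> at iteration It is 1.5 + 0.5 * sitofp(It), so for It = 7 the
folders should reduce it to the SCEVFpConstant 5.0. Sketch only, assuming SE,
L, FloatTy and Ctx as before:

  const SCEV *Rec = SE.getFAddRecExpr(SE.getFpConstant(FloatTy, 1.5),
                                      SE.getFpConstant(FloatTy, 0.5), L);
  const SCEV *At7 = cast<SCEVFAddRecExpr>(Rec)->evaluateAtIteration(
      SE.getConstant(Type::getInt32Ty(Ctx), 7), SE);
  // At7 folds to 5.0 via getSIToFPExpr / getFMulExpr / getFAddExpr.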
@@ -1401,6 +1482,64 @@
  return false;
}

+const SCEV *ScalarEvolution::getSIToFPExpr(const SCEV *Op,
+                                           Type *Ty) {
+  Ty = getEffectiveSCEVType(Ty);
+
+  // Before doing any expensive analysis, check to see if we've already
+  // computed a SCEV for this Op and Ty.
+  FoldingSetNodeID ID;
+  ID.AddInteger(scSintToFp);
+  ID.AddPointer(Op);
+  ID.AddPointer(Ty);
+  void *IP = nullptr;
+  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+    return S;
+
+  // Fold a sitofp of an affine integer AddRec into an FP recurrence. This
+  // covers the common case of
+  //   for (int X = 0; X < 100; ++X) { float Y = (float)X; }
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+    if (AR->isAffine()) {
+      const SCEV *Start = AR->getStart();
+      const SCEV *Step = AR->getStepRecurrence(*this);
+
+      const Loop *L = AR->getLoop();
+      const SCEV *StartFP = nullptr;
+      // Convert an integer constant start value to an FP constant right here.
+      if (Start->getSCEVType() == scConstant) {
+        APInt ConstStart = cast<SCEVConstant>(Start)->getAPInt();
+        if (ConstStart.isSignedIntN(64))
+          StartFP = getFpConstant(Ty, (double)ConstStart.getSExtValue());
+        else if (ConstStart.isIntN(64))
+          StartFP = getFpConstant(Ty, (double)ConstStart.getZExtValue());
+      }
+      if (!StartFP)
+        StartFP = getSIToFPExpr(Start, Ty);
+
+      const SCEV *StepFP;
+      if (Step->getSCEVType() == scConstant) {
+        int64_t IntVal = cast<SCEVConstant>(Step)->getAPInt().getSExtValue();
+        StepFP = getFpConstant(Ty, (double)IntVal);
+      } else
+        StepFP = getSIToFPExpr(Step, Ty);
+
+      return getFAddRecExpr(StartFP, StepFP, L);
+    }
+  } else if (auto C = dyn_cast<SCEVConstant>(Op)) {
+    int64_t IntVal = C->getAPInt().getSExtValue();
+    return getFpConstant(Ty, (double)IntVal);
+  } else if (auto AddExpr = dyn_cast<SCEVAddExpr>(Op)) {
+    SmallVector<const SCEV *, 4> Ops;
+    for (auto Op : AddExpr->operands())
+      Ops.push_back(getSIToFPExpr(Op, Ty));
+    return getFAddExpr(Ops);
+  }
+
+  SCEV *S = new (SCEVAllocator) SCEVSintToFpExpr(ID.Intern(SCEVAllocator),
+                                                 Op, Ty);
+  UniqueSCEVs.InsertNode(S, IP);
+  return S;
+}
+
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                               Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
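Note: to make the new folding concrete, sitofp of the canonical integer IV
{0,+,1}<L> becomes the FP recurrence {0.0,+,1.0}<L> rather than an opaque
cast node. Sketch, assuming SE, L, Int32Ty and FloatTy are in scope:

  const SCEV *IV  = SE.getAddRecExpr(SE.getConstant(Int32Ty, 0),
                                     SE.getConstant(Int32Ty, 1), L,
                                     SCEV::FlagAnyWrap);
  const SCEV *FIV = SE.getSIToFPExpr(IV, FloatTy); // -> {0.0,+,1.0}<L>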
@@ -2379,6 +2518,81 @@
  return S;
}

+const SCEV *ScalarEvolution::getFAddExpr(SmallVectorImpl<const SCEV *> &Ops) {
+  assert(!Ops.empty() && "Cannot get empty add!");
+  if (Ops.size() == 1) return Ops[0];
+
+  // Sort by complexity, this groups all similar expression types together.
+  GroupByComplexity(Ops, &LI);
+
+  // If there are any constants, fold them together.
+  unsigned Idx = 0;
+  if (const SCEVFpConstant *LHSC = dyn_cast<SCEVFpConstant>(Ops[0])) {
+    ++Idx;
+    assert(Idx < Ops.size());
+    while (const SCEVFpConstant *RHSC = dyn_cast<SCEVFpConstant>(Ops[Idx])) {
+      // We found two constants, fold them together!
+      Ops[0] = getFpConstant(LHSC->getAPFloat() + RHSC->getAPFloat());
+      if (Ops.size() == 2)
+        return Ops[0];
+      Ops.erase(Ops.begin() + 1);  // Erase the folded element
+      LHSC = cast<SCEVFpConstant>(Ops[0]);
+    }
+
+    // If we are left with a constant zero being added, strip it off.
+    if (LHSC->getValue()->isZero()) {
+      Ops.erase(Ops.begin());
+      --Idx;
+    }
+
+    if (Ops.size() == 1)
+      return Ops[0];
+  }
+
+  // Skip past any other cast SCEVs.
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scFAddExpr)
+    ++Idx;
+
+  // If there are add operands they would be next.
+  if (Idx < Ops.size()) {
+    bool DeletedAdd = false;
+    while (const SCEVFAddExpr *Add = dyn_cast<SCEVFAddExpr>(Ops[Idx])) {
+      // If we have an add, expand the add operands onto the end of the
+      // operands list.
+      Ops.erase(Ops.begin() + Idx);
+      Ops.append(Add->op_begin(), Add->op_end());
+      DeletedAdd = true;
+    }
+
+    // If we deleted at least one add, we added operands to the end of the
+    // list, and they are not necessarily sorted. Recurse to resort and
+    // resimplify any operands we just acquired.
+    if (DeletedAdd)
+      return getFAddExpr(Ops);
+  }
+
+  // FIXME: folding FAddRecExpr is not implemented yet.
+
+  // Check to see if we already have a matching FAddExpr, otherwise create a
+  // new one.
+  FoldingSetNodeID ID;
+  ID.AddInteger(scFAddExpr);
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    ID.AddPointer(Ops[i]);
+  void *IP = nullptr;
+  SCEVFAddExpr *S =
+    static_cast<SCEVFAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+    S = new (SCEVAllocator) SCEVFAddExpr(ID.Intern(SCEVAllocator),
+                                         O, Ops.size());
+    UniqueSCEVs.InsertNode(S, IP);
+  }
+  return S;
+}
+
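Note: a small example of what the flattening and constant folding above buy.
Nested FAdds are inlined and their SCEVFpConstant operands combined, so
(1.0 + X) + 2.0 becomes (3.0 + X). Sketch; X stands for any non-constant
float SCEV and FloatTy/SE are assumed as before:

  const SCEV *S = SE.getFAddExpr(
      SE.getFAddExpr(SE.getFpConstant(FloatTy, 1.0), X),
      SE.getFpConstant(FloatTy, 2.0));   // folds to (3.0 + X)

This reassociates FP addition unconditionally; the folder does not yet consult
fast-math flags, which is something reviewers may want to weigh in on.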
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k = i*j;
  if (j > 1 && k / j != i) Overflow = true;
@@ -2418,10 +2632,11 @@
  Ops.push_back(StartExpr);
  while (!Ops.empty()) {
    const SCEV *CurrentExpr = Ops.pop_back_val();
-    if (isa<SCEVConstant>(*CurrentExpr))
+    if (isa<SCEVIntOrFpConstant>(*CurrentExpr))
      return true;

-    if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
+    if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr) ||
+        isa<SCEVFAddExpr>(*CurrentExpr) || isa<SCEVFMulExpr>(*CurrentExpr)) {
      const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
      Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
    }
@@ -2435,6 +2650,9 @@
  assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
         "only nuw or nsw allowed");
  assert(!Ops.empty() && "Cannot get empty mul!");
+  assert(!Ops[0]->getType()->isFloatingPointTy() &&
+         "SCEVMulExpr operands should be integer");
+
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
@@ -2669,6 +2887,101 @@
  return S;
}

+const SCEV *ScalarEvolution::getFMulExpr(SmallVectorImpl<const SCEV *> &Ops) {
+
+  assert(!Ops.empty() && "Cannot get empty mul!");
+  assert(Ops[0]->getType()->isFloatingPointTy() &&
+         "SCEVFMulExpr operands should be FP");
+
+  if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+  Type *ETy = Ops[0]->getType();
+  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+    assert(Ops[i]->getType() == ETy &&
+           "SCEVFMulExpr operand types don't match!");
+#endif
+
+  // Sort by complexity, this groups all similar expression types together.
+  GroupByComplexity(Ops, &LI);
+
+  // If there are any constants, fold them together.
+  unsigned Idx = 0;
+  if (const SCEVFpConstant *LHSC = dyn_cast<SCEVFpConstant>(Ops[0])) {
+
+    // C1*(C2+V) -> C1*C2 + C1*V
+    if (Ops.size() == 2)
+      if (const SCEVFAddExpr *Add = dyn_cast<SCEVFAddExpr>(Ops[1]))
+        // If any of Add's ops are Adds or Muls with a constant,
+        // apply this transformation as well.
+        if (Add->getNumOperands() == 2)
+          if (containsConstantSomewhere(Add))
+            return getFAddExpr(getFMulExpr(LHSC, Add->getOperand(0)),
+                               getFMulExpr(LHSC, Add->getOperand(1)));
+
+    ++Idx;
+    while (const SCEVFpConstant *RHSC = dyn_cast<SCEVFpConstant>(Ops[Idx])) {
+      // We found two constants, fold them together!
+      ConstantFP *Fold =
+        ConstantFP::get(getContext(), LHSC->getAPFloat() * RHSC->getAPFloat());
+      Ops[0] = getFpConstant(Fold);
+      Ops.erase(Ops.begin() + 1);  // Erase the folded element
+      if (Ops.size() == 1)
+        return Ops[0];
+      LHSC = cast<SCEVFpConstant>(Ops[0]);
+    }
+
+    // If we are left with a constant one being multiplied, strip it off.
+    ConstantFP *Op0Val = cast<SCEVFpConstant>(Ops[0])->getValue();
+    if (Op0Val->isExactlyValue(1.0)) {
+      Ops.erase(Ops.begin());
+      --Idx;
+    } else if (Op0Val->isZero())
+      // If we have a multiply of zero, it will always be zero.
+      return Ops[0];
+
+    if (Ops.size() == 1)
+      return Ops[0];
+  }
+
+  // Skip over the add expressions until we get to a multiply.
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scFMulExpr)
+    ++Idx;
+
+  // If there are mul operands, inline them all into this expression.
+  if (Idx < Ops.size()) {
+    bool DeletedMul = false;
+    while (const SCEVFMulExpr *Mul = dyn_cast<SCEVFMulExpr>(Ops[Idx])) {
+      // If we have a mul, expand the mul operands onto the end of the
+      // operands list.
+      Ops.erase(Ops.begin() + Idx);
+      Ops.append(Mul->op_begin(), Mul->op_end());
+      DeletedMul = true;
+    }
+
+    // If we deleted at least one mul, we added operands to the end of the
+    // list, and they are not necessarily sorted. Recurse to resort and
+    // resimplify any operands we just acquired.
+    if (DeletedMul)
+      return getFMulExpr(Ops);
+  }
+
+  FoldingSetNodeID ID;
+  ID.AddInteger(scFMulExpr);
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    ID.AddPointer(Ops[i]);
+  void *IP = nullptr;
+  SCEVFMulExpr *S =
+    static_cast<SCEVFMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+    S = new (SCEVAllocator) SCEVFMulExpr(ID.Intern(SCEVAllocator),
+                                         O, Ops.size());
+    UniqueSCEVs.InsertNode(S, IP);
+  }
+  return S;
+}
+
/// Get a canonical unsigned division expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
@@ -2858,6 +3171,40 @@
  return getUDivExpr(LHS, RHS);
}

+const SCEV *ScalarEvolution::getFAddRecExpr(const SCEV *Start, const SCEV *Step,
+                                            const Loop *L) {
+  // FIXME: nesting of FAddRecExpr should be implemented
+  SmallVector<const SCEV *, 4> Operands;
+  Operands.push_back(Start);
+  Operands.push_back(Step);
+  return getFAddRecExpr(Operands, L);
+}
+
+const SCEV *
+ScalarEvolution::getFAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+                                const Loop *L) {
+  if (Operands.size() == 1)
+    return Operands[0];
+
+  // FIXME: nesting of FAddRecExpr should be implemented
+  FoldingSetNodeID ID;
+  ID.AddInteger(scFAddRecExpr);
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+    ID.AddPointer(Operands[i]);
+  ID.AddPointer(L);
+  void *IP = nullptr;
+  SCEVFAddRecExpr *S =
+    static_cast<SCEVFAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
+    std::uninitialized_copy(Operands.begin(), Operands.end(), O);
+    S = new (SCEVAllocator) SCEVFAddRecExpr(ID.Intern(SCEVAllocator),
+                                            O, Operands.size(), L);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
+  return S;
+}
+
/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
@@ -3285,9 +3632,10 @@
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
+/// The half, float and double FP types are now part of this set as well.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
-  // Integers and pointers are always SCEVable.
-  return Ty->isIntegerTy() || Ty->isPointerTy();
+  return Ty->isIntegerTy() || Ty->isPointerTy() || Ty->isFloatTy() ||
+         Ty->isDoubleTy() || Ty->isHalfTy();
}

/// Return the size in bits of the specified type, for which isSCEVable must
@@ -3303,7 +3651,8 @@
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");

-  if (Ty->isIntegerTy())
+  if (Ty->isIntegerTy() || Ty->isFloatTy() || Ty->isDoubleTy() ||
+      Ty->isHalfTy())
    return Ty;

  // The only other support type is pointer.
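Note: the observable effect of the isSCEVable/getEffectiveSCEVType change, as
assertions (sketch; SE and Ctx assumed):

  assert(SE.isSCEVable(Type::getHalfTy(Ctx)));
  assert(SE.isSCEVable(Type::getFloatTy(Ctx)));
  assert(SE.isSCEVable(Type::getDoubleTy(Ctx)));
  assert(!SE.isSCEVable(Type::getX86_FP80Ty(Ctx))); // other FP types stay out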
@@ -3327,6 +3676,7 @@
    bool follow(const SCEV *S) {
      switch (static_cast<SCEVTypes>(S->getSCEVType())) {
      case scConstant:
+      case scFpConstant:
        return false;
      case scUnknown:
        if (!cast<SCEVUnknown>(S)->getValue())
@@ -3359,6 +3709,7 @@
      case scAddRecExpr:
        FoundOne = true;
      case scConstant:
+      case scFpConstant:
      case scUnknown:
      case scCouldNotCompute:
        return false;
@@ -3460,6 +3811,16 @@
    V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}

+/// Return a FP SCEV corresponding to -V = -1*V
+const SCEV *ScalarEvolution::getNegativeFpSCEV(const SCEV *V) {
+  if (const SCEVFpConstant *VC = dyn_cast<SCEVFpConstant>(V))
+    return getFpConstant(
+        cast<ConstantFP>(ConstantExpr::getFNeg(VC->getValue())));
+
+  Type *Ty = V->getType();
+  return getFMulExpr(V,
+                     getFpConstant(cast<ConstantFP>(ConstantFP::get(Ty, -1.0))));
+}
+
/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
@@ -3837,6 +4198,9 @@
  case Instruction::Or:
  case Instruction::AShr:
  case Instruction::Shl:
+  case Instruction::FAdd:
+  case Instruction::FSub:
+  case Instruction::FMul:
    return BinaryOp(Op);

  case Instruction::Xor:
@@ -4029,7 +4393,36 @@
          return PHISCEV;
        }
      }
-    } else {
+    } else if (const SCEVFAddExpr *Add = dyn_cast<SCEVFAddExpr>(BEValue)) {
+      unsigned FoundIndex = Add->getNumOperands();
+      for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+        if (Add->getOperand(i) == SymbolicName)
+          if (FoundIndex == e) {
+            FoundIndex = i;
+            break;
+          }
+
+      if (FoundIndex != Add->getNumOperands()) {
+        // Create an add with everything but the specified operand.
+        SmallVector<const SCEV *, 8> Ops;
+        for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+          if (i != FoundIndex)
+            Ops.push_back(Add->getOperand(i));
+        const SCEV *Accum = getFAddExpr(Ops);
+        const SCEV *StartVal = getSCEV(StartValueV);
+        const SCEV *PHISCEV = getFAddRecExpr(StartVal, Accum, L);
+
+        // Okay, for the entire analysis of this edge we assumed the PHI
+        // to be symbolic. We now need to go back and purge all of the
+        // entries for the scalars that use the symbolic expression.
+        forgetSymbolicName(PN, SymbolicName);
+        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+        // We don't know how to expand an FAddRecExpr to a PHI, so map the
+        // FAddRecExpr back to this PHI and do not modify it.
+        ExprValueMap[PHISCEV].insert(PN);
+        return PHISCEV;
+      }
+    } else if (!PN->getType()->isFloatingPointTy()) {
      // Otherwise, this could be a loop like this:
      //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
      // In this case, j = {1,+,1}  and BEValue is j.
@@ -4090,16 +4483,18 @@
    switch (S->getSCEVType()) {
    case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
    case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+    case scFpConstant: case scSintToFp: case scFAddExpr: case scFMulExpr:
      // These expressions are available if their operand(s) is/are.
      return true;

-    case scAddRecExpr: {
+    case scAddRecExpr:
+    case scFAddRecExpr: {
      // We allow add recurrences that are on the loop BB is in, or some
      // outer loop.  This guarantees availability because the value of the
      // add recurrence at BB is simply the "current" value of the induction
      // variable.  We can relax this in the future; for instance an add
      // recurrence on a sibling dominating loop is also available at BB.
-      const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
+      const auto *ARLoop = cast<SCEVRecExpr>(S)->getLoop();
      if (L && (ARLoop == L || ARLoop->contains(L)))
        return true;
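Note: what the new createAddRecFromPHI branch produces, in a sketch. For an FP
PHI whose back-edge value is "x + step", getSCEV now yields an FAddRec, which
is deliberately mapped back to the PHI itself since the expander cannot
materialize a fresh FP recurrence PHI yet. Phi is an assumed float PHI node:

  const SCEV *PhiSCEV = SE.getSCEV(Phi);   // -> {init,+,step}<L>
  bool IsRec = isa<SCEVFAddRecExpr>(PhiSCEV);
  (void)IsRec;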
@@ -4212,6 +4607,11 @@
  if (const SCEV *S = createAddRecFromPHI(PN))
    return S;

+  // FIXME: all further attempts to create a SCEV for an FP PHI still need to
+  // be implemented.
+  if (PN->getType()->isFloatingPointTy())
+    return getUnknown(PN);
+
  if (const SCEV *S = createNodeFromSelectLikePHI(PN))
    return S;

@@ -4241,6 +4641,10 @@
  if (!ICI)
    return getUnknown(I);

+  // FMax and FMin may also be implemented in the future.
+  if (I->getType()->isFloatingPointTy())
+    return getUnknown(I);
+
  Value *LHS = ICI->getOperand(0);
  Value *RHS = ICI->getOperand(1);

@@ -4941,6 +5345,8 @@
      return getUnknown(V);
  } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
    return getConstant(CI);
+  else if (ConstantFP *CI = dyn_cast<ConstantFP>(V))
+    return getFpConstant(CI);
  else if (isa<ConstantPointerNull>(V))
    return getZero(V->getType());
  else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
@@ -5025,7 +5431,7 @@
        MulOps.push_back(getSCEV(BO->LHS));
        break;
      }
-      BO = NewBO;
+      BO = NewBO;
    } while (true);

    return getMulExpr(MulOps);
@@ -5192,6 +5598,58 @@
                               BO->LHS->getType());
    }
    break;
+
+  case Instruction::FAdd: {
+    // The simple thing to do would be to just call getSCEV on both operands
+    // and call getFAddExpr with the result. However if we're looking at a
+    // bunch of things all added together, this can be quite inefficient,
+    // because it leads to N-1 getFAddExpr calls for N ultimate operands.
+    // Instead, gather up all the operands and make a single getFAddExpr call.
+    // LLVM IR canonical form means we need only traverse the left operands.
+    SmallVector<const SCEV *, 4> AddOps;
+    do {
+      if (BO->Op)
+        if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
+          AddOps.push_back(OpSCEV);
+          break;
+        }
+      if (BO->Opcode == Instruction::FSub)
+        AddOps.push_back(getNegativeFpSCEV(getSCEV(BO->RHS)));
+      else
+        AddOps.push_back(getSCEV(BO->RHS));
+
+      auto NewBO = MatchBinaryOp(BO->LHS, DT);
+      if (!NewBO || (NewBO->Opcode != Instruction::FAdd &&
+                     NewBO->Opcode != Instruction::FSub)) {
+        AddOps.push_back(getSCEV(BO->LHS));
+        break;
+      }
+      BO = NewBO;
+    } while (true);
+
+    return getFAddExpr(AddOps);
+  }
+
+  case Instruction::FSub:
+    return getFAddExpr(getSCEV(BO->LHS), getNegativeFpSCEV(getSCEV(BO->RHS)));
+
+  case Instruction::FMul: {
+    SmallVector<const SCEV *, 4> MulOps;
+    do {
+      if (BO->Op)
+        if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
+          MulOps.push_back(OpSCEV);
+          break;
+        }
+
+      MulOps.push_back(getSCEV(BO->RHS));
+      auto NewBO = MatchBinaryOp(BO->LHS, DT);
+      if (!NewBO || NewBO->Opcode != Instruction::FMul) {
+        MulOps.push_back(getSCEV(BO->LHS));
+        break;
+      }
+      BO = NewBO;
+    } while (true);

+    return getFMulExpr(MulOps);
+  }
  }
}

@@ -5206,11 +5664,20 @@
    return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());

  case Instruction::BitCast:
-    // BitCasts are no-op casts so we just eliminate the cast.
-    if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+    // SCEV propagation for bitcasts between integer and FP types is not
+    // supported right now; all other bitcasts are no-ops and are simply
+    // looked through.
+    if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) {
+      if (U->getType()->isFloatingPointTy() !=
+          U->getOperand(0)->getType()->isFloatingPointTy())
+        return getUnknown(V);
      return getSCEV(U->getOperand(0));
+    }
    break;

+  case Instruction::SIToFP:
+    return getSIToFPExpr(getSCEV(U->getOperand(0)), U->getType());
+
  // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
  // lead to pointer expressions which cannot safely be expanded to GEPs,
  // because ScalarEvolution doesn't respect the GEP aliasing rules when
@@ -5907,6 +6374,7 @@
    return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit,
                                    /*AllowPredicates=*/true);
  }
+  // We do not try to compute an exit limit from an FP compare.

  // Check for a constant condition. These are normally stripped out by
  // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -6627,9 +7095,11 @@
  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
    case scCouldNotCompute:
    case scAddRecExpr:
+    case scFAddRecExpr:
      break;
    case scConstant:
-      return cast<SCEVConstant>(V)->getValue();
+    case scFpConstant:
+      return cast<SCEVIntOrFpConstant>(V)->getValue();
    case scUnknown:
      return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
    case scSignExtend: {
@@ -6650,6 +7120,24 @@
        return ConstantExpr::getTrunc(CastOp, ST->getType());
      break;
    }
+    case scSintToFp: {
+      const SCEVSintToFpExpr *SF = cast<SCEVSintToFpExpr>(V);
+      if (Constant *CastOp = BuildConstantFromSCEV(SF->getOperand()))
+        return ConstantExpr::getSIToFP(CastOp, SF->getType());
+      break;
+    }
+    case scFAddExpr: {
+      const SCEVFAddExpr *SA = cast<SCEVFAddExpr>(V);
+      if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+        for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+          Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+          if (!C2) return nullptr;
+          C = ConstantExpr::getAdd(C, C2);
+        }
+        return C;
+      }
+      break;
+    }
    case scAddExpr: {
      const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
      if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
@@ -6703,6 +7191,19 @@
      }
      break;
    }
+    case scFMulExpr: {
+      const SCEVFMulExpr *SM = cast<SCEVFMulExpr>(V);
+      if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+        for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+          Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+          if (!C2)
+            return nullptr;
+          C = ConstantExpr::getFMul(C, C2);
+        }
+        return C;
+      }
+      break;
+    }
    case scUDivExpr: {
      const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
      if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
@@ -6719,7 +7220,9 @@
}

const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
-  if (isa<SCEVConstant>(V)) return V;
+
+  if (isa<SCEVIntOrFpConstant>(V))
+    return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
@@ -6824,6 +7327,10 @@
        return getSMaxExpr(NewOps);
      if (isa<SCEVUMaxExpr>(Comm))
        return getUMaxExpr(NewOps);
+      if (isa<SCEVFAddExpr>(Comm))
+        return getFAddExpr(NewOps);
+      if (isa<SCEVFMulExpr>(Comm))
+        return getFMulExpr(NewOps);
      llvm_unreachable("Unknown commutative SCEV type!");
    }
  }
@@ -6884,6 +7391,47 @@
    return AddRec;
  }

+  if (const SCEVFAddRecExpr *AddRec = dyn_cast<SCEVFAddRecExpr>(V)) {
+    // First, attempt to evaluate each operand.
+    // Avoid performing the look-up in the common case where the specified
+    // expression has no loop-variant portions.
+    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+      const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+      if (OpAtScope == AddRec->getOperand(i))
+        continue;
+
+      // Okay, at least one of these operands is loop variant but might be
+      // foldable. Build a new instance of the folded commutative expression.
+      SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+                                          AddRec->op_begin() + i);
+      NewOps.push_back(OpAtScope);
+      for (++i; i != e; ++i)
+        NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+      const SCEV *FoldedRec = getFAddRecExpr(NewOps, AddRec->getLoop());
+      AddRec = dyn_cast<SCEVFAddRecExpr>(FoldedRec);
+      // The addrec may be folded to a nonrecurrence, for example, if the
+      // induction variable is multiplied by zero after constant folding. Go
+      // ahead and return the folded value.
+      if (!AddRec)
+        return FoldedRec;
+      break;
+    }
+
+    // If the scope is outside the addrec's loop, evaluate it by using the
+    // loop exit value of the addrec.
+    if (!AddRec->getLoop()->contains(L)) {
+      // To evaluate this recurrence, we need to know how many times the AddRec
+      // loop iterates.  Compute this now.
+      const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+      if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
+
+      // Then, evaluate the AddRec.
+      return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
+    }
+
+    return AddRec;
+  }

  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
@@ -6906,6 +7454,42 @@
    return getTruncateExpr(Op, Cast->getType());
  }

+  if (const SCEVSintToFpExpr *Cast = dyn_cast<SCEVSintToFpExpr>(V)) {
+    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+    if (Op == Cast->getOperand())
+      return Cast;  // must be loop invariant
+    return getSIToFPExpr(Op, Cast->getType());
+  }
+
  llvm_unreachable("Unknown SCEV type!");
}

@@ -9636,7 +10220,9 @@
  F.printAsOperand(OS, /*PrintType=*/false);
  OS << "\n";
  for (Instruction &I : instructions(F))
-    if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
+    // FIXME: printing of FP SCEVs is not implemented yet.
+    if (isSCEVable(I.getType()) && !isa<CmpInst>(I) &&
+        !I.getType()->isFloatingPointTy()) {
      OS << I << '\n';
      OS << "  -->  ";
      const SCEV *SV = SE.getSCEV(&I);
@@ -9734,13 +10320,16 @@
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  case scConstant:
+  case scFpConstant:
    return LoopInvariant;
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
+  case scSintToFp:
    return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+  case scFAddRecExpr:
  case scAddRecExpr: {
-    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+    const SCEVRecExpr *AR = cast<SCEVRecExpr>(S);

    // If L is the addrec's loop, it's computable.
    if (AR->getLoop() == L)
@@ -9769,6 +10358,8 @@
  }
  case scAddExpr:
  case scMulExpr:
+  case scFAddExpr:
+  case scFMulExpr:
  case scUMaxExpr:
  case scSMaxExpr: {
    bool HasVarying = false;
@@ -9837,23 +10428,30 @@
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  case scConstant:
+  case scFpConstant:
    return ProperlyDominatesBlock;
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
+  case scSintToFp:
    return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
-  case scAddRecExpr: {
-    // This uses a "dominates" query instead of "properly dominates" query
-    // to test for proper dominance too, because the instruction which
-    // produces the addrec's value is a PHI, and a PHI effectively properly
-    // dominates its entire containing block.
-    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
-    if (!DT.dominates(AR->getLoop()->getHeader(), BB))
+  case scFAddRecExpr:
+  case scAddRecExpr:
+    if (auto AR = dyn_cast<SCEVAddRecExpr>(S)) {
+      // This uses a "dominates" query instead of "properly dominates" query
+      // to test for proper dominance too, because the instruction which
+      // produces the addrec's value is a PHI, and a PHI effectively properly
+      // dominates its entire containing block.
+      if (!DT.dominates(AR->getLoop()->getHeader(), BB))
+        return DoesNotDominateBlock;
+    } else if (!DT.dominates(cast<SCEVFAddRecExpr>(S)->getLoop()->getHeader(),
+                             BB))
      return DoesNotDominateBlock;
-  }
+
    // FALL THROUGH into SCEVNAryExpr handling.
  case scAddExpr:
+  case scFAddExpr:
  case scMulExpr:
+  case scFMulExpr:
  case scUMaxExpr:
  case scSMaxExpr: {
    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
@@ -10468,7 +11066,7 @@
  // For each block.
  for (auto *BB : L.getBlocks())
    for (auto &I : *BB) {
-      if (!SE.isSCEVable(I.getType()))
+      if (!SE.isSCEVable(I.getType()) || I.getType()->isFloatingPointTy())
        continue;

      auto *Expr = SE.getSCEV(&I);
Index: ../lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- ../lib/Analysis/ScalarEvolutionExpander.cpp
+++ ../lib/Analysis/ScalarEvolutionExpander.cpp
@@ -599,7 +599,7 @@
  if (!Pair.second)
    return Pair.first->second;

-  if (isa<SCEVConstant>(S))
+  if (isa<SCEVIntOrFpConstant>(S))
    // A constant has no relevant loops.
    return nullptr;
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -610,7 +610,7 @@
  }
  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
    const Loop *L = nullptr;
-    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+    if (const SCEVRecExpr *AR = dyn_cast<SCEVRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : N->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
@@ -733,6 +733,36 @@
  return Sum;
}

+Value *SCEVExpander::visitFAddExpr(const SCEVFAddExpr *S) {
+
+  // Collect all the add operands in a loop, along with their associated
+  // loops. Iterate in reverse so that constants are emitted last, all else
+  // equal.
+  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+  for (std::reverse_iterator<SCEVFAddExpr::op_iterator> I(S->op_end()),
+       E(S->op_begin()); I != E; ++I)
+    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+  // Sort by loop. Use a stable sort so that constants follow non-constants.
+  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+
+  // Emit instructions to add all the operands. Hoist as much as possible
+  // out of loops.
+  Value *Sum = nullptr;
+  for (const auto &I : OpsAndLoops) {
+    const SCEV *Op = I.second;
+    if (!Sum)
+      // This is the first operand. Just expand it.
+      Sum = expand(Op);
+    else
+      // A simple fadd.
+      Sum = InsertBinop(Instruction::FAdd, Sum, expandCodeFor(Op));
+  }
+
+  return Sum;
+}
+
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

@@ -779,6 +809,32 @@
  return Prod;
}

+Value *SCEVExpander::visitFMulExpr(const SCEVFMulExpr *S) {
+
+  // Collect all the mul operands in a loop, along with their associated
+  // loops. Iterate in reverse so that constants are emitted last, all else
+  // equal.
+  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+  for (std::reverse_iterator<SCEVFMulExpr::op_iterator> I(S->op_end()),
+       E(S->op_begin()); I != E; ++I)
+    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+  // Sort by loop. Use a stable sort so that constants follow non-constants.
+  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+
+  Value *Prod = nullptr;
+  for (const auto &I : OpsAndLoops) {
+    const SCEV *Op = I.second;
+    if (!Prod)
+      // This is the first operand. Just expand it.
+      Prod = expand(Op);
+    else
+      // A simple fmul.
+      Prod = InsertBinop(Instruction::FMul, Prod, expandCodeFor(Op));
+  }
+
+  return Prod;
+}
+
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

@@ -1388,6 +1444,10 @@
  return Result;
}

+Value *SCEVExpander::visitFAddRecExpr(const SCEVFAddRecExpr *S) {
+  llvm_unreachable("FAddRecExpr should be mapped to an existing phi node");
+}
+
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  if (!CanonicalMode) return expandAddRecExprLiterally(S);

@@ -1549,6 +1609,15 @@
  return I;
}

+Value *SCEVExpander::visitSintToFpExpr(const SCEVSintToFpExpr *S) {
+  Type *Ty = SE.getEffectiveSCEVType(S->getType());
+  Value *V = expandCodeFor(S->getOperand(),
+                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
+  Value *I = Builder.CreateSIToFP(V, Ty);
+  rememberInstruction(I);
+  return I;
+}
+
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
  Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
  Type *Ty = LHS->getType();
@@ -1769,7 +1838,7 @@
    return V;
  if (!SE.isSCEVable(PN->getType()))
    return nullptr;
-  auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
+  auto *Const = dyn_cast<SCEVIntOrFpConstant>(SE.getSCEV(PN));
  if (!Const)
    return nullptr;
  return Const->getValue();
@@ -1788,7 +1857,7 @@
      continue;
    }

-    if (!SE.isSCEVable(Phi->getType()))
+    if (!SE.isSCEVable(Phi->getType()) || Phi->getType()->isFloatingPointTy())
      continue;

    PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
@@ -1927,6 +1996,7 @@
  switch (S->getSCEVType()) {
  case scUnknown:
  case scConstant:
+  case scFpConstant:
    return false;
  case scTruncate:
    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
                                     L, At, Processed);
@@ -1937,6 +2007,9 @@
  case scSignExtend:
    return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
                                     L, At, Processed);
+  case scSintToFp:
+    return isHighCostExpansionHelper(cast<SCEVSintToFpExpr>(S)->getOperand(),
+                                     L, At, Processed);
  }

  if (!Processed.insert(S).second)
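Note: what expanding a scSintToFp node boils down to, as a sketch. Expander,
IntS (an i32 SCEV), FloatTy and InsertPt are assumed names:

  Value *V = Expander.expandCodeFor(SE.getSIToFPExpr(IntS, FloatTy), FloatTy,
                                    InsertPt);
  // emits, via visitSintToFpExpr:  %v = sitofp i32 %i to float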
Index: ../lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- ../lib/Transforms/Scalar/IndVarSimplify.cpp
+++ ../lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -570,7 +570,9 @@
    //   optimized away.
    // - no use outside of the loop can take advantage of hoisting the
    //   computation out of the loop
-    if (ExitValue->getSCEVType()>=scMulExpr) {
+    if ((!ExitValue->getType()->isFloatingPointTy() &&
+         ExitValue->getSCEVType() >= scMulExpr) ||
+        ExitValue->getSCEVType() >= scFMulExpr) {
      unsigned NumHardInternalUses = 0;
      unsigned NumSoftExternalUses = 0;
      unsigned NumUses = 0;
@@ -1173,7 +1175,8 @@
/// the loop with SCEV reducing the value to a recurrence on the same loop. If
/// so, return the sign or zero extended recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
-  if (!SE->isSCEVable(NarrowUse->getType()))
+  if (!SE->isSCEVable(NarrowUse->getType()) ||
+      NarrowUse->getType()->isFloatingPointTy())
    return nullptr;

  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
Index: ../lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- ../lib/Transforms/Utils/LoopUtils.cpp
+++ ../lib/Transforms/Utils/LoopUtils.cpp
@@ -672,7 +672,11 @@
  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
-  assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
+  assert(((IK != IK_PtrInduction && IK != IK_IntInduction) ||
+          Step->getType()->isIntegerTy()) && "StepValue is not an integer");
+
+  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
+         "StepValue is not FP for FpInduction");
}

int InductionDescriptor::getConsecutiveDirection() const {
@@ -725,24 +729,68 @@
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
    return B.CreateGEP(nullptr, StartValue, Index);
  }
+  case IK_FpInduction: {
+    assert(Index->getType() == Step->getType() &&
+           "Index type does not match StepValue type");
+    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+    const SCEV *S = SE->getFAddExpr(SE->getSCEV(StartValue),
+                                    SE->getFMulExpr(Step, SE->getSCEV(Index)));
+    return Exp.expandCodeFor(S, nullptr, &*B.GetInsertPoint());
+  }
  case IK_NoInduction:
    return nullptr;
  }
  llvm_unreachable("invalid enum");
}

+bool InductionDescriptor::isFpInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+                                           InductionDescriptor &D) {
+
+  // Here we only handle FP induction variables.
+  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
+
+  // Check that the PHI is consecutive.
+  const SCEV *PhiScev = SE->getSCEV(Phi);
+  const SCEVFAddRecExpr *AR = dyn_cast<SCEVFAddRecExpr>(PhiScev);
+
+  if (!AR) {
+    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+    return false;
+  }
+
+  assert(AR->getLoop()->getHeader() == Phi->getParent() &&
+         "PHI is an AddRec for a different loop?!");
+  Value *StartValue =
+    Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // The Step should be loop invariant for induction variables.
+  if (!SE->isLoopInvariant(Step, AR->getLoop()))
+    return false;
+
+  D = InductionDescriptor(StartValue, IK_FpInduction, Step);
+  return true;
+}
+
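Note: intended usage of the new query, as a sketch. Phi and SE are assumed to
be in scope, and the start-value accessor is the existing descriptor API:

  InductionDescriptor ID;
  if (Phi->getType()->isFloatingPointTy() &&
      InductionDescriptor::isFpInductionPHI(Phi, SE, ID)) {
    Value *Start = ID.getStartValue();  // preheader incoming value
    // The step is the loop-invariant value recovered from the FAddRec's
    // getStepRecurrence().
    (void)Start;
  }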
   if (Assume && !AR)
     AR = PSE.getAsAddRec(Phi);
Index: ../lib/Transforms/Utils/SimplifyIndVar.cpp
===================================================================
--- ../lib/Transforms/Utils/SimplifyIndVar.cpp
+++ ../lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -667,7 +667,8 @@
 /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
 ///
 void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
-  if (!SE->isSCEVable(CurrIV->getType()))
+  if (!SE->isSCEVable(CurrIV->getType()) &&
+      !CurrIV->getType()->isFloatingPointTy())
     return;
 
   // Instructions processed by SimplifyIndvar for CurrIV.
Index: ../lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- ../lib/Transforms/Vectorize/LoopVectorize.cpp
+++ ../lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2144,8 +2144,9 @@
 Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
                                           Value *Step) {
   assert(Val->getType()->isVectorTy() && "Must be a vector");
-  assert(Val->getType()->getScalarType()->isIntegerTy() &&
-         "Elem must be an integer");
+  assert((Val->getType()->getScalarType()->isIntegerTy() ||
+          Val->getType()->getScalarType()->isFloatingPointTy()) &&
+         "Induction step must be an integer or FP");
   assert(Step->getType() == Val->getType()->getScalarType() &&
          "Step has wrong type");
   // Create the types.
@@ -2154,19 +2155,29 @@
   int VLen = Ty->getNumElements();
 
   SmallVector<Constant *, 8> Indices;
+  bool IsInteger = Step->getType()->isIntegerTy();
+
   // Create a vector of consecutive numbers from zero to VF.
   for (int i = 0; i < VLen; ++i)
-    Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
+    Indices.push_back(IsInteger
+                          ? ConstantInt::get(ITy, StartIdx + i)
+                          : ConstantFP::get(ITy, (double)(StartIdx + i)));
 
   // Add the consecutive indices to the vector value.
   Constant *Cv = ConstantVector::get(Indices);
   assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
   Step = Builder.CreateVectorSplat(VLen, Step);
   assert(Step->getType() == Val->getType() && "Invalid step vec");
-  // FIXME: The newly created binary instructions should contain nsw/nuw flags,
-  // which can be found from the original scalar operations.
-  Step = Builder.CreateMul(Cv, Step);
-  return Builder.CreateAdd(Val, Step, "induction");
+
+  if (IsInteger) {
+    // FIXME: The newly created binary instructions should contain nsw/nuw
+    // flags, which can be found from the original scalar operations.
+    Step = Builder.CreateMul(Cv, Step);
+    return Builder.CreateAdd(Val, Step, "induction");
+  }
+
+  // FP induction: use fmul/fadd instead of the integer mul/add.
+  Step = Builder.CreateFMul(Cv, Step);
+  return Builder.CreateFAdd(Val, Step, "induction");
 }
 
 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
@@ -3207,8 +3218,13 @@
     EndValue = CountRoundDown;
   } else {
     IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
-    Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
-                                     II.getStep()->getType(), "cast.crd");
+    Value *CRD;
+    if (II.getStep()->getType()->isIntegerTy())
+      CRD = B.CreateSExtOrTrunc(CountRoundDown, II.getStep()->getType(),
+                                "cast.crd");
+    else
+      CRD = B.CreateCast(Instruction::SIToFP, CountRoundDown,
+                         II.getStep()->getType(), "cast.crd");
     const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
     EndValue = II.transform(B, CRD, PSE.getSE(), DL);
     EndValue->setName("ind.end");
@@ -4119,6 +4135,24 @@
       }
       return;
     }
+    case InductionDescriptor::IK_FpInduction: {
+      assert(P->getType() == II.getStartValue()->getType() &&
+             "Types must match");
+      // Handle other induction variables that are now based on the
+      // canonical one.
+      Value *V = Induction;
+      if (P != OldInduction) {
+        V = Builder.CreateCast(Instruction::SIToFP, Induction, P->getType());
+        V = II.transform(Builder, V, PSE.getSE(), DL);
+        V->setName("fp.offset.idx");
+      }
+      Value *Broadcasted = getBroadcastInstrs(V);
+      // After broadcasting the induction variable we need to make the vector
+      // consecutive by adding 0, 1, 2, etc.
+      for (unsigned part = 0; part < UF; ++part)
+        Entry[part] = getStepVector(Broadcasted, VF * part, II.getStep());
+      return;
+    }
     case InductionDescriptor::IK_PtrInduction:
       // Handle the pointer induction variable case.
       assert(P->getType()->isPointerTy() && "Unexpected type.");
@@ -4660,10 +4694,12 @@
     const DataLayout &DL = Phi->getModule()->getDataLayout();
 
     // Get the widest type.
-    if (!WidestIndTy)
-      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
-    else
-      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
+    if (!PhiTy->isFloatingPointTy()) {
+      if (!WidestIndTy)
+        WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
+      else
+        WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
+    }
 
     // Int inductions are special because we only allow one IV.
     if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
@@ -6408,6 +6444,11 @@
   // When unrolling and the VF is 1, we only need to add a simple scalar.
   Type *ITy = Val->getType();
   assert(!ITy->isVectorTy() && "Val must be a scalar");
+
+  if (Val->getType()->isFloatingPointTy()) {
+    Constant *C = ConstantFP::get(ITy, (double)StartIdx);
+    return Builder.CreateFAdd(Val, Builder.CreateFMul(C, Step), "induction");
+  }
   Constant *C = ConstantInt::get(ITy, StartIdx);
   return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
 }
Index: ../test/Transforms/IndVarSimplify/floating-point-iv.ll
===================================================================
--- ../test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ ../test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -90,3 +90,45 @@
 ; CHECK: icmp slt i32 {{.*}}, 0
 ; CHECK-NEXT: br i1
 }
+
+; float fp_iv_simplify(float start, int N) {
+;   int i = 0;
+;   float res = start;
+;   for (; i < N; i++) {
+;     res += (float)0.1;
+;   }
+;   return res;
+; }
+
+; CHECK-LABEL: @fp_iv_simplify(
+; CHECK: for.body.preheader:
+; CHECK: %[[N:.*]] = sitofp i32 %{{.*}} to float
+; CHECK: %[[MUL_RES:.*]] = fmul float %[[N]], 0x3FB99999A0000000
+; CHECK: for.end.loopexit:
+; CHECK: fadd float %start, %[[MUL_RES]]
+
+define float @fp_iv_simplify(float %start, i32 %N) {
+entry:
+  %cmp2 = icmp sgt i32 %N, 0
+  br i1 %cmp2, label %for.body.preheader, label %for.end
+
+for.body.preheader:                  ; preds = %entry
+  br label %for.body
+
+for.body:                            ; preds = %for.body.preheader, %for.body
+  %res.04 = phi float [ %add, %for.body ], [ %start, %for.body.preheader ]
+  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %add = fadd float %res.04, 0x3FB99999A0000000
+  %inc = add nsw i32 %i.03, 1
+  %cmp = icmp slt i32 %inc, %N
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                    ; preds = %for.body
+  br label %for.end
+
+for.end:                             ; preds = %for.end.loopexit, %entry
+  %res.0.lcssa = phi float [ %start, %entry ], [ %add, %for.end.loopexit ]
+  ret float %res.0.lcssa
+}
Index: ../test/Transforms/LoopVectorize/float-induction.ll
===================================================================
--- ../test/Transforms/LoopVectorize/float-induction.ll
+++ ../test/Transforms/LoopVectorize/float-induction.ll
@@ -0,0 +1,150 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; CHECK-LABEL: @fp_iv_loop1(
+; CHECK: %[[FP_INC:.*]] = load float, float* @fp_inc, align 4
+; CHECK: %[[FP_INC_NEG:.*]] = fsub float -0.000000e+00, %[[FP_INC]]
+; CHECK: vector.body:
+; CHECK: %[[VAR1:.*]] = insertelement <4 x float> undef, float %[[FP_INC_NEG]], i32 0
+; CHECK: %[[VAR2:.*]] = shufflevector <4 x float> %[[VAR1]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK: fmul <4 x float> %[[VAR2]],
+; CHECK: store <4 x float>
+
+@fp_inc = common global float 0.000000e+00, align 4
+
+; void fp_iv_loop1(float init, float * __restrict__ A, int N) {
+;   float x = init;
+;   for (int i = 0; i < N; ++i) {
+;     A[i] = x;
+;     x -= fp_inc;
+;   }
+; }
+
+define void @fp_iv_loop1(float %init, float* noalias nocapture %A, i32 %N) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %N, 0
+  br i1 %cmp4, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                      ; preds = %entry
+  %0 = load float, float* @fp_inc, align 4
+  br label %for.body
+
+for.body:                            ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  store float %x.05, float* %arrayidx, align 4
+  %add = fsub float %x.05, %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                    ; preds = %for.body
+  br label %for.end
+
+for.end:                             ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+; void fp_iv_loop2(float init, float * __restrict__ A, int N) {
+;   float x = init;
+;   for (int i = 0; i < N; ++i) {
+;     A[i] = x;
+;     x += 0.5;
+;   }
+; }
+
+; CHECK-LABEL: @fp_iv_loop2(
+; CHECK: vector.body
+; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
+; CHECK: sitofp i64 %[[index]] to float
+; CHECK: %[[VAR1:.*]] = fmul float {{.*}}, 5.000000e-01
+; CHECK: %[[VAR2:.*]] = fadd float %[[VAR1]]
+; CHECK: insertelement <4 x float> undef, float %[[VAR2]], i32 0
+; CHECK: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK: fadd <4 x float> {{.*}},
+; CHECK: store <4 x float>
+
+define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %N, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader:                  ; preds = %entry
+  br label %for.body
+
+for.body:                            ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  store float %x.06, float* %arrayidx, align 4
+  %conv1 = fadd float %x.06, 5.000000e-01
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                    ; preds = %for.body
+  br label %for.end
+
+for.end:                             ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+; void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B,
+;                  float * __restrict__ C, int N) {
+;   int i = 0;
+;   float x = init;
+;   float y = 0.1;
+;   for (; i < N; ++i) {
+;     A[i] = x;
+;     x += fp_inc;
+;     y -= 0.5;
+;     B[i] = x + y;
+;     C[i] = y;
+;   }
+; }
+
+; CHECK-LABEL: @fp_iv_loop3(
+; CHECK: vector.body
+; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
+; CHECK: sitofp i64 %[[index]] to float
+; CHECK: %[[VAR1:.*]] = fmul float {{.*}}, -5.000000e-01
+; CHECK: %[[VAR2:.*]] = fadd float %[[VAR1]]
+; CHECK: insertelement <4 x float> undef, float %[[VAR2]], i32 0
+; CHECK: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK: fadd <4 x float> {{.*}},
+; CHECK: store <4 x float>
+
+define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) #0 {
+entry:
+  %cmp9 = icmp sgt i32 %N, 0
+  br i1 %cmp9, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                      ; preds = %entry
+  %0 = load float, float* @fp_inc, align 4
+  br label %for.body
+
+for.body:                            ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
+  %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  store float %x.011, float* %arrayidx, align 4
+  %add = fadd float %x.011, %0
+  %conv1 = fadd float %y.012, -5.000000e-01
+  %add2 = fadd float %conv1, %add
+  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
+  store float %add2, float* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
+  store float %conv1, float* %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                    ; preds = %for.body
+  br label %for.end
+
+for.end:                             ; preds = %for.end.loopexit, %entry
+  ret void
+}
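
Usage note (not part of the patch): the sketch below shows how a client pass might drive the FP-induction recognition added to LoopUtils.cpp above. It only uses the interface visible in this patch (isFpInductionPHI and the IK_FpInduction descriptor); the surrounding Loop *L and ScalarEvolution *SE are assumed to be provided by the caller, so treat this as an illustration rather than code from the change itself.

// Hypothetical caller (illustration only; assumes Loop *L and
// ScalarEvolution *SE from the surrounding pass).
for (Instruction &Inst : *L->getHeader()) {
  auto *Phi = dyn_cast<PHINode>(&Inst);
  if (!Phi)
    break; // PHI nodes are grouped at the top of the header block.

  // isFpInductionPHI() asserts on non-FP PHIs, so filter by type first.
  if (!Phi->getType()->isFloatingPointTy())
    continue;

  InductionDescriptor D;
  if (InductionDescriptor::isFpInductionPHI(Phi, SE, D)) {
    // Phi was matched as an FP recurrence start + step * i. D now holds
    // the start value and the loop-invariant FP step, and can be handed
    // to InductionDescriptor::transform() (the IK_FpInduction case above)
    // to materialize the induction value at a given iteration index.
  }
}

The early break relies on PHI nodes being grouped at the top of a well-formed basic block, the same assumption the existing integer-induction scans make.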