diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -120,6 +120,19 @@
     NoWrapMask = (1 << 3) - 1
   };
 
+  /// HasNonIntegralPointerFlag are bitfield indices into SubclassData.
+  ///
+  /// When constructing SCEV expressions for LLVM expressions with non-integral
+  /// pointer types, some additional processing is required to ensure that we
+  /// don't introduce any illegal transformations. However, non-integral pointer
+  /// types are a very rarely used feature, so we want to make sure to only do
+  /// such processing if they are actually used. To ensure minimal performance
+  /// impact, we memoize that fact using these flags.
+  enum HasNonIntegralPointerFlag {
+    FlagNoNIPointers = 0,
+    FlagHasNIPointers = (1 << 3)
+  };
+
   explicit SCEV(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
                 unsigned short ExpressionSize)
       : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
@@ -156,6 +169,10 @@
     return ExpressionSize;
   }
 
+  bool hasNonIntegralPointers() const {
+    return SubclassData & FlagHasNIPointers;
+  }
+
   /// Print out the internal representation of this scalar to the specified
   /// stream. This should really only be used for debugging purposes.
   void print(raw_ostream &OS) const;
@@ -790,7 +807,7 @@
                                         const BasicBlock *ExitingBlock);
 
   /// The terms "backedge taken count" and "exit count" are used
-  /// interchangeably to refer to the number of times the backedge of a loop 
+  /// interchangeably to refer to the number of times the backedge of a loop
   /// has executed before the loop is exited.
   enum ExitCountKind {
     /// An expression exactly describing the number of times the backedge has
@@ -803,7 +820,7 @@
   };
 
   /// Return the number of times the backedge executes before the given exit
-  /// would be taken; if not exactly computable, return SCEVCouldNotCompute. 
+  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
   /// For a single exit loop, this value is equivelent to the result of
   /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
   /// before the backedge is executed (ExitCount + 1) times. Note that there
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -226,6 +226,13 @@
       return getNoWrapFlags(FlagNW) != FlagAnyWrap;
     }
 
+    void setHasNIPtr(bool HasNIPtr) {
+      if (HasNIPtr)
+        SubclassData |= FlagHasNIPointers;
+      else
+        SubclassData &= ~FlagHasNIPointers;
+    }
+
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
       return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
@@ -262,19 +269,16 @@
 
     Type *Ty;
 
+  protected:
     SCEVAddExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
         : SCEVCommutativeExpr(ID, scAddExpr, O, N) {
-      auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) {
-        return Op->getType()->isPointerTy();
-      });
-      if (FirstPointerTypedOp != operands().end())
-        Ty = (*FirstPointerTypedOp)->getType();
-      else
-        Ty = getOperand(0)->getType();
     }
 
   public:
-    Type *getType() const { return Ty; }
+    // Returns the type of the add expression, either by inspecting the
+    // operands or by deferring to the SCEVAddNIExpr subclass.
+    Type *getType() const;
 
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
@@ -282,6 +286,46 @@
     }
   };
 
+  /// This node represents an addition of some number of SCEVs, one of which
+  /// is of non-integral pointer type, requiring us to know the type exactly
+  /// for correctness.
+  class SCEVAddNIExpr : public SCEVAddExpr {
+    friend class ScalarEvolution;
+    PointerType *NIType;
+
+    SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N,
+                  PointerType *NIType)
+        : SCEVAddExpr(ID, O, N), NIType(NIType) {
+      SubclassData |= FlagHasNIPointers;
+    }
+
+  public:
+    Type *getType() const { return NIType; }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static bool classof(const SCEV *S) {
+      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
+    }
+  };
+
+  inline Type *SCEVAddExpr::getType() const {
+    // In general, use the type of the first pointer-typed operand, if there
+    // is one. This doesn't usually matter, but it can help reduce casts when
+    // the expressions are expanded. In the (unusual) case that we're working
+    // with non-integral pointers, we have a subclass that stores that type
+    // explicitly.
+    if (hasNonIntegralPointers())
+      return cast<SCEVAddNIExpr>(this)->getType();
+
+    auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) {
+      return Op->getType()->isPointerTy();
+    });
+    if (FirstPointerTypedOp != operands().end())
+      return (*FirstPointerTypedOp)->getType();
+    else
+      return getOperand(0)->getType();
+  }
+
   /// This node represents multiplication of some number of SCEVs.
   class SCEVMulExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
@@ -291,7 +335,17 @@
         : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
 
   public:
-    Type *getType() const { return getOperand(0)->getType(); }
+    Type *getType() const {
+      // In general, we can't form SCEVMulExprs with non-integral pointer
+      // types, but for the moment we need to allow a special case: multiplying
+      // by -1 to be able to express the difference between two pointers. In
+      // order to maintain the invariant that SCEVs with the NI flag set should
+      // have a type corresponding to the contained NI pointer, we need to
+      // return the type of that pointer here.
+      if (hasNonIntegralPointers())
+        return getOperand(getNumOperands() - 1)->getType();
+      return getOperand(0)->getType();
+    }
 
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
@@ -539,9 +593,12 @@
     /// instances owned by a ScalarEvolution.
     SCEVUnknown *Next;
 
-    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
-                ScalarEvolution *se, SCEVUnknown *next) :
-      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
+                SCEVUnknown *next, bool ValueIsNIPtr)
+        : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {
+      if (ValueIsNIPtr)
+        SubclassData |= FlagHasNIPointers;
+    }
 
     // Implement CallbackVH.
     void deleted() override;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1734,9 +1734,9 @@
               getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
         }
-      
+
       // For a negative step, we can extend the operands iff doing so only
-      // traverses values in the range zext([0,UINT_MAX]). 
+      // traverses values in the range zext([0,UINT_MAX]).
       if (isKnownNegative(Step)) {
         const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
                                     getSignedRangeMin(Step));
@@ -2856,16 +2856,27 @@
                                                  SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scAddExpr);
-  for (const SCEV *Op : Ops)
-    ID.AddPointer(Op);
+  bool HasNIPtr = false;
+  PointerType *NIPtrType = nullptr;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    ID.AddPointer(Ops[i]);
+    if (Ops[i]->hasNonIntegralPointers()) {
+      HasNIPtr = true;
+      NIPtrType = cast<PointerType>(Ops[i]->getType());
+    }
+  }
   void *IP = nullptr;
   SCEVAddExpr *S =
       static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
-    S = new (SCEVAllocator)
-        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
+    if (HasNIPtr)
+      S = new (SCEVAllocator)
+          SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType);
+    else
+      S = new (SCEVAllocator)
+          SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
     addToLoopUseLists(S);
   }
@@ -2878,8 +2889,10 @@
                                        const Loop *L, SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scAddRecExpr);
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    assert(i == 0 || !Ops[i]->hasNonIntegralPointers());
     ID.AddPointer(Ops[i]);
+  }
   ID.AddPointer(L);
   void *IP = nullptr;
   SCEVAddRecExpr *S =
@@ -2893,6 +2906,7 @@
     addToLoopUseLists(S);
   }
   setNoWrapFlags(S, Flags);
+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
   return S;
 }
 
@@ -2901,8 +2915,11 @@
                                                  SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scMulExpr);
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+  bool HasNIPtr = false;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    HasNIPtr |= Ops[i]->hasNonIntegralPointers();
     ID.AddPointer(Ops[i]);
+  }
   void *IP = nullptr;
   SCEVMulExpr *S =
       static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
@@ -2915,6 +2932,7 @@
     addToLoopUseLists(S);
   }
   S->setNoWrapFlags(Flags);
+  S->setHasNIPtr(HasNIPtr);
   return S;
 }
 
@@ -3803,8 +3821,11 @@
     return ExistingSCEV;
   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
-  SCEV *S = new (SCEVAllocator)
+  SCEVMinMaxExpr *S = new (SCEVAllocator)
       SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
+  // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the
+  // operands all need to be of the same type.
+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
   UniqueSCEVs.InsertNode(S, IP);
   addToLoopUseLists(S);
 
@@ -3904,8 +3925,9 @@
            "Stale SCEVUnknown in uniquing map!");
     return S;
   }
+  bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType());
   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
-                                            FirstUnknown);
+                                            FirstUnknown, ValueIsNIPtr);
   FirstUnknown = cast<SCEVUnknown>(S);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
@@ -11502,7 +11524,7 @@
 
 bool ScalarEvolution::canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
                                         bool IsSigned) {
-  
+
   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
   const SCEV *One = getOne(Stride->getType());
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll b/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
--- a/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
@@ -2,7 +2,7 @@
 
 ; Address Space 10 is non-integral. The optimizer is not allowed to use
 ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
 target triple = "x86_64-unknown-linux-gnu"
 
 define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
@@ -43,3 +43,36 @@
 done:                                             ; preds = %if38
   ret void
 }
+
+; This is a bugpoint-reduced regression test - it doesn't make too much sense by itself,
+; but it creates the correct SCEV expressions to reproduce the issue. See
+; https://github.com/JuliaLang/julia/issues/31156 for the original bug report.
+define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) #0 {
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}}
+top:
+  br label %L42.L46_crit_edge.us
+
+L42.L46_crit_edge.us:                             ; preds = %L82.us.us.loopexit, %top
+  %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ]
+  %0 = sub i64 %value_phi11.us, %b
+  %1 = add i64 %0, %c
+  %spec.select = select i1 %g, i64 %d, i64 0
+  br label %L62.us.us
+
+L82.us.us.loopexit:                               ; preds = %L62.us.us
+  %2 = add i64 %e, %value_phi11.us
+  br label %L42.L46_crit_edge.us
+
+L62.us.us:                                        ; preds = %L62.us.us, %L42.L46_crit_edge.us
+  %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ]
+  %3 = add i64 %1, %value_phi21.us.us
+  %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3
+  %5 = load i8, i8 addrspace(13)* %4, align 1
+  %6 = add i64 %f, %value_phi21.us.us
+  br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.isvectorized", i32 1}
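
Commentary (not part of the patch): the "ni:" entries in the datalayout string declare which address spaces are non-integral, meaning pointers in them have no stable integer representation, so the optimizer must not round-trip them through ptrtoint/inttoptr. The sketch below is hand-written and only illustrative (the %off value and surrounding instructions are assumptions, not taken from the test above); it shows the rewrite the CHECK-NOT lines guard against and the pointer-typed form the CHECK line expects LoopStrengthReduce to produce instead.

; Illegal for addrspace(13), which the datalayout above marks non-integral:
;   %pi = ptrtoint i8 addrspace(13)* %base to i64
;   %ai = add i64 %pi, %off
;   %p  = inttoptr i64 %ai to i8 addrspace(13)*
;
; Expected instead, keeping the address arithmetic in pointer form:
;   %p  = getelementptr i8, i8 addrspace(13)* %base, i64 %off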