diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -118,6 +118,19 @@
     NoWrapMask = (1 << 3) - 1
   };
 
+  /// HasNonIntegralPointerFlag is a bitfield index into SubclassData.
+  ///
+  /// When constructing SCEV expressions for LLVM expressions with non-integral
+  /// pointer types, some additional processing is required to ensure that we
+  /// don't introduce any illegal transformations. However, non-integral pointer
+  /// types are a very rarely used feature, so we want to make sure to only do
+  /// such processing if they are actually used. To ensure minimal performance
+  /// impact, we memoize that fact using these flags.
+  enum HasNonIntegralPointerFlag {
+    FlagNoNIPointers = 0,
+    FlagHasNIPointers = (1 << 3)
+  };
+
   explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy,
                 unsigned short ExpressionSize)
       : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
@@ -154,6 +167,10 @@
     return ExpressionSize;
   }
 
+  bool hasNonIntegralPointers() const {
+    return SubclassData & FlagHasNIPointers;
+  }
+
   /// Print out the internal representation of this scalar to the specified
   /// stream. This should really only be used for debugging purposes.
   void print(raw_ostream &OS) const;
@@ -719,9 +736,8 @@
   unsigned getSmallConstantTripMultiple(const Loop *L,
                                         BasicBlock *ExitingBlock);
 
-  /// The terms "backedge taken count" and "exit count" are used
-  /// interchangeably to refer to the number of times the backedge of a loop
+  /// The terms "backedge taken count" and "exit count" are used
+  /// interchangeably to refer to the number of times the backedge of a loop
   /// has executed before the loop is exited.
   enum ExitCountKind {
     /// An expression exactly describing the number of times the backedge has
@@ -732,11 +748,11 @@
   };
 
   /// Return the number of times the backedge executes before the given exit
-  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
+  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
   /// For a single exit loop, this value is equivalent to the result of
   /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
   /// before the backedge is executed (ExitCount + 1) times. Note that there
-  /// is no guarantee about *which* exit is taken on the exiting iteration.
+  /// is no guarantee about *which* exit is taken on the exiting iteration.
   const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock,
                            ExitCountKind Kind = Exact);
@@ -765,7 +781,7 @@
   /// SCEVCouldNotCompute object.
   const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L) {
     return getBackedgeTakenCount(L, ConstantMaximum);
-  }
+  }
 
   /// Return true if the backedge taken count is either the value returned by
   /// getConstantMaxBackedgeTakenCount or zero.
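A note on the flag layout above: NoWrapMask occupies bits 0 through 2 of SubclassData, so FlagHasNIPointers claims the next free bit, (1 << 3). The standalone sketch below (the SCEVSketch type is hypothetical and not part of the patch) illustrates how the memoized NI bit coexists with the wrap-flag bits without clobbering them:

    #include <cassert>
    #include <cstdint>

    struct SCEVSketch {
      // Bits 0-2 hold the no-wrap flags (NoWrapMask == (1 << 3) - 1), so
      // bit 3 is the first free bit and can memoize NI-pointer-ness.
      enum { NoWrapMask = (1 << 3) - 1, FlagHasNIPointers = 1 << 3 };
      uint16_t SubclassData = 0;

      bool hasNonIntegralPointers() const {
        return SubclassData & FlagHasNIPointers;
      }
      void setHasNIPtr(bool HasNIPtr) {
        if (HasNIPtr)
          SubclassData |= FlagHasNIPointers;
        else
          SubclassData &= ~FlagHasNIPointers;
      }
    };

    int main() {
      SCEVSketch S;
      S.setHasNIPtr(true);
      assert(S.hasNonIntegralPointers());
      // Setting all wrap-flag bits leaves the NI bit untouched.
      S.SubclassData |= SCEVSketch::NoWrapMask;
      assert(S.hasNonIntegralPointers());
      return 0;
    }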
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -188,6 +188,13 @@
       return getNoWrapFlags(FlagNW) != FlagAnyWrap;
     }
 
+    void setHasNIPtr(bool HasNIPtr) {
+      if (HasNIPtr)
+        SubclassData |= FlagHasNIPointers;
+      else
+        SubclassData &= ~FlagHasNIPointers;
+    }
+
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
       return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
@@ -222,24 +229,54 @@
   class SCEVAddExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
 
+  protected:
     SCEVAddExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
         : SCEVCommutativeExpr(ID, scAddExpr, O, N) {}
 
   public:
-    Type *getType() const {
-      // Use the type of the last operand, which is likely to be a pointer
-      // type, if there is one. This doesn't usually matter, but it can help
-      // reduce casts when the expressions are expanded.
-      return getOperand(getNumOperands() - 1)->getType();
+    /// Returns the type of the add expression, by looking either at the last
+    /// operand or deferring to the SCEVAddNIExpr subclass for non-integral
+    /// pointers.
+    Type *getType() const;
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr; }
+  };
+
+  /// This node represents an addition of some number of SCEVs, one of which
+  /// is a non-integral pointer type, requiring us to know the type exactly
+  /// for correctness.
+  class SCEVAddNIExpr : public SCEVAddExpr {
+    friend class ScalarEvolution;
+    PointerType *NIType;
+
+    SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N,
+                  PointerType *NIType)
+        : SCEVAddExpr(ID, O, N), NIType(NIType) {
+      SubclassData |= FlagHasNIPointers;
     }
 
+  public:
+    Type *getType() const { return NIType; }
+
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
-      return S->getSCEVType() == scAddExpr;
+      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
     }
   };
 
+  inline Type *SCEVAddExpr::getType() const {
+    // In general, use the type of the last operand, which is likely to be a
+    // pointer type, if there is one. This doesn't usually matter, but it can
+    // help reduce casts when the expressions are expanded. In the (unusual)
+    // case that we're working with non-integral pointers, we have a subclass
+    // that stores that type explicitly.
+    if (hasNonIntegralPointers())
+      return cast<SCEVAddNIExpr>(this)->getType();
+    return getOperand(getNumOperands() - 1)->getType();
+  }
+
   /// This node represents multiplication of some number of SCEVs.
   class SCEVMulExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
@@ -249,6 +286,18 @@
         : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
 
   public:
+    Type *getType() const {
+      // In general, we can't form SCEVMulExprs with non-integral pointer
+      // types, but for the moment we need to allow a special case: multiplying
+      // by -1 to be able to express the difference between two pointers. In
+      // order to maintain the invariant that SCEVs with the NI flag set should
+      // have a type corresponding to the contained NI pointer, we need to
+      // return the type of the pointer here.
+      if (hasNonIntegralPointers())
+        return getOperand(getNumOperands() - 1)->getType();
+      return SCEVCommutativeExpr::getType();
+    }
+
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
       return S->getSCEVType() == scMulExpr;
@@ -475,9 +524,12 @@
     /// instances owned by a ScalarEvolution.
     SCEVUnknown *Next;
 
-    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
-                ScalarEvolution *se, SCEVUnknown *next) :
-      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
+                SCEVUnknown *next, bool ValueIsNIPtr)
+        : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {
+      if (ValueIsNIPtr)
+        SubclassData |= FlagHasNIPointers;
+    }
 
     // Implement CallbackVH.
     void deleted() override;
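Before moving to the implementation, it is worth spelling out why SCEVAddExpr can no longer derive its type from its last operand alone: SCEV orders operands by canonical complexity, not source order, so when a non-integral pointer is added to integer offsets the pointer is not guaranteed to end up last. The toy model below (hypothetical Node/AddNode types, not LLVM's classes) mirrors the dispatch that the header change introduces:

    #include <cassert>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for SCEV operands; only the type name matters.
    struct Node { std::string Ty; };

    struct AddNode {
      std::vector<Node> Ops;
      bool HasNIPtr;     // mirrors the FlagHasNIPointers bit
      std::string NITy;  // mirrors SCEVAddNIExpr::NIType, fixed at creation

      // Mirrors the patched SCEVAddExpr::getType(): last operand's type,
      // unless a non-integral pointer is involved, in which case the type
      // recorded at construction wins.
      std::string getType() const {
        return HasNIPtr ? NITy : Ops.back().Ty;
      }
    };

    int main() {
      // "ptr + offset" where canonical ordering happens to put the i64 last:
      // the old last-operand rule would report i64 and lose the NI type.
      AddNode A{{{"i8 addrspace(13)*"}, {"i64"}}, true, "i8 addrspace(13)*"};
      assert(A.getType() == "i8 addrspace(13)*");
      return 0;
    }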
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -369,12 +369,13 @@
   case scSignExtend:
     return cast<SCEVCastExpr>(this)->getType();
   case scAddRecExpr:
-  case scMulExpr:
   case scUMaxExpr:
   case scSMaxExpr:
   case scUMinExpr:
   case scSMinExpr:
     return cast<SCEVNAryExpr>(this)->getType();
+  case scMulExpr:
+    return cast<SCEVMulExpr>(this)->getType();
   case scAddExpr:
     return cast<SCEVAddExpr>(this)->getType();
   case scUDivExpr:
@@ -2449,8 +2450,9 @@
   }
 
   // Limit recursion depth.
-  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
+  if (Depth > MaxArithDepth || hasHugeExpression(Ops)) {
     return getOrCreateAddExpr(Ops, Flags);
+  }
 
   // Okay, check to see if the same value occurs in the operand list more than
   // once. If so, merge them together into a multiply expression. Since we
@@ -2791,16 +2793,27 @@
                                         SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scAddExpr);
-  for (const SCEV *Op : Ops)
-    ID.AddPointer(Op);
+  bool HasNIPtr = false;
+  PointerType *NIPtrType = nullptr;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    ID.AddPointer(Ops[i]);
+    if (Ops[i]->hasNonIntegralPointers()) {
+      HasNIPtr = true;
+      NIPtrType = cast<PointerType>(Ops[i]->getType());
+    }
+  }
   void *IP = nullptr;
   SCEVAddExpr *S =
       static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
-    S = new (SCEVAllocator)
-        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
+    if (HasNIPtr)
+      S = new (SCEVAllocator)
+          SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType);
+    else
+      S = new (SCEVAllocator)
+          SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
     addToLoopUseLists(S);
   }
@@ -2813,8 +2826,10 @@
                                        const Loop *L, SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scAddRecExpr);
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    assert(i == 0 || !Ops[i]->hasNonIntegralPointers());
     ID.AddPointer(Ops[i]);
+  }
   ID.AddPointer(L);
   void *IP = nullptr;
   SCEVAddRecExpr *S =
@@ -2828,6 +2843,7 @@
     addToLoopUseLists(S);
   }
   S->setNoWrapFlags(Flags);
+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
   return S;
 }
@@ -2836,8 +2852,11 @@
                                        SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scMulExpr);
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+  bool HasNIPtr = false;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    HasNIPtr |= Ops[i]->hasNonIntegralPointers();
     ID.AddPointer(Ops[i]);
+  }
   void *IP = nullptr;
   SCEVMulExpr *S =
       static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
@@ -2850,6 +2869,7 @@
     addToLoopUseLists(S);
   }
   S->setNoWrapFlags(Flags);
+  S->setHasNIPtr(HasNIPtr);
   return S;
 }
@@ -3673,8 +3693,11 @@
     return ExistingSCEV;
   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
-  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
+  SCEVMinMaxExpr *S = new (SCEVAllocator) SCEVMinMaxExpr(
      ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
+  // For MinMaxExprs it's sufficient to see if the first operand has NI data,
+  // as the operands all need to be of the same type.
+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
   UniqueSCEVs.InsertNode(S, IP);
   addToLoopUseLists(S);
@@ -3751,8 +3774,9 @@
            "Stale SCEVUnknown in uniquing map!");
     return S;
   }
+  bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType());
   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
-                                            FirstUnknown);
+                                            FirstUnknown, ValueIsNIPtr);
   FirstUnknown = cast<SCEVUnknown>(S);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
diff --git a/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll b/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
--- a/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
@@ -2,7 +2,7 @@
 
 ; Address Space 10 is non-integral. The optimizer is not allowed to use
 ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen.
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
 target triple = "x86_64-unknown-linux-gnu"
 
 define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
@@ -43,3 +43,36 @@
 done:                                             ; preds = %if38
   ret void
 }
+
+; This is a bugpoint-reduced regression test. It doesn't make much sense by
+; itself, but it creates the SCEV expressions required to reproduce the issue.
+; See https://github.com/JuliaLang/julia/issues/31156 for the original report.
+define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) {
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}}
+top:
+  br label %L42.L46_crit_edge.us
+
+L42.L46_crit_edge.us:                             ; preds = %L82.us.us.loopexit, %top
+  %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ]
+  %0 = sub i64 %value_phi11.us, %b
+  %1 = add i64 %0, %c
+  %spec.select = select i1 %g, i64 %d, i64 0
+  br label %L62.us.us
+
+L82.us.us.loopexit:                               ; preds = %L62.us.us
+  %2 = add i64 %e, %value_phi11.us
+  br label %L42.L46_crit_edge.us
+
+L62.us.us:                                        ; preds = %L62.us.us, %L42.L46_crit_edge.us
+  %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ]
+  %3 = add i64 %1, %value_phi21.us.us
+  %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3
+  %5 = load i8, i8 addrspace(13)* %4, align 1
+  %6 = add i64 %f, %value_phi21.us.us
+  br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.isvectorized", i32 1}
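A side note on the datalayout change in the test: ":13" is appended to the ni list so that the addrspace(13) pointer used by the new function is treated as non-integral. The snippet below (a sketch that assumes compiling and linking against LLVM; DataLayout::isNonIntegralAddressSpace is an existing API in this LLVM version) can be used to sanity-check such a string:

    #include "llvm/IR/DataLayout.h"
    #include <cassert>

    int main() {
      // Datalayout string from the updated test; ":13" is the new entry.
      llvm::DataLayout DL(
          "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13");
      assert(DL.isNonIntegralAddressSpace(13));  // addrspace(13) is now NI
      assert(!DL.isNonIntegralAddressSpace(1));  // ordinary spaces are not
      return 0;
    }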