Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -82,12 +82,20 @@ FoldingSetNodeIDRef FastID; // The SCEV baseclass this node corresponds to - const unsigned short SCEVType; + const unsigned short SCEVType : 4; + + // A hash value for the SCEV expression rooted at this node. Represents the + // total complexity of the SCEV sub-trees referenced from the root, excluding + // the root itself. For small expressions increases in value with the increase + // of the complexity in a way consistent with CompareSCEVComplexity, for large + // expressions behaves just like a hash. + const unsigned NestedComplexityRank : 25; + static constexpr unsigned NCMask = 0x1FFFFFF; protected: /// This field is initialized to zero and may be used in subclasses to store /// miscellaneous information. - unsigned short SubclassData = 0; + unsigned short SubclassData : 3; public: /// NoWrapFlags are bitfield indices into SubclassData. @@ -116,13 +124,30 @@ NoWrapMask = (1 << 3) - 1 }; - explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) - : FastID(ID), SCEVType(SCEVTy) {} + explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy, + unsigned NCRank = 1) + : FastID(ID), SCEVType(SCEVTy), + // As the actual complexity rank of the entire expression including the + // root is computed as ((SCEVType + 1U) * NestedComplexityRank) we need + // to avoid flushing the nested complexity rank to zero when it wraps. + NestedComplexityRank((NCRank & NCMask) ? NCRank : 1), + SubclassData(FlagAnyWrap) {} + SCEV(const SCEV &) = delete; SCEV &operator=(const SCEV &) = delete; unsigned getSCEVType() const { return SCEVType; } + /// Return a rank used to compute the nested complexity of a parent SCEV node. + /// Has a higher chance of wrapping around than NestedComplexityRank, + /// therefore use getNestedComplexityRank() instead to compare SCEVs by + /// complexity if they are known to have the same root type. + unsigned getComplexityRank() const { + return (SCEVType + 1U) * getNestedComplexityRank(); + } + /// Return a rank used to compare complexities of same root type SCEVs. + unsigned getNestedComplexityRank() const { return NestedComplexityRank; } + /// Return the LLVM type of this SCEV expression. Type *getType() const; Index: include/llvm/Analysis/ScalarEvolutionExpressions.h =================================================================== --- include/llvm/Analysis/ScalarEvolutionExpressions.h +++ include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -25,6 +25,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include #include #include @@ -142,9 +143,9 @@ const SCEV *const *Operands; size_t NumOperands; - SCEVNAryExpr(const FoldingSetNodeIDRef ID, - enum SCEVTypes T, const SCEV *const *O, size_t N) - : SCEV(ID, T), Operands(O), NumOperands(N) {} + SCEVNAryExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T, + const SCEV *const *O, size_t N, unsigned NCRank) + : SCEV(ID, T, NCRank), Operands(O), NumOperands(N) {} public: size_t getNumOperands() const { return NumOperands; } @@ -194,9 +195,16 @@ /// This node is the base class for n'ary commutative operators. class SCEVCommutativeExpr : public SCEVNAryExpr { protected: - SCEVCommutativeExpr(const FoldingSetNodeIDRef ID, - enum SCEVTypes T, const SCEV *const *O, size_t N) - : SCEVNAryExpr(ID, T, O, N) {} + SCEVCommutativeExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T, + const SCEV *const *O, size_t N) + : SCEVNAryExpr(ID, T, O, N, + // All subclasses are also associative operations, not + // just commutative, therefore all the operands have + // equally weighted contributions to the complexity. + std::accumulate(O, O + N, 0U, + [](unsigned Acc, const SCEV *const Op) { + return Acc + Op->getComplexityRank(); + })) {} public: /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -258,7 +266,10 @@ const SCEV *RHS; SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs) - : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {} + : SCEV(ID, scUDivExpr, + // For consistency with lexicographical order weigh LHS higher. + lhs->getComplexityRank() * scMulExpr + rhs->getComplexityRank()), + LHS(lhs), RHS(rhs) {} public: const SCEV *getLHS() const { return LHS; } @@ -292,9 +303,17 @@ const Loop *L; - SCEVAddRecExpr(const FoldingSetNodeIDRef ID, - const SCEV *const *O, size_t N, const Loop *l) - : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {} + SCEVAddRecExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N, + const Loop *l) + : SCEVNAryExpr(ID, scAddRecExpr, O, N, + // For consistency with lexicographical order weigh Start + // higher than Step, and Step higher than x^2-Step. + std::accumulate(O, O + N, 0U, + [](unsigned Acc, const SCEV *Op) { + return Acc * scMulExpr + + Op->getComplexityRank(); + })), + L(l) {} public: const SCEV *getStart() const { return Operands[0]; } Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -414,9 +414,9 @@ return getConstant(ConstantInt::get(ITy, V, isSigned)); } -SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, - unsigned SCEVTy, const SCEV *op, Type *ty) - : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, + const SCEV *op, Type *ty) + : SCEV(ID, SCEVTy, op->getComplexityRank()), Op(op), Ty(ty) {} SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) @@ -692,9 +692,10 @@ if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; - // Compare NoWrap flags. - if (LA->getNoWrapFlags() != RA->getNoWrapFlags()) - return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags(); + const unsigned LNCRank = LA->getNestedComplexityRank(); + const unsigned RNCRank = RA->getNestedComplexityRank(); + if (LNCRank != RNCRank) + return (int)LNCRank - (int)RNCRank; // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { @@ -715,15 +716,16 @@ const SCEVNAryExpr *LC = cast(LHS); const SCEVNAryExpr *RC = cast(RHS); + const unsigned LNCRank = LC->getNestedComplexityRank(); + const unsigned RNCRank = RC->getNestedComplexityRank(); + if (LNCRank != RNCRank) + return (int)LNCRank - (int)RNCRank; + // Lexicographically compare n-ary expressions. unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; - // Compare NoWrap flags. - if (LC->getNoWrapFlags() != RC->getNoWrapFlags()) - return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags(); - for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(i), RC->getOperand(i), DT, @@ -739,6 +741,11 @@ const SCEVUDivExpr *LC = cast(LHS); const SCEVUDivExpr *RC = cast(RHS); + const unsigned LNCRank = LC->getNestedComplexityRank(); + const unsigned RNCRank = RC->getNestedComplexityRank(); + if (LNCRank != RNCRank) + return (int)LNCRank - (int)RNCRank; + // Lexicographically compare udiv expressions. int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(), RC->getLHS(), DT, Depth + 1); @@ -757,6 +764,11 @@ const SCEVCastExpr *LC = cast(LHS); const SCEVCastExpr *RC = cast(RHS); + const unsigned LNCRank = LC->getNestedComplexityRank(); + const unsigned RNCRank = RC->getNestedComplexityRank(); + if (LNCRank != RNCRank) + return (int)LNCRank - (int)RNCRank; + // Compare cast expressions by operand. int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(), RC->getOperand(), DT, Index: test/Analysis/ScalarEvolution/max-addops-inline.ll =================================================================== --- test/Analysis/ScalarEvolution/max-addops-inline.ll +++ test/Analysis/ScalarEvolution/max-addops-inline.ll @@ -3,7 +3,7 @@ define i32 @foo(i64 %p0, i32 %p1) { ; CHECK1: %add2 = add nsw i32 %mul1, %add -; CHECK1-NEXT: --> ((trunc i64 %p0 to i32) * (1 + (trunc i64 %p0 to i32)) * (1 + %p1)) +; CHECK1-NEXT: --> ((trunc i64 %p0 to i32) * (1 + %p1) * (1 + (trunc i64 %p0 to i32))) ; CHECK10: %add2 = add nsw i32 %mul1, %add ; CHECK10-NEXT: --> ((trunc i64 %p0 to i32) * (1 + ((trunc i64 %p0 to i32) * (1 + %p1)) + %p1)) Index: test/Analysis/ScalarEvolution/min-max-exprs.ll =================================================================== --- test/Analysis/ScalarEvolution/min-max-exprs.ll +++ test/Analysis/ScalarEvolution/min-max-exprs.ll @@ -33,7 +33,7 @@ %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6 ; min(N, i+3) ; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6 -; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64)))))) +; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 %N to i64))) smax (-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64)))))) %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9 %tmp12 = load i32, i32* %tmp11, align 4 %tmp13 = shl nsw i32 %tmp12, 1 Index: test/Analysis/ScalarEvolution/predicated-trip-count.ll =================================================================== --- test/Analysis/ScalarEvolution/predicated-trip-count.ll +++ test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -80,7 +80,7 @@ ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) ; CHECK: Loop %bb3: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-1 + (-1 * %M)) smax (-2 + (-1 * (sext i16 %Start to i32))))) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: Index: test/Analysis/ScalarEvolution/trip-count14.ll =================================================================== --- test/Analysis/ScalarEvolution/trip-count14.ll +++ test/Analysis/ScalarEvolution/trip-count14.ll @@ -81,7 +81,7 @@ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit -; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) +; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * %x) + %n) umax (-1 + (-1 * ((2 + %n) smax %n)) + %n)))) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} do.end: @@ -169,7 +169,7 @@ br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit -; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) umax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) +; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * %x) + %n) umax (-1 + (-1 * ((2 + %n) umax %n)) + %n)))) ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} do.end: Index: test/Transforms/IRCE/conjunctive-checks.ll =================================================================== --- test/Transforms/IRCE/conjunctive-checks.ll +++ test/Transforms/IRCE/conjunctive-checks.ll @@ -5,10 +5,10 @@ ; CHECK-LABEL: @f_0( ; CHECK: loop.preheader: -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] +; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 Index: test/Transforms/IRCE/ranges_of_different_types.ll =================================================================== --- test/Transforms/IRCE/ranges_of_different_types.ll +++ test/Transforms/IRCE/ranges_of_different_types.ll @@ -151,11 +151,11 @@ ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len ; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, [[SMAX1]] +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], %len ; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 ; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 ; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] @@ -344,11 +344,11 @@ ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len ; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, [[SMAX1]] +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], %len ; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 ; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 ; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] Index: test/Transforms/IRCE/rc-negative-bound.ll =================================================================== --- test/Transforms/IRCE/rc-negative-bound.ll +++ test/Transforms/IRCE/rc-negative-bound.ll @@ -112,18 +112,18 @@ ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[BOUND]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1 -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 -1, [[SMAX1]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SMAX2]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -1, [[SMAX]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], -1 +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SMAX1]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[BOUND]], -2147483647 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], 0 +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[BOUND]], [[SMAX2]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = sub i32 -1, [[N]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] @@ -214,13 +214,13 @@ ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SMAX1]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -1, [[SMAX]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], -1 +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SMAX1]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[BOUND]], [[SMAX]] +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = sub i32 -1, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[N]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]] @@ -411,18 +411,18 @@ ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[BOUND]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1 -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 -1, [[SMAX1]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1 -; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SMAX2]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -1, [[SMAX]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], -1 +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SMAX1]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[BOUND]], -2147483647 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], 0 +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[BOUND]], [[SMAX2]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = sub i32 -1, [[N]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] @@ -515,13 +515,13 @@ ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1 -; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SMAX1]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -1, [[SMAX]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], -1 +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SMAX1]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[BOUND]], [[SMAX]] +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = sub i32 -1, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[N]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]] Index: test/Transforms/IRCE/single-access-no-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-no-preloop.ll +++ test/Transforms/IRCE/single-access-no-preloop.ll @@ -86,10 +86,10 @@ ; CHECK-LABEL: @single_access_no_preloop_with_offset( ; CHECK: loop.preheader: -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] +; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 Index: test/Transforms/IRCE/single-access-with-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-with-preloop.ll +++ test/Transforms/IRCE/single-access-with-preloop.ll @@ -49,13 +49,13 @@ ; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1 ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. ; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len -; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]] -; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]] ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 ; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648 -; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]] -; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]] +; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_safe_lower_end]] +; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_safe_lower_end]] +; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_n]] +; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_n]] ; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]] ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0 ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0 Index: test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll =================================================================== --- /dev/null +++ test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll @@ -0,0 +1,76 @@ +; RUN: opt -load-store-vectorizer %s -S | FileCheck %s + +; Check that setting wrapping flags after a SCEV node is created +; does not invalidate "sorted by complexity" invariant for +; operands of commutative and associative SCEV operators. + +target triple = "x86_64--" + +@global_value0 = external constant i32 +@global_value1 = external constant i32 +@other_value = external global float +@a = external global float +@b = external global float +@c = external global float +@d = external global float +@plus1 = external global i32 +@cnd = external global i8 + +; Function Attrs: nounwind +define void @main() local_unnamed_addr #0 { +; CHECK-LABEL: @main() +; CHECK: [[PTR:%[0-9]+]] = bitcast float* %preheader.load0.address to <2 x float>* +; CHECK: = load <2 x float>, <2 x float>* [[PTR]] +; CHECK-LABEL: for.body23: +entry: + %tmp = load i32, i32* @global_value0, !range !0 + %tmp2 = load i32, i32* @global_value1 + %and.i.i = and i32 %tmp2, 2 + %add.nuw.nsw.i.i = add nuw nsw i32 %and.i.i, 0 + %mul.i.i = shl nuw nsw i32 %add.nuw.nsw.i.i, 1 + %and6.i.i = and i32 %tmp2, 3 + %and9.i.i = and i32 %tmp2, 4 + %add.nuw.nsw10.i.i = add nuw nsw i32 %and6.i.i, %and9.i.i + %conv3.i42.i = add nuw nsw i32 %mul.i.i, 1 + %reass.add346.7 = add nuw nsw i32 %add.nuw.nsw10.i.i, 56 + %reass.mul347.7 = mul nuw nsw i32 %tmp, %reass.add346.7 + %add7.i.7 = add nuw nsw i32 %reass.mul347.7, 0 + %preheader.address0.idx = add nuw nsw i32 %add7.i.7, %mul.i.i + %preheader.address0.idx.zext = zext i32 %preheader.address0.idx to i64 + %preheader.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.address0.idx.zext + %preheader.load0. = load float, float* %preheader.load0.address, align 4, !tbaa !1 + %common.address.idx = add nuw nsw i32 %add7.i.7, %conv3.i42.i + %preheader.header.common.address.idx.zext = zext i32 %common.address.idx to i64 + %preheader.load1.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext + %preheader.load1. = load float, float* %preheader.load1.address, align 4, !tbaa !1 + br label %for.body23 + +for.body23: ; preds = %for.body23, %entry + %loop.header.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext + %loop.header.load0. = load float, float* %loop.header.load0.address, align 4, !tbaa !1 + %reass.mul343.7 = mul nuw nsw i32 %reass.add346.7, 72 + %add7.i286.7.7 = add nuw nsw i32 %reass.mul343.7, 56 + %add9.i288.7.7 = add nuw nsw i32 %add7.i286.7.7, %mul.i.i + %loop.header.address1.idx = add nuw nsw i32 %add9.i288.7.7, 1 + %loop.header.address1.idx.zext = zext i32 %loop.header.address1.idx to i64 + %loop.header.load1.address = getelementptr inbounds float, float* @other_value, i64 %loop.header.address1.idx.zext + %loop.header.load1. = load float, float* %loop.header.load1.address, align 4, !tbaa !1 + store float %preheader.load0., float* @a, align 4, !tbaa !1 + store float %preheader.load1., float* @b, align 4, !tbaa !1 + store float %loop.header.load0., float* @c, align 4, !tbaa !1 + store float %loop.header.load1., float* @d, align 4, !tbaa !1 + %loaded.cnd = load i8, i8* @cnd + %condition = trunc i8 %loaded.cnd to i1 + br i1 %condition, label %for.body23, label %exit + +exit: + ret void +} + +attributes #0 = { nounwind } + +!0 = !{i32 0, i32 65536} +!1 = !{!2, !2, i64 0} +!2 = !{!"float", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C++ TBAA"}