Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -1838,6 +1838,144 @@
   return ZExt;
 }
 
+namespace {
+
+/// \brief A sum-of-products representation of a SCEV expression.
+///
+/// The value represented by an instance of this class is
+/// "Sum[S*I for (S, I) in Terms]".  Note: I is allowed to be 0 for some S.
+class SumOfProduct {
+  DenseMap<const SCEV *, APInt> Terms;
+  unsigned BitWidth;
+
+public:
+  /// \brief Construct a SumOfProduct instance with the given bit width.
+  SumOfProduct(unsigned BitWidth) : BitWidth(BitWidth) {}
+
+  /// \brief Return the coefficient for a specific SCEV term.
+  APInt &operator[](const SCEV *S);
+
+  /// \brief Simplify the entire SumOfProduct instance into a constant, if
+  /// possible.
+  Optional<APInt> asConstant() const;
+
+  /// \brief Obtain raw access to the terms dictionary.
+  const DenseMap<const SCEV *, APInt> &getTerms() const { return Terms; }
+};
+
+/// \brief Simplifies a SCEV expression to a canonical sum-of-products form.
+///
+/// The structure is reminiscent of a visitor pattern, but it isn't a "full"
+/// visitor -- it only really cares about sums and products, and shortcuts out
+/// of everything else.
+struct SumOfProductVisitor {
+  ScalarEvolution &SE;
+  Type *Ty;
+  unsigned BitWidth;
+
+  /// \brief Helper function used by visitMul.
+  void recursivelySimplifyProduct(ArrayRef<SumOfProduct> MulOps, unsigned Idx,
+                                  SmallVectorImpl<const SCEV *> &TermStack,
+                                  APInt Scale, SumOfProduct &Result);
+
+public:
+  explicit SumOfProductVisitor(ScalarEvolution &SE, Type *Ty)
+      : SE(SE), Ty(Ty), BitWidth(SE.getTypeSizeInBits(Ty)) {}
+
+  /// \brief Map a sum into a canonical SumOfProduct instance.
+  ///
+  /// It *adds* the sum-of-products representation of Sum[Ops] to Result.
+  void visitSum(ArrayRef<const SCEV *> Ops, SumOfProduct &Result);
+
+  /// \brief Map a product into a canonical SumOfProduct instance.
+  ///
+  /// It *adds* the sum-of-products representation of Mul[Ops] to Result.
+  void visitMul(ArrayRef<const SCEV *> Ops, SumOfProduct &Result);
+};
+}
+
+APInt &SumOfProduct::operator[](const SCEV *S) {
+  auto It = Terms.find(S);
+  if (It == Terms.end()) {
+    // By default we'll end up with i1 APInts, which we won't be able to
+    // multiply with iBitWidth APInts.
+    Terms[S] = APInt(BitWidth, 0);
+    It = Terms.find(S);
+  }
+
+  return It->second;
+}
+
+Optional<APInt> SumOfProduct::asConstant() const {
+  APInt Total(BitWidth, 0);
+  for (const auto &T : Terms)
+    if (!T.second.isMinValue()) {
+      auto *SC = dyn_cast<SCEVConstant>(T.first);
+      if (!SC)
+        return None;
+
+      Total += SC->getValue()->getValue() * T.second;
+    }
+
+  return Total;
+}
+
+void SumOfProductVisitor::
+recursivelySimplifyProduct(ArrayRef<SumOfProduct> MulOps, unsigned Idx,
+                           SmallVectorImpl<const SCEV *> &TermStack,
+                           APInt Scale, SumOfProduct &Result) {
+  if (Idx == MulOps.size()) {
+    // getMulExpr modifies the Ops array in place, so we have to create a copy
+    // here.
+    auto TermStackCopy = TermStack;
+    Result[SE.getMulExpr(TermStackCopy)] += Scale;
+    return;
+  }
+
+  for (const auto &T : MulOps[Idx].getTerms()) {
+    TermStack.push_back(T.first);
+    recursivelySimplifyProduct(MulOps, Idx + 1, TermStack, Scale * T.second,
+                               Result);
+    TermStack.pop_back();
+  }
+}
+
+void SumOfProductVisitor::visitMul(ArrayRef<const SCEV *> Ops,
+                                   SumOfProduct &Result) {
+  SmallVector<SumOfProduct, 8> AddOps;
+  SmallVector<const SCEV *, 8> OtherOps;
+  APInt ConstOp(BitWidth, 1);
+
+  for (auto *S : Ops) {
+    if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
+      AddOps.emplace_back(BitWidth);
+      visitSum({SA->op_begin(), SA->op_end()}, AddOps.back());
+    } else if (auto *SC = dyn_cast<SCEVConstant>(S))
+      ConstOp *= SC->getValue()->getValue();
+    else
+      OtherOps.push_back(S);
+  }
+
+  if (AddOps.empty()) {
+    Result[SE.getMulExpr(OtherOps)] += ConstOp;
+    return;
+  }
+
+  recursivelySimplifyProduct(AddOps, 0, OtherOps, ConstOp, Result);
+}
+
+void SumOfProductVisitor::visitSum(ArrayRef<const SCEV *> Ops,
+                                   SumOfProduct &Result) {
+  for (const SCEV *S : Ops) {
+    if (auto *SC = dyn_cast<SCEVConstant>(S))
+      Result[SE.getOne(Ty)] += SC->getValue()->getValue();
+    else if (auto *SM = dyn_cast<SCEVMulExpr>(S))
+      visitMul({SM->op_begin(), SM->op_end()}, Result);
+    else
+      Result[S]++;
+  }
+}
+
 /// CollectAddOperandsWithScales - Process the given Ops list, which is
 /// a list of operands to be added under the given scale, update the given
 /// map. This is a helper function for getAddRecExpr. As an example of
@@ -2146,6 +2284,16 @@
   DenseMap<const SCEV *, APInt> M;
   SmallVector<const SCEV *, 8> NewOps;
   APInt AccumulatedConstant(BitWidth, 0);
+
+  {
+    SumOfProduct Result(BitWidth);
+    SumOfProductVisitor SOPVisitor(*this, Ty);
+    SOPVisitor.visitSum(Ops, Result);
+
+    if (auto Val = Result.asConstant())
+      return cast<SCEVConstant>(getConstant(Val.getValue()));
+  }
+
   if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                    Ops.data(), Ops.size(),
                                    APInt(BitWidth, 1), *this)) {
Index: test/CodeGen/PowerPC/pr25170.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/pr25170.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s
+
+; Checks for a crash
+
+target datalayout = "e-p:64:64-i64:64-n32:64"
+target triple = "powerpc64le-linux"
+
+%struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144 = type <{ [220 x i8] }>
+
+@.BSS1 = external unnamed_addr global %struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144, align 32
+
+; Function Attrs: noinline nounwind
+define void @ety2_() #0 {
+L.entry:
+  %0 = load i32, i32* undef, align 4
+  %1 = sext i32 %0 to i64
+  %2 = shl nsw i64 %1, 3
+  %3 = add nsw i64 %2, 8
+  br label %L.LB1_425
+
+L.LB1_425:                                        ; preds = %L.LB1_427, %L.entry
+  %4 = phi i64 [ %21, %L.LB1_427 ], [ undef, %L.entry ]
+  br i1 undef, label %L.LB1_427, label %L.LB1_816
+
+L.LB1_816:                                        ; preds = %L.LB1_425
+  switch i32 undef, label %L.LB1_432 [
+    i32 30, label %L.LB1_805
+    i32 10, label %L.LB1_451
+    i32 20, label %L.LB1_451
+  ]
+
+L.LB1_451:                                        ; preds = %L.LB1_816, %L.LB1_816
+  unreachable
+
+L.LB1_432:                                        ; preds = %L.LB1_816
+  %.in.31 = lshr i64 %4, 32
+  %5 = trunc i64 %.in.31 to i32
+  br i1 undef, label %L.LB1_769, label %L.LB1_455
+
+L.LB1_455:                                        ; preds = %L.LB1_432
+  unreachable
+
+L.LB1_769:                                        ; preds = %L.LB1_432
+  %6 = sext i32 %5 to i64
+  %7 = add nsw i64 %6, 2
+  %8 = add nsw i64 %6, -1
+  %9 = mul i64 %8, %1
+  %10 = add i64 %9, %7
+  %11 = shl i64 %10, 3
+  %12 = getelementptr i8, i8* undef, i64 %11
+  %13 = mul nsw i64 %6, %1
+  %14 = add i64 %7, %13
+  %15 = shl i64 %14, 3
+  %16 = getelementptr i8, i8* undef, i64 %15
+  br i1 undef, label %L.LB1_662, label %L.LB1_662.prol
+
+L.LB1_662.prol:                                   ; preds = %L.LB1_662.prol, %L.LB1_769
+  %indvars.iv.next20.prol = add nuw nsw i64 undef, 1
+  br i1 undef, label %L.LB1_662, label %L.LB1_662.prol, !llvm.loop !0
+
+L.LB1_662:                                        ; preds = %L.LB1_437.2, %L.LB1_662.prol, %L.LB1_769
+  %indvars.iv19 = phi i64 [ %indvars.iv.next20.3, %L.LB1_437.2 ], [ 0, %L.LB1_769 ], [ %indvars.iv.next20.prol, %L.LB1_662.prol ]
+  %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+  %17 = mul i64 %indvars.iv.next20, %3
+  %18 = getelementptr i8, i8* %16, i64 %17
+  %19 = bitcast i8* %18 to double*
+  store double 0.000000e+00, double* %19, align 8
+  %indvars.iv.next20.1 = add nsw i64 %indvars.iv19, 2
+  %20 = mul i64 %indvars.iv.next20.1, %3
+  br i1 undef, label %L.LB1_437.2, label %L.LB1_824.2
+
+L.LB1_427:                                        ; preds = %L.LB1_425
+  %21 = load i64, i64* bitcast (i8* getelementptr inbounds (%struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144, %struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144* @.BSS1, i64 0, i32 0, i64 8) to i64*), align 8
+  br label %L.LB1_425
+
+L.LB1_805:                                        ; preds = %L.LB1_816
+  ret void
+
+L.LB1_824.2:                                      ; preds = %L.LB1_662
+  %22 = getelementptr i8, i8* %12, i64 %20
+  %23 = bitcast i8* %22 to double*
+  store double 0.000000e+00, double* %23, align 8
+  br label %L.LB1_437.2
+
+L.LB1_437.2:                                      ; preds = %L.LB1_824.2, %L.LB1_662
+  %indvars.iv.next20.3 = add nsw i64 %indvars.iv19, 4
+  br label %L.LB1_662
+}
+
+attributes #0 = { noinline nounwind }
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable"}
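
For readers following the algebra rather than the SCEV plumbing, below is a minimal standalone sketch (plain C++17, no LLVM classes, no APInt) of the same distribute-and-collect idea: products of sums are expanded into a map from symbolic factor lists to integer coefficients, and the whole expression folds to a constant only when every surviving term is the empty product. The Term/SumOfProducts names and the example expression (x + 1) * (x - 1) - x*x + 1 are illustrative only and are not part of the patch.

// Standalone sketch of the sum-of-products fold. Each expression is a map
// from a sorted list of symbolic factors to an integer coefficient; the
// empty factor list stands for the constant 1.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

using Term = std::vector<std::string>;
using SumOfProducts = std::map<Term, int64_t>;

// Add Scale * (product of Factors) into Result, analogous to
// SumOfProduct::operator[] followed by "+= Scale" in the patch.
static void addTerm(SumOfProducts &Result, Term Factors, int64_t Scale) {
  std::sort(Factors.begin(), Factors.end()); // canonical factor order
  Result[Factors] += Scale;
}

// Distribute (Sum A) * (Sum B) term by term: the two-operand analogue of
// recursivelySimplifyProduct.
static SumOfProducts multiply(const SumOfProducts &A, const SumOfProducts &B) {
  SumOfProducts Result;
  for (const auto &TA : A)
    for (const auto &TB : B) {
      Term Factors = TA.first;
      Factors.insert(Factors.end(), TB.first.begin(), TB.first.end());
      addTerm(Result, Factors, TA.second * TB.second);
    }
  return Result;
}

// Fold B into A, the analogue of visitSum accumulating into one Result.
static void accumulate(SumOfProducts &A, const SumOfProducts &B) {
  for (const auto &T : B)
    A[T.first] += T.second;
}

// Analogue of SumOfProduct::asConstant(): succeed only when every term with a
// nonzero coefficient is the empty (constant) product.
static std::optional<int64_t> asConstant(const SumOfProducts &S) {
  int64_t Total = 0;
  for (const auto &T : S) {
    if (T.second == 0)
      continue;
    if (!T.first.empty())
      return std::nullopt;
    Total += T.second;
  }
  return Total;
}

int main() {
  // (x + 1) * (x - 1) expands to x*x - 1 (the x terms cancel).
  SumOfProducts XPlus1 = {{Term{"x"}, 1}, {Term{}, 1}};
  SumOfProducts XMinus1 = {{Term{"x"}, 1}, {Term{}, -1}};
  SumOfProducts Product = multiply(XPlus1, XMinus1);

  // Subtract x*x and add 1; every symbolic term now has coefficient zero.
  SumOfProducts Rest = {{Term{"x", "x"}, -1}, {Term{}, 1}};
  accumulate(Product, Rest);

  if (auto C = asConstant(Product))
    std::cout << "folded to constant " << *C << "\n"; // prints 0
  return 0;
}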