diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -45,7 +45,7 @@ // The `y`, `v`, and `op` parameters either must or must not be // `kInvalidId`/`nullptr`, depending on the value of the `k` parameter; // however, they have uniform C++ types regardless of the value of `k`. - TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *op); + TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *op, Attribute a); /// Tensor expression kind. Kind kind; @@ -71,6 +71,10 @@ /// kBinaryBranch, this holds the YieldOp for the left or right half /// to be merged into a nested scf loop. Operation *op; + + /// An optional attribute that is required to determine the semantics of the + /// operations. E.g., CmpPredicateAttr for CmpI/CmpF operations. + Attribute attr; }; /// Tensor expression kind. @@ -79,6 +83,10 @@ /// That is, its argument is a `LoopId` identifying the loop-variable /// in question, and its value will be the current iteration's value /// of that loop-variable. See the `LoopId` documentation for more details. +/// +/// The `kSynZero` leaf kind is for representing a synthetic zero value, which +/// can be introduced when sparsifying operations like `arith::cmp` to generate +/// `arith::cmp %lhs, %syn_zero` when the rhs operand is absent. // // TODO: Modify this definition so that the numeric values already encode // the `ExpArity` (while extending the notion of "arity" to include not @@ -89,6 +97,7 @@ enum class TensorExp::Kind { // Leaf. kTensor = 0, + kSynZero, kInvariant, kLoopVar, // Unary operations. @@ -143,6 +152,8 @@ kAndI, kOrI, kXorI, + kCmpI, + kCmpF, kShrS, // signed kShrU, // unsigned kShlI, @@ -246,13 +257,16 @@ ExprId addLoopVarExp(LoopId i); /// Constructs a new invariant expression, and returns its identifier. ExprId addInvariantExp(Value v); + /// Constructs a new synthetic zero expression. + ExprId addSynZeroExp(); /// Constructs a new unary or binary expression, and returns its identifier. ExprId addExp(TensorExp::Kind k, ExprId e0, ExprId e1 = detail::kInvalidId, - Operation *op = nullptr); + Operation *op = nullptr, Attribute attr = nullptr); /// Constructs a new sesquinary expression, and returns its identifier. /// Currently no sesquinary `Kind` allows specifying the `op`, but we /// allow it anyways because `mapSet` is designed to allow it. - ExprId addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op = nullptr); + ExprId addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op = nullptr, + Attribute attr = nullptr); /// Constructs a new iteration lattice point, and returns its identifier. LatPointId addLat(TensorId t, LoopId i, ExprId e); @@ -265,26 +279,29 @@ /// of `LoopId` (effectively constructing a larger "intersection" of those /// loops) with a newly constructed tensor (sub)expression of given kind. /// Returns the identifier of the new lattice point. - LatPointId conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1, + LatPointId conjLat(ExprId e, LatPointId p0, LatPointId p1, Operation *op = nullptr); /// Conjunctive merge of two lattice sets: `(s0 /\_op s1)`. /// Returns the identifier of the new set. - LatSetId conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, - Operation *op = nullptr); + LatSetId conjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op = nullptr); /// Disjunctive merge of two lattice sets: `(s0 /\_op s1, s0, s1)`. 
/// Returns the identifier of the new set. - LatSetId disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, - Operation *op = nullptr); + LatSetId disjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op = nullptr); + + /// Disjunctive merge of two lattice sets and also sets one of the operands to + /// zero: `(s0 /\_op s1 (e0 op e1), s0 (e0 op 0), s1 (0 op e1))`. + /// Returns the identifier of the new set. + LatSetId disjSetWithZero(ExprId e, LatSetId s0, LatSetId s1); /// Disjunctive merge of two lattice sets with custom handling of the /// overlap, left, and right regions. Any region may be left missing /// in the output. Returns the identifier of the new set. - LatSetId combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, - Operation *orig, bool includeLeft, TensorExp::Kind ltrans, - Operation *opleft, bool includeRight, - TensorExp::Kind rtrans, Operation *opright); + LatSetId combiSet(ExprId e, LatSetId s0, LatSetId s1, Operation *orig, + bool includeLeft, TensorExp::Kind ltrans, Operation *opleft, + bool includeRight, TensorExp::Kind rtrans, + Operation *opright); /// Maps the unary operator over the lattice set of the operand, i.e. each /// lattice point on an expression E is simply copied over, but with OP E @@ -292,6 +309,12 @@ LatSetId mapSet(TensorExp::Kind kind, LatSetId s, Value v = Value(), Operation *op = nullptr); + /// Maps the binary operator to the same operation but with one of its operands + /// set to zero, i.e. each lattice point on an expression E is simply copied + /// over, but with `OP 0 E` (if lhsZero == true) or `OP E 0` (if lhsZero == + /// false) as the new expression. Returns the identifier of the new set. + LatSetId mapBinWithSynZeroSet(ExprId e, LatSetId s, bool lhsZero); + /// Optimizes the iteration lattice points in the given set. This /// method should be called right before code generation to avoid /// generating redundant loops and conditions. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -1129,11 +1129,12 @@ /// Recursively generates tensor expression. static Value genExp(CodegenEnv &env, RewriterBase &rewriter, ExprId e, LoopId ldx) { + if (e == ::mlir::sparse_tensor::detail::kInvalidId) + return Value(); + linalg::GenericOp op = env.op(); Location loc = op.getLoc(); - if (e == ::mlir::sparse_tensor::detail::kInvalidId) - return Value(); const TensorExp &exp = env.exp(e); const auto kind = exp.kind; if (kind == TensorExp::Kind::kTensor) @@ -1146,8 +1147,23 @@ if (kind == TensorExp::Kind::kReduce) env.startCustomReduc(e); // enter custom - Value v0 = genExp(env, rewriter, exp.children.e0, ldx); - Value v1 = genExp(env, rewriter, exp.children.e1, ldx); + Value v0, v1; + // If either lhs/rhs is a synthetic zero, we infer the type for the zero value + // based on the type of the other operand.
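+ // E.g., for `arith.cmpf ult, %a, %b` where the rhs child has been replaced by + // a synthetic zero, v0 is generated from the lhs as usual and the zero is + // materialized as `constantZero(rewriter, loc, v0.getType())`.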
+ if (exp.children.e0 != ::mlir::sparse_tensor::detail::kInvalidId && + env.exp(exp.children.e0).kind == TensorExp::Kind::kSynZero) { + v1 = genExp(env, rewriter, exp.children.e1, ldx); + v0 = constantZero(rewriter, loc, v1.getType()); + } else if (exp.children.e1 != ::mlir::sparse_tensor::detail::kInvalidId && + env.exp(exp.children.e1).kind == TensorExp::Kind::kSynZero) { + v0 = genExp(env, rewriter, exp.children.e0, ldx); + v1 = constantZero(rewriter, loc, v0.getType()); + } + + // If v0/v1 is not a synthetic constant zero, generate the expression. + v0 = !v0 ? genExp(env, rewriter, exp.children.e0, ldx) : v0; + v1 = !v1 ? genExp(env, rewriter, exp.children.e1, ldx) : v1; + Value ee; if (kind == TensorExp::Kind::kReduce && (!v0 || !v1)) { // custom reduce did not receive a value @@ -1223,7 +1239,8 @@ env.merger().clearExprValue(exp); } } else if (env.exp(exp).kind != TensorExp::Kind::kInvariant && - env.exp(exp).kind != TensorExp::Kind::kLoopVar) { + env.exp(exp).kind != TensorExp::Kind::kLoopVar && + env.exp(exp).kind != TensorExp::Kind::kSynZero) { // Traverse into the binary operations. Note that we only hoist // tensor loads, since subsequent MLIR/LLVM passes know how to // deal with all other kinds of derived loop invariants. diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -31,6 +31,7 @@ case TensorExp::Kind::kTensor: case TensorExp::Kind::kInvariant: case TensorExp::Kind::kLoopVar: + case TensorExp::Kind::kSynZero: return ExpArity::kNullary; case TensorExp::Kind::kAbsF: case TensorExp::Kind::kAbsC: @@ -89,6 +90,8 @@ case TensorExp::Kind::kSubF: case TensorExp::Kind::kSubC: case TensorExp::Kind::kSubI: + case TensorExp::Kind::kCmpF: + case TensorExp::Kind::kCmpI: return ExpArity::kBinary; } llvm_unreachable("unexpected kind"); @@ -99,7 +102,7 @@ //===----------------------------------------------------------------------===// TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v, - Operation *o) + Operation *o, Attribute a) : kind(k), val(v), op(o) { switch (kind) { // Leaf.
@@ -107,6 +110,9 @@ assert(x != detail::kInvalidId && y == detail::kInvalidId && !v && !o); tensor = x; return; + case TensorExp::Kind::kSynZero: + assert(x == detail::kInvalidId && y == detail::kInvalidId && !v && !o); + return; case TensorExp::Kind::kInvariant: assert(x == detail::kInvalidId && y == detail::kInvalidId && v && !o); return; @@ -191,6 +197,13 @@ children.e0 = x; children.e1 = y; return; + case TensorExp::Kind::kCmpF: + case TensorExp::Kind::kCmpI: + assert(x != detail::kInvalidId && y != detail::kInvalidId && !v && !o); + attr = a; + children.e0 = x; + children.e1 = y; + return; case TensorExp::Kind::kBinary: case TensorExp::Kind::kReduce: assert(x != detail::kInvalidId && y != detail::kInvalidId && !v && o); @@ -228,7 +241,7 @@ assert(isValidTensorId(t)); const ExprId eNew(tensorExps.size()); tensorExps.emplace_back(TensorExp::Kind::kTensor, t, detail::kInvalidId, - Value(), nullptr); + Value(), nullptr, nullptr); return eNew; } @@ -236,28 +249,36 @@ assert(isValidLoopId(i)); const ExprId eNew(tensorExps.size()); tensorExps.emplace_back(TensorExp::Kind::kLoopVar, i, detail::kInvalidId, - Value(), nullptr); + Value(), nullptr, nullptr); return eNew; } ExprId Merger::addInvariantExp(Value v) { const ExprId eNew(tensorExps.size()); tensorExps.emplace_back(TensorExp::Kind::kInvariant, detail::kInvalidId, - detail::kInvalidId, v, nullptr); + detail::kInvalidId, v, nullptr, nullptr); + return eNew; +} +ExprId Merger::addSynZeroExp() { + const ExprId eNew(tensorExps.size()); + tensorExps.emplace_back(TensorExp::Kind::kSynZero, detail::kInvalidId, + detail::kInvalidId, Value(), nullptr, nullptr); return eNew; } -ExprId Merger::addExp(TensorExp::Kind k, ExprId e0, ExprId e1, Operation *op) { +ExprId Merger::addExp(TensorExp::Kind k, ExprId e0, ExprId e1, Operation *op, + Attribute attr) { assert(k > TensorExp::Kind::kLoopVar); const ExprId eNew(tensorExps.size()); - tensorExps.emplace_back(k, e0, e1, Value(), op); + tensorExps.emplace_back(k, e0, e1, Value(), op, attr); return eNew; } -ExprId Merger::addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op) { +ExprId Merger::addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op, + Attribute attr) { assert(k > TensorExp::Kind::kLoopVar); const ExprId eNew(tensorExps.size()); - tensorExps.emplace_back(k, e, detail::kInvalidId, v, op); + tensorExps.emplace_back(k, e, detail::kInvalidId, v, op, attr); return eNew; } @@ -283,31 +304,33 @@ return sNew; } -LatPointId Merger::conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1, +LatPointId Merger::conjLat(ExprId e, LatPointId p0, LatPointId p1, Operation *op) { + TensorExp::Kind kind = exp(e).kind; + Attribute attr = exp(e).attr; const LatPointId pNew(latPoints.size()); const auto &point0 = lat(p0); const auto &point1 = lat(p1); BitVector bits(point0.bits); bits |= point1.bits; - const ExprId e = addExp(kind, point0.exp, point1.exp, op); - latPoints.emplace_back(bits, e); + const ExprId ne = addExp(kind, point0.exp, point1.exp, op, attr); + latPoints.emplace_back(bits, ne); return pNew; } -LatSetId Merger::conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, - Operation *op) { +LatSetId Merger::conjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op) { const LatSetId sNew = addSet(); auto &setNew = latSets[sNew]; for (const LatPointId p0 : set(s0)) for (const LatPointId p1 : set(s1)) - setNew.push_back(conjLat(kind, p0, p1, op)); + setNew.push_back(conjLat(e, p0, p1, op)); return sNew; } -LatSetId Merger::disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, - 
Operation *op) { - const LatSetId sNew = conjSet(kind, s0, s1, op); +LatSetId Merger::disjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op) { + const LatSetId sNew = conjSet(e, s0, s1, op); + TensorExp::Kind kind = exp(e).kind; + // Followed by all in s0. latSets[sNew].append(latSets[s0]); // Map binary 0-y to unary -y. @@ -318,17 +341,41 @@ s1 = mapSet(TensorExp::Kind::kNegC, s1); else if (kind == TensorExp::Kind::kSubI) s1 = mapSet(TensorExp::Kind::kNegI, s1); + // Followed by all in s1. latSets[sNew].append(latSets[s1]); return sNew; } -LatSetId Merger::combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, - Operation *orig, bool includeLeft, - TensorExp::Kind ltrans, Operation *opleft, - bool includeRight, TensorExp::Kind rtrans, - Operation *opright) { - const LatSetId sNew = conjSet(kind, s0, s1, orig); +LatSetId Merger::disjSetWithZero(ExprId e, LatSetId s0, LatSetId s1) { + TensorExp::Kind kind = exp(e).kind; + assert(kind == TensorExp::Kind::kCmpI || kind == TensorExp::Kind::kCmpF); + const LatSetId sNew = conjSet(e, s0, s1, nullptr); + + ExprId e0 = exp(e).children.e0; + ExprId e1 = exp(e).children.e1; + if (exp(e0).kind == TensorExp::Kind::kSynZero || + exp(e1).kind == TensorExp::Kind::kSynZero) { + // lhs and rhs can't be synthetic zero at the same time. + assert(exp(e0).kind != exp(e1).kind); + // If one of the operands has already been assigned to zero (the + // element is absent in the corresponding operand), then we do not + // need to build disjunctive set for it. + return sNew; + } + + auto lhsSet = mapBinWithSynZeroSet(e, s0, false); + auto rhsSet = mapBinWithSynZeroSet(e, s1, true); + latSets[sNew].append(latSets[lhsSet]); + latSets[sNew].append(latSets[rhsSet]); + return sNew; +} + +LatSetId Merger::combiSet(ExprId e, LatSetId s0, LatSetId s1, Operation *orig, + bool includeLeft, TensorExp::Kind ltrans, + Operation *opleft, bool includeRight, + TensorExp::Kind rtrans, Operation *opright) { + const LatSetId sNew = conjSet(e, s0, s1, orig); // Left Region. if (includeLeft) { if (opleft) @@ -356,6 +403,23 @@ return sNew; } +LatSetId Merger::mapBinWithSynZeroSet(ExprId e, LatSetId s0, bool lhsZero) { + TensorExp::Kind kind = exp(e).kind; + Attribute a = exp(e).attr; + assert(TensorExp::Kind::kMulF <= kind && kind <= TensorExp::Kind::kShlI); + // Must be a binary operation. + const LatSetId sNew = addSet(); + auto &setNew = latSets[sNew]; + const ExprId zeroExp = addSynZeroExp(); + for (const LatPointId p : set(s0)) { + const auto &point = latPoints[p]; + ExprId newExp = lhsZero ? addExp(kind, zeroExp, point.exp, nullptr, a) + : addExp(kind, point.exp, zeroExp, nullptr, a); + setNew.push_back(addLat(point.bits, newExp)); + } + return sNew; +} + LatSetId Merger::optimizeSet(LatSetId s0) { const LatSetId sNew = addSet(); auto &setNew = latSets[sNew]; @@ -418,7 +482,8 @@ // Slice on dense level has `locate` property as well, and can be optimized. if (simple[b] && !isSparseLvlWithNonTrivialIdxExp(b)) { const auto dlt = getLvlType(b); - if (!isCompressedDLT(dlt) && !isSingletonDLT(dlt) && !isCompressedWithHiDLT(dlt)) { + if (!isCompressedDLT(dlt) && !isSingletonDLT(dlt) && + !isCompressedWithHiDLT(dlt)) { if (reset) simple.reset(b); reset = true; @@ -505,6 +570,7 @@ return expr.tensor == t; case TensorExp::Kind::kInvariant: case TensorExp::Kind::kLoopVar: + case TensorExp::Kind::kSynZero: return false; // Unary operations. 
case TensorExp::Kind::kAbsF: @@ -575,6 +641,8 @@ case TensorExp::Kind::kSubI: case TensorExp::Kind::kOrI: case TensorExp::Kind::kXorI: + case TensorExp::Kind::kCmpF: + case TensorExp::Kind::kCmpI: case TensorExp::Kind::kBinary: case TensorExp::Kind::kReduce: return false; @@ -585,7 +653,8 @@ bool Merger::hasAnySparse(const BitVector &bits) const { for (TensorLoopId b : bits.set_bits()) { const auto dlt = getLvlType(b); - if (isCompressedDLT(dlt) || isSingletonDLT(dlt) || isCompressedWithHiDLT(dlt)) + if (isCompressedDLT(dlt) || isSingletonDLT(dlt) || + isCompressedWithHiDLT(dlt)) return true; } return hasSparseIdxReduction(bits); @@ -613,6 +682,8 @@ return "invariant"; case TensorExp::Kind::kLoopVar: return "index"; + case TensorExp::Kind::kSynZero: + return "0"; // Unary operations. case TensorExp::Kind::kAbsF: case TensorExp::Kind::kAbsC: @@ -693,6 +764,9 @@ return ">>"; case TensorExp::Kind::kShlI: return "<<"; + case TensorExp::Kind::kCmpF: + case TensorExp::Kind::kCmpI: + return "cmp"; case TensorExp::Kind::kBinary: return "binary"; case TensorExp::Kind::kReduce: @@ -715,6 +789,9 @@ case TensorExp::Kind::kInvariant: llvm::dbgs() << "invariant"; break; + case TensorExp::Kind::kSynZero: + llvm::dbgs() << "0"; + break; case TensorExp::Kind::kLoopVar: llvm::dbgs() << "loopvar_" << expr.loop; break; @@ -776,11 +853,16 @@ case TensorExp::Kind::kShrS: case TensorExp::Kind::kShrU: case TensorExp::Kind::kShlI: + case TensorExp::Kind::kCmpF: + case TensorExp::Kind::kCmpI: case TensorExp::Kind::kBinary: case TensorExp::Kind::kReduce: llvm::dbgs() << "("; dumpExp(expr.children.e0); - llvm::dbgs() << " " << kindToOpSymbol(expr.kind) << " "; + llvm::dbgs() << " " << kindToOpSymbol(expr.kind); + if (expr.attr) + llvm::dbgs() << "{" << expr.attr << "}"; + llvm::dbgs() << " "; dumpExp(expr.children.e1); llvm::dbgs() << ")"; } @@ -838,6 +920,7 @@ // Leaf. case TensorExp::Kind::kTensor: case TensorExp::Kind::kInvariant: + case TensorExp::Kind::kSynZero: case TensorExp::Kind::kLoopVar: { // Either the loop-var is really used in the tensor expression, or it is // set to the undefined loop-var in that level. An invariant expression, @@ -921,13 +1004,14 @@ if (absentRegion.empty()) { // Simple mapping over existing values. return mapSet(kind, child0, Value(), unop); - } // Use a disjunction with `unop` on the left and the absent value as an + } + // Use a disjunction with `unop` on the left and the absent value as an // invariant on the right. Block &absentBlock = absentRegion.front(); YieldOp absentYield = cast(absentBlock.getTerminator()); const Value absentVal = absentYield.getResult(); const ExprId rhs = addInvariantExp(absentVal); - return disjSet(kind, child0, buildLattices(rhs, i), unop); + return disjSet(e, child0, buildLattices(rhs, i), unop); } // Binary operations. 
case TensorExp::Kind::kMulF: @@ -946,7 +1030,7 @@ { const ExprId e0 = expr.children.e0; const ExprId e1 = expr.children.e1; - return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i)); + return conjSet(e, buildLattices(e0, i), buildLattices(e1, i)); } case TensorExp::Kind::kDivF: case TensorExp::Kind::kDivC: @@ -969,7 +1053,7 @@ const ExprId e0 = expr.children.e0; const ExprId e1 = expr.children.e1; assert(!maybeZero(e1)); - return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i)); + return conjSet(e, buildLattices(e0, i), buildLattices(e1, i)); } case TensorExp::Kind::kAddF: case TensorExp::Kind::kAddC: @@ -989,7 +1073,21 @@ { const ExprId e0 = expr.children.e0; const ExprId e1 = expr.children.e1; - return disjSet(kind, buildLattices(e0, i), buildLattices(e1, i)); + return disjSet(e, buildLattices(e0, i), buildLattices(e1, i)); + } + case TensorExp::Kind::kCmpF: + case TensorExp::Kind::kCmpI: + // A comparison operation needs to be performed + // for the disjunction of sparse iteration spaces. + // + // x < y| !y | y | + // -----+------+-------+ + // !x | 0 | 0 < y | + // x | x < 0 | x < y | + { + const ExprId e0 = expr.children.e0; + const ExprId e1 = expr.children.e1; + return disjSetWithZero(e, buildLattices(e0, i), buildLattices(e1, i)); } case TensorExp::Kind::kShrS: case TensorExp::Kind::kShrU: @@ -1001,7 +1099,7 @@ const ExprId e0 = expr.children.e0; const ExprId e1 = expr.children.e1; assert(isInvariant(e1)); - return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i)); + return conjSet(e, buildLattices(e0, i), buildLattices(e1, i)); } case TensorExp::Kind::kBinary: // A custom binary operation. @@ -1032,9 +1130,9 @@ } bool includeLeft = binop.getLeftIdentity() || !leftRegion.empty(); bool includeRight = binop.getRightIdentity() || !rightRegion.empty(); - return combiSet(TensorExp::Kind::kBinary, child0, child1, binop, - includeLeft, TensorExp::Kind::kBinaryBranch, leftYield, - includeRight, TensorExp::Kind::kBinaryBranch, rightYield); + return combiSet(e, child0, child1, binop, includeLeft, + TensorExp::Kind::kBinaryBranch, leftYield, includeRight, + TensorExp::Kind::kBinaryBranch, rightYield); } case TensorExp::Kind::kReduce: // A custom reduce operation. @@ -1042,7 +1140,7 @@ const ExprId e0 = expr.children.e0; const ExprId e1 = expr.children.e1; Operation *const op = expr.op; - return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i), op); + return conjSet(e, buildLattices(e0, i), buildLattices(e1, i), op); } } llvm_unreachable("unexpected expression kind"); @@ -1260,6 +1358,37 @@ return addExp(TensorExp::Kind::kShrU, e0, e1); if (isa<arith::ShLIOp>(def) && isInvariant(e1)) return addExp(TensorExp::Kind::kShlI, e0, e1); + if (auto ci = dyn_cast<arith::CmpIOp>(def)) { + if (ci.getPredicate() == arith::CmpIPredicate::eq || + ci.getPredicate() == arith::CmpIPredicate::sle || + ci.getPredicate() == arith::CmpIPredicate::sge || + ci.getPredicate() == arith::CmpIPredicate::ule || + ci.getPredicate() == arith::CmpIPredicate::uge) { + // We cannot sparsify comparisons that include equality, because 0 <= 0 + // yields true and thus densifies the result.
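+ // Only predicates that evaluate to false on (0, 0), i.e. ne, slt, sgt, ult, + // and ugt, keep the result sparse.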
+ return std::nullopt; + } + + return addExp(TensorExp::Kind::kCmpI, e0, e1, nullptr, + ci.getPredicateAttr()); + } + if (auto cf = dyn_cast<arith::CmpFOp>(def)) { + if (cf.getPredicate() == arith::CmpFPredicate::OEQ || + cf.getPredicate() == arith::CmpFPredicate::OGE || + cf.getPredicate() == arith::CmpFPredicate::OLE || + cf.getPredicate() == arith::CmpFPredicate::ONE || + cf.getPredicate() == arith::CmpFPredicate::UEQ || + cf.getPredicate() == arith::CmpFPredicate::UGE || + cf.getPredicate() == arith::CmpFPredicate::ULE || + cf.getPredicate() == arith::CmpFPredicate::ORD || + cf.getPredicate() == arith::CmpFPredicate::UNO) { + // We cannot sparsify comparisons that include equality, because 0 <= 0 + // yields true and thus densifies the result. + return std::nullopt; + } + return addExp(TensorExp::Kind::kCmpF, e0, e1, nullptr, + cf.getPredicateAttr()); + } if (auto binop = dyn_cast(def)) { if (isAdmissibleBranch(binop, binop.getOverlapRegion()) && (binop.getLeftIdentity() || @@ -1341,6 +1470,7 @@ case TensorExp::Kind::kTensor: case TensorExp::Kind::kInvariant: case TensorExp::Kind::kLoopVar: + case TensorExp::Kind::kSynZero: llvm_unreachable("unexpected non-op"); // Unary operations. case TensorExp::Kind::kAbsF: @@ -1457,6 +1587,14 @@ return rewriter.create<arith::ShRUIOp>(loc, v0, v1); case TensorExp::Kind::kShlI: return rewriter.create<arith::ShLIOp>(loc, v0, v1); + case TensorExp::Kind::kCmpI: { + auto predicate = llvm::cast<arith::CmpIPredicateAttr>(expr.attr); + return rewriter.create<arith::CmpIOp>(loc, predicate, v0, v1); + } + case TensorExp::Kind::kCmpF: { + auto predicate = llvm::cast<arith::CmpFPredicateAttr>(expr.attr); + return rewriter.create<arith::CmpFOp>(loc, predicate, v0, v1); + } case TensorExp::Kind::kBinaryBranch: // semi-ring ops with custom logic. return insertYieldOp(rewriter, loc, *expr.op->getBlock()->getParent(), {v0}); diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir @@ -52,6 +52,43 @@ return %0 : tensor<32x16xf32> } +// CHECK-LABEL: func.func @cmp_dd( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>> to memref +// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_10]] : memref<32x16xi1>) +// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { +// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { +// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index +// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : index +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> +// CHECK: %[[VAL_17:.*]] = arith.cmpf ult, %[[VAL_15]], %[[VAL_16]] : f32 +// CHECK: memref.store
%[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32x16xi1> +// CHECK: return %[[VAL_18]] : tensor<32x16xi1> +// CHECK: } +func.func @cmp_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> { + %0 = linalg.generic #trait2 + ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>) + outs(%argx: tensor<32x16xi1>) { + ^bb(%a: f32, %b: f32, %x: i1): + %0 = arith.cmpf ult, %a, %b : f32 + linalg.yield %0 : i1 + } -> tensor<32x16xi1> + return %0 : tensor<32x16xi1> +} + // CHECK-LABEL: func @mul_dd( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, @@ -151,6 +188,73 @@ return %0 : tensor<32x16xf32> } +// CHECK-LABEL: func.func @cmp_ds( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> +// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>) +// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_7]] : index +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_16]], %[[VAL_21:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { +// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_18]] : index +// CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index +// CHECK: } do { +// CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index +// CHECK: scf.if %[[VAL_26]] { +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xf32> +// CHECK: %[[VAL_29:.*]] = arith.cmpf ult, %[[VAL_27]], %[[VAL_28]] : f32 +// CHECK: memref.store %[[VAL_29]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : 
memref<32x16xi1> +// CHECK: } else { +// CHECK: scf.if %[[VAL_8]] { +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xf32> +// CHECK: %[[VAL_31:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_30]] : f32 +// CHECK: memref.store %[[VAL_31]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xi1> +// CHECK: } else { +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_32:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index +// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index +// CHECK: %[[VAL_34:.*]] = arith.select %[[VAL_32]], %[[VAL_33]], %[[VAL_23]] : index +// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_24]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_34]], %[[VAL_35]] : index, index +// CHECK: } attributes +// CHECK: scf.for %[[VAL_36:.*]] = %[[VAL_37:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] { +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_36]]] : memref<32x16xf32> +// CHECK: %[[VAL_39:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_38]] : f32 +// CHECK: memref.store %[[VAL_39]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_36]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_40:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xi1> +// CHECK: return %[[VAL_40]] : tensor<32x16xi1> +// CHECK: } +func.func @cmp_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> { + %0 = linalg.generic #trait2 + ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>) + outs(%argx: tensor<32x16xi1>) { + ^bb(%a: f32, %b: f32, %x: i1): + %0 = arith.cmpf ult, %a, %b : f32 + linalg.yield %0 : i1 + } -> tensor<32x16xi1> + return %0 : tensor<32x16xi1> +} + // CHECK-LABEL: func @mul_ds( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, @@ -258,6 +362,78 @@ return %0 : tensor<32x16xf32> } +// CHECK-LABEL: func.func @cmp_sd( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 32 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref +// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>) +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load 
%[[VAL_10]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { +// CHECK: %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_16]] : index +// CHECK: scf.condition(%[[VAL_20]]) %[[VAL_18]], %[[VAL_19]] : index, index +// CHECK: } do { +// CHECK: ^bb0(%[[VAL_21:.*]]: index, %[[VAL_22:.*]]: index): +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_24:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_22]] : index +// CHECK: scf.if %[[VAL_24]] { +// CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { +// CHECK: %[[VAL_26:.*]] = arith.muli %[[VAL_21]], %[[VAL_3]] : index +// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_25]] : index +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_25]]] : memref<32x16xf32> +// CHECK: %[[VAL_30:.*]] = arith.cmpf ult, %[[VAL_28]], %[[VAL_29]] : f32 +// CHECK: memref.store %[[VAL_30]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_25]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } else { +// CHECK: scf.if %[[VAL_8]] { +// CHECK: scf.for %[[VAL_31:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_31]]] : memref<32x16xf32> +// CHECK: %[[VAL_33:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_32]] : f32 +// CHECK: memref.store %[[VAL_33]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_31]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } else { +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_22]] : index +// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_21]], %[[VAL_7]] : index +// CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_34]], %[[VAL_35]], %[[VAL_21]] : index +// CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_22]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_36]], %[[VAL_37]] : index, index +// CHECK: } attributes +// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_39:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] { +// CHECK: scf.for %[[VAL_40:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_38]], %[[VAL_40]]] : memref<32x16xf32> +// CHECK: %[[VAL_42:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_41]] : f32 +// CHECK: memref.store %[[VAL_42]], %[[VAL_14]]{{\[}}%[[VAL_38]], %[[VAL_40]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_43:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xi1> +// CHECK: return %[[VAL_43]] : tensor<32x16xi1> +// CHECK: } +func.func @cmp_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> { + %0 = linalg.generic #trait2 + ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>) + outs(%argx: tensor<32x16xi1>) { + ^bb(%a: f32, %b: f32, %x: i1): + %0 = arith.cmpf ult, %a, %b : f32 + linalg.yield %0 : i1 + } -> tensor<32x16xi1> + return %0 : tensor<32x16xi1> +} + // CHECK-LABEL: func @mul_sd( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, @@ -392,6 +568,106 @@ return %0 : tensor<32x16xf32> } +// CHECK-LABEL: func.func @cmp_ss( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, +// CHECK-SAME: %[[VAL_2:.*]]: 
tensor<32x16xi1>) -> tensor<32x16xi1> { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_16]] : memref<32x16xi1>) +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { +// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_18]] : index +// CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index +// CHECK: } do { +// CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index +// CHECK: scf.if %[[VAL_26]] { +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_30:.*]]:2 = scf.while (%[[VAL_31:.*]] = %[[VAL_27]], %[[VAL_32:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { +// CHECK: %[[VAL_33:.*]] = arith.cmpi ult, %[[VAL_31]], %[[VAL_29]] : index +// CHECK: scf.condition(%[[VAL_33]]) %[[VAL_31]], %[[VAL_32]] : index, index +// CHECK: } do { +// CHECK: ^bb0(%[[VAL_34:.*]]: index, %[[VAL_35:.*]]: index): +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_35]] : index +// CHECK: scf.if %[[VAL_37]] { +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xf32> +// CHECK: %[[VAL_40:.*]] = arith.cmpf ult, %[[VAL_38]], %[[VAL_39]] : f32 +// CHECK: memref.store %[[VAL_40]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xi1> +// CHECK: } else { +// CHECK: scf.if %[[VAL_8]] { +// CHECK: %[[VAL_41:.*]] = 
memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xf32> +// CHECK: %[[VAL_42:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_41]] : f32 +// CHECK: memref.store %[[VAL_42]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xi1> +// CHECK: } else { +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_43:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_35]] : index +// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_34]], %[[VAL_7]] : index +// CHECK: %[[VAL_45:.*]] = arith.select %[[VAL_43]], %[[VAL_44]], %[[VAL_34]] : index +// CHECK: %[[VAL_46:.*]] = arith.addi %[[VAL_35]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_45]], %[[VAL_46]] : index, index +// CHECK: } attributes +// CHECK: scf.for %[[VAL_47:.*]] = %[[VAL_48:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] { +// CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_47]]] : memref<32x16xf32> +// CHECK: %[[VAL_50:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_49]] : f32 +// CHECK: memref.store %[[VAL_50]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_47]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } else { +// CHECK: scf.if %[[VAL_8]] { +// CHECK: scf.for %[[VAL_51:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_51]]] : memref<32x16xf32> +// CHECK: %[[VAL_53:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_52]] : f32 +// CHECK: memref.store %[[VAL_53]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_51]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } else { +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_54:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index +// CHECK: %[[VAL_55:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index +// CHECK: %[[VAL_56:.*]] = arith.select %[[VAL_54]], %[[VAL_55]], %[[VAL_23]] : index +// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_24]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_56]], %[[VAL_57]] : index, index +// CHECK: } attributes +// CHECK: scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#1 to %[[VAL_3]] step %[[VAL_7]] { +// CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_58]], %[[VAL_60]]] : memref<32x16xf32> +// CHECK: %[[VAL_62:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_61]] : f32 +// CHECK: memref.store %[[VAL_62]], %[[VAL_16]]{{\[}}%[[VAL_58]], %[[VAL_60]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_63:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<32x16xi1> +// CHECK: return %[[VAL_63]] : tensor<32x16xi1> +// CHECK: } +func.func @cmp_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> { + %0 = linalg.generic #trait2 + ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>) + outs(%argx: tensor<32x16xi1>) { + ^bb(%a: f32, %b: f32, %x: i1): + %0 = arith.cmpf ult, %a, %b : f32 + linalg.yield %0 : i1 + } -> tensor<32x16xi1> + return %0 : tensor<32x16xi1> +} + // CHECK-LABEL: func @mul_ss( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, @@ -599,6 +875,180 @@ return %0 : tensor<32x16xf32> } +// CHECK-LABEL: func.func @cmp_ss_ss( +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { +// CHECK-DAG: 
%[[VAL_3:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref +// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> +// CHECK: linalg.fill ins(%[[VAL_3]] : i1) outs(%[[VAL_17]] : memref<32x16xi1>) +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_18]], %[[VAL_24:.*]] = %[[VAL_20]]) : (index, index) -> (index, index) { +// CHECK: %[[VAL_25:.*]] = arith.cmpi ult, %[[VAL_23]], %[[VAL_19]] : index +// CHECK: %[[VAL_26:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_21]] : index +// CHECK: %[[VAL_27:.*]] = arith.andi %[[VAL_25]], %[[VAL_26]] : i1 +// CHECK: scf.condition(%[[VAL_27]]) %[[VAL_23]], %[[VAL_24]] : index, index +// CHECK: } do { +// CHECK: ^bb0(%[[VAL_28:.*]]: index, %[[VAL_29:.*]]: index): +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_32:.*]] = arith.cmpi ult, %[[VAL_31]], %[[VAL_30]] : index +// CHECK: %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_31]], %[[VAL_30]] : index +// CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index +// CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index +// CHECK: 
%[[VAL_36:.*]] = arith.andi %[[VAL_34]], %[[VAL_35]] : i1 +// CHECK: scf.if %[[VAL_36]] { +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_38]]] : memref +// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref +// CHECK: %[[VAL_43:.*]]:2 = scf.while (%[[VAL_44:.*]] = %[[VAL_37]], %[[VAL_45:.*]] = %[[VAL_40]]) : (index, index) -> (index, index) { +// CHECK: %[[VAL_46:.*]] = arith.cmpi ult, %[[VAL_44]], %[[VAL_39]] : index +// CHECK: %[[VAL_47:.*]] = arith.cmpi ult, %[[VAL_45]], %[[VAL_42]] : index +// CHECK: %[[VAL_48:.*]] = arith.andi %[[VAL_46]], %[[VAL_47]] : i1 +// CHECK: scf.condition(%[[VAL_48]]) %[[VAL_44]], %[[VAL_45]] : index, index +// CHECK: } do { +// CHECK: ^bb0(%[[VAL_49:.*]]: index, %[[VAL_50:.*]]: index): +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_50]]] : memref +// CHECK: %[[VAL_53:.*]] = arith.cmpi ult, %[[VAL_52]], %[[VAL_51]] : index +// CHECK: %[[VAL_54:.*]] = arith.select %[[VAL_53]], %[[VAL_52]], %[[VAL_51]] : index +// CHECK: %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index +// CHECK: %[[VAL_56:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index +// CHECK: %[[VAL_57:.*]] = arith.andi %[[VAL_55]], %[[VAL_56]] : i1 +// CHECK: scf.if %[[VAL_57]] { +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_50]]] : memref +// CHECK: %[[VAL_60:.*]] = arith.cmpf ult, %[[VAL_58]], %[[VAL_59]] : f32 +// CHECK: memref.store %[[VAL_60]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1> +// CHECK: } else { +// CHECK: %[[VAL_61:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index +// CHECK: scf.if %[[VAL_61]] { +// CHECK: %[[VAL_62:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_63:.*]] = arith.cmpf ult, %[[VAL_62]], %[[VAL_6]] : f32 +// CHECK: memref.store %[[VAL_63]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1> +// CHECK: } else { +// CHECK: %[[VAL_64:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index +// CHECK: scf.if %[[VAL_64]] { +// CHECK: %[[VAL_65:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_50]]] : memref +// CHECK: %[[VAL_66:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_65]] : f32 +// CHECK: memref.store %[[VAL_66]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1> +// CHECK: } else { +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_67:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index +// CHECK: %[[VAL_68:.*]] = arith.addi %[[VAL_49]], %[[VAL_5]] : index +// CHECK: %[[VAL_69:.*]] = arith.select %[[VAL_67]], %[[VAL_68]], %[[VAL_49]] : index +// CHECK: %[[VAL_70:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index +// CHECK: %[[VAL_71:.*]] = arith.addi %[[VAL_50]], %[[VAL_5]] : index +// CHECK: %[[VAL_72:.*]] = arith.select %[[VAL_70]], %[[VAL_71]], %[[VAL_50]] : index +// CHECK: scf.yield %[[VAL_69]], %[[VAL_72]] : index, index +// CHECK: } attributes +// CHECK: scf.for %[[VAL_73:.*]] = %[[VAL_74:.*]]#0 to %[[VAL_39]] step %[[VAL_5]] { +// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_73]]] : memref +// CHECK: %[[VAL_76:.*]] = memref.load 
%[[VAL_11]]{{\[}}%[[VAL_73]]] : memref +// CHECK: %[[VAL_77:.*]] = arith.cmpf ult, %[[VAL_76]], %[[VAL_6]] : f32 +// CHECK: memref.store %[[VAL_77]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_75]]] : memref<32x16xi1> +// CHECK: } +// CHECK: scf.for %[[VAL_78:.*]] = %[[VAL_79:.*]]#1 to %[[VAL_42]] step %[[VAL_5]] { +// CHECK: %[[VAL_80:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_78]]] : memref +// CHECK: %[[VAL_81:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_78]]] : memref +// CHECK: %[[VAL_82:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_81]] : f32 +// CHECK: memref.store %[[VAL_82]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_80]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } else { +// CHECK: %[[VAL_83:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index +// CHECK: scf.if %[[VAL_83]] { +// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_85:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index +// CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_85]]] : memref +// CHECK: scf.for %[[VAL_87:.*]] = %[[VAL_84]] to %[[VAL_86]] step %[[VAL_5]] { +// CHECK: %[[VAL_88:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_87]]] : memref +// CHECK: %[[VAL_89:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_87]]] : memref +// CHECK: %[[VAL_90:.*]] = arith.cmpf ult, %[[VAL_89]], %[[VAL_6]] : f32 +// CHECK: memref.store %[[VAL_90]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_88]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } else { +// CHECK: %[[VAL_91:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index +// CHECK: scf.if %[[VAL_91]] { +// CHECK: %[[VAL_92:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_93:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index +// CHECK: %[[VAL_94:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_93]]] : memref +// CHECK: scf.for %[[VAL_95:.*]] = %[[VAL_92]] to %[[VAL_94]] step %[[VAL_5]] { +// CHECK: %[[VAL_96:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_95]]] : memref +// CHECK: %[[VAL_97:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_95]]] : memref +// CHECK: %[[VAL_98:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_97]] : f32 +// CHECK: memref.store %[[VAL_98]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_96]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } else { +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_99:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index +// CHECK: %[[VAL_100:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index +// CHECK: %[[VAL_101:.*]] = arith.select %[[VAL_99]], %[[VAL_100]], %[[VAL_28]] : index +// CHECK: %[[VAL_102:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index +// CHECK: %[[VAL_103:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index +// CHECK: %[[VAL_104:.*]] = arith.select %[[VAL_102]], %[[VAL_103]], %[[VAL_29]] : index +// CHECK: scf.yield %[[VAL_101]], %[[VAL_104]] : index, index +// CHECK: } attributes +// CHECK: scf.for %[[VAL_105:.*]] = %[[VAL_106:.*]]#0 to %[[VAL_19]] step %[[VAL_5]] { +// CHECK: %[[VAL_107:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_105]]] : memref +// CHECK: %[[VAL_108:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_105]]] : memref +// CHECK: %[[VAL_109:.*]] = arith.addi %[[VAL_105]], %[[VAL_5]] : index +// CHECK: %[[VAL_110:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_109]]] : memref +// CHECK: scf.for %[[VAL_111:.*]] = %[[VAL_108]] to %[[VAL_110]] step %[[VAL_5]] { +// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_111]]] : memref +// CHECK: %[[VAL_113:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_111]]] : memref +// CHECK: %[[VAL_114:.*]] = arith.cmpf ult, %[[VAL_113]], 
%[[VAL_6]] : f32 +// CHECK: memref.store %[[VAL_114]], %[[VAL_17]]{{\[}}%[[VAL_107]], %[[VAL_112]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } +// CHECK: scf.for %[[VAL_115:.*]] = %[[VAL_116:.*]]#1 to %[[VAL_21]] step %[[VAL_5]] { +// CHECK: %[[VAL_117:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_115]]] : memref +// CHECK: %[[VAL_118:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_115]]] : memref +// CHECK: %[[VAL_119:.*]] = arith.addi %[[VAL_115]], %[[VAL_5]] : index +// CHECK: %[[VAL_120:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_119]]] : memref +// CHECK: scf.for %[[VAL_121:.*]] = %[[VAL_118]] to %[[VAL_120]] step %[[VAL_5]] { +// CHECK: %[[VAL_122:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_121]]] : memref +// CHECK: %[[VAL_123:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_121]]] : memref +// CHECK: %[[VAL_124:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_123]] : f32 +// CHECK: memref.store %[[VAL_124]], %[[VAL_17]]{{\[}}%[[VAL_117]], %[[VAL_122]]] : memref<32x16xi1> +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_125:.*]] = bufferization.to_tensor %[[VAL_17]] : memref<32x16xi1> +// CHECK: return %[[VAL_125]] : tensor<32x16xi1> +// CHECK: } +func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> { + %0 = linalg.generic #trait2 + ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>) + outs(%argx: tensor<32x16xi1>) { + ^bb(%a: f32, %b: f32, %x: i1): + %0 = arith.cmpf ult, %a, %b : f32 + linalg.yield %0 : i1 + } -> tensor<32x16xi1> + return %0 : tensor<32x16xi1> +} + #BatchedVector = #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed-hi" ], }> @@ -671,22 +1121,22 @@ // CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_31]], %[[VAL_4]] : index // CHECK: %[[VAL_61:.*]] = arith.select %[[VAL_59]], %[[VAL_60]], %[[VAL_31]] : index // CHECK: scf.yield %[[VAL_58]], %[[VAL_61]], %[[VAL_62:.*]] : index, index, tensor<2x3xf64, #{{.*}}>> -// CHECK: } attributes {"Emitted from" = "linalg.generic"} +// CHECK: } attributes // CHECK: %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_65]]#2) // CHECK: %[[VAL_67:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_64]]] : memref // CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_64]]] : memref // CHECK: %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #{{.*}}>> // CHECK: scf.yield %[[VAL_69]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: } {"Emitted from" = "linalg.generic"} +// CHECK: } // CHECK: %[[VAL_70:.*]] = scf.for %[[VAL_71:.*]] = %[[VAL_72:.*]]#1 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_73:.*]] = %[[VAL_74:.*]]) // CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_71]]] : memref // CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_71]]] : memref // CHECK: %[[VAL_77:.*]] = arith.negf %[[VAL_76]] : f64 // CHECK: %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #{{.*}}>> // CHECK: scf.yield %[[VAL_78]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: } {"Emitted from" = "linalg.generic"} +// CHECK: } // CHECK: scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: } {"Emitted from" = "linalg.generic"} +// CHECK: } // CHECK: %[[VAL_80:.*]] = sparse_tensor.load %[[VAL_81:.*]] hasInserts : tensor<2x3xf64, #{{.*}}>> // CHECK: return %[[VAL_80]] : tensor<2x3xf64, #{{.*}}>> // CHECK: } @@ -1140,9 +1590,9 @@ // CHECK: %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], 
%[[VAL_29]] : f32 // CHECK: %[[VAL_31:.*]] = arith.addf %[[VAL_26]], %[[VAL_30]] : f32 // CHECK: memref.store %[[VAL_31]], %[[VAL_14]]{{\[}}%[[VAL_18]], %[[VAL_25]]] : memref -// CHECK: } {"Emitted from" = "linalg.generic"} -// CHECK: } {"Emitted from" = "linalg.generic"} -// CHECK: } {"Emitted from" = "linalg.generic"} +// CHECK: } +// CHECK: } +// CHECK: } // CHECK: %[[VAL_32:.*]] = bufferization.to_tensor %[[VAL_14]] : memref // CHECK: return %[[VAL_32]] : tensor // CHECK: } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir @@ -0,0 +1,149 @@ +// DEFINE: %{option} = "enable-runtime-library=false" +// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option} +// DEFINE: %{run} = mlir-cpu-runner \ +// DEFINE: -e entry -entry-point-result=void \ +// DEFINE: -shared-libs=%mlir_c_runner_utils | \ +// DEFINE: FileCheck %s +// +// RUN: %{compile} | %{run} +// +// Do the same run, but now with direct IR generation and vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true" +// RUN: %{compile} | %{run} + +// Do the same run, but now with direct IR generation and, if available, VLA +// vectorization. +// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA" +// REDEFINE: %{run} = %lli_host_or_aarch64_cmd \ +// REDEFINE: --entry-function=entry_lli \ +// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \ +// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \ +// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \ +// REDEFINE: FileCheck %s +// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run} + +!Filename = !llvm.ptr + +#DCSR = #sparse_tensor.encoding<{ + lvlTypes = [ "compressed", "compressed" ] +}> + +#trait = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A + affine_map<(i,j) -> (i,j)>, // B + affine_map<(i,j) -> (i,j)> // x (out) + ], + iterator_types = ["parallel", "parallel"], + doc = "X(i, j) = cmp A(i,j) B(i, j)" +} + +// +// Integration test that lowers a kernel annotated as sparse to +// actual sparse code, initializes matching sparse storage schemes +// from constant dense matrices, and runs the resulting code with the JIT compiler.
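+//
+// An arith.cmpf between a stored entry and an absent one is evaluated against
+// a synthetic zero, so the sparse kernels below iterate over the union of the
+// operands' sparsity patterns:
+//   both a(i,j) and b(i,j) stored:  x(i,j) = cmpf ult, a, b
+//   only a(i,j) stored:             x(i,j) = cmpf ult, a, 0.0
+//   only b(i,j) stored:             x(i,j) = cmpf ult, 0.0, b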
+// +module { + func.func @cmp_all_dense(%arga: tensor<4x4xf64>, + %argb: tensor<4x4xf64>, + %argx: tensor<4x4xi8>) -> tensor<4x4xi8> { + %0 = linalg.generic #trait + ins(%arga, %argb: tensor<4x4xf64>, tensor<4x4xf64>) + outs(%argx: tensor<4x4xi8>) { + ^bb(%a: f64, %b: f64, %x: i8): + %0 = arith.cmpf ult, %a, %b : f64 + %1 = arith.extui %0 : i1 to i8 + linalg.yield %1 : i8 + } -> tensor<4x4xi8> + return %0 : tensor<4x4xi8> + } + + func.func @cmp_lhs_sparse(%arga: tensor<4x4xf64, #DCSR>, + %argb: tensor<4x4xf64>) -> tensor<4x4xi8, #DCSR> { + %argx = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR> + %0 = linalg.generic #trait + ins(%arga, %argb: tensor<4x4xf64, #DCSR>, tensor<4x4xf64>) + outs(%argx: tensor<4x4xi8, #DCSR>) { + ^bb(%a: f64, %b: f64, %x: i8): + %0 = arith.cmpf ult, %a, %b : f64 + %1 = arith.extui %0 : i1 to i8 + linalg.yield %1 : i8 + } -> tensor<4x4xi8, #DCSR> + return %0 : tensor<4x4xi8, #DCSR> + } + + func.func @cmp_all_sparse(%arga: tensor<4x4xf64, #DCSR>, + %argb: tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR> { + %argx = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR> + %0 = linalg.generic #trait + ins(%arga, %argb: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) + outs(%argx: tensor<4x4xi8, #DCSR>) { + ^bb(%a: f64, %b: f64, %x: i8): + %0 = arith.cmpf ult, %a, %b : f64 + %1 = arith.extui %0 : i1 to i8 + linalg.yield %1 : i8 + } -> tensor<4x4xi8, #DCSR> + return %0 : tensor<4x4xi8, #DCSR> + } + + func.func private @getTensorFilename(index) -> (!Filename) + + // + // Main driver that constructs the input matrices and calls the sparse kernels. + // + func.func @entry() { + %d0 = arith.constant 0 : i8 + %c0 = arith.constant 0 : index + + %lhs_dn = arith.constant dense< + [ [ 0.0, 0.0, 1.5, 1.0], + [ 0.0, 3.5, 0.0, 0.0], + [ 1.0, 5.0, 2.0, 0.0], + [ 1.0, 0.5, 0.0, 0.0] ]> : tensor<4x4xf64> + + %rhs_dn = arith.constant dense< + [ [ 0.0, 1.5, 1.0, 1.5], + [ 3.5, 0.0, 0.0, 0.0], + [ 5.0, 2.0, 0.0, 2.0], + [ 0.5, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64> + + %lhs_sp = sparse_tensor.convert %lhs_dn : tensor<4x4xf64> to tensor<4x4xf64, #DCSR> + %rhs_sp = sparse_tensor.convert %rhs_dn : tensor<4x4xf64> to tensor<4x4xf64, #DCSR> + + %output = arith.constant dense<0> : tensor<4x4xi8> + %all_dn_out = call @cmp_all_dense(%lhs_dn, %rhs_dn, %output) + : (tensor<4x4xf64>, tensor<4x4xf64>, tensor<4x4xi8>) -> tensor<4x4xi8> + %lhs_sp_out = call @cmp_lhs_sparse(%lhs_sp, %rhs_dn) + : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64>) -> tensor<4x4xi8, #DCSR> + %all_sp_out = call @cmp_all_sparse(%lhs_sp, %rhs_sp) + : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR> + + // + // All three kernels should produce the same result. + // + // CHECK-COUNT-3: ( ( 0, 1, 0, 1 ), ( 1, 0, 0, 0 ), ( 1, 0, 0, 1 ), ( 0, 0, 0, 0 ) ) + %v = vector.transfer_read %all_dn_out[%c0, %c0], %d0 + : tensor<4x4xi8>, vector<4x4xi8> + vector.print %v : vector<4x4xi8> + + %lhs_sp_ret = sparse_tensor.convert %lhs_sp_out + : tensor<4x4xi8, #DCSR> to tensor<4x4xi8> + %v1 = vector.transfer_read %lhs_sp_ret[%c0, %c0], %d0 + : tensor<4x4xi8>, vector<4x4xi8> + vector.print %v1 : vector<4x4xi8> + + %rhs_sp_ret = sparse_tensor.convert %all_sp_out + : tensor<4x4xi8, #DCSR> to tensor<4x4xi8> + %v2 = vector.transfer_read %rhs_sp_ret[%c0, %c0], %d0 + : tensor<4x4xi8>, vector<4x4xi8> + vector.print %v2 : vector<4x4xi8> + + + bufferization.dealloc_tensor %lhs_sp : tensor<4x4xf64, #DCSR> + bufferization.dealloc_tensor %rhs_sp : tensor<4x4xf64, #DCSR> + bufferization.dealloc_tensor %lhs_sp_out : tensor<4x4xi8, #DCSR> +
bufferization.dealloc_tensor %all_sp_out : tensor<4x4xi8, #DCSR> + + return + } +} diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp --- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp +++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp @@ -34,7 +34,9 @@ DO(subi, TensorExp::Kind::kSubI) \ DO(andi, TensorExp::Kind::kAndI) \ DO(xori, TensorExp::Kind::kXorI) \ - DO(ori, TensorExp::Kind::kOrI) + DO(ori, TensorExp::Kind::kOrI) \ + DO(cmpf, TensorExp::Kind::kCmpF) \ + DO(cmpi, TensorExp::Kind::kCmpI) // TODO: Disjunctive binary operations that need special handling are not // included, e.g., Division are not tested (for now) as it need a constant @@ -109,6 +111,7 @@ /// Constructors. /// Rather than using these, please use the readable helper constructor /// functions below to make tests more readable. + Pattern() : kind(TensorExp::Kind::kSynZero) {} Pattern(TensorId tid) : kind(TensorExp::Kind::kTensor), tid(tid) {} Pattern(TensorExp::Kind kind, PatternRef e0, PatternRef e1) : kind(kind), children(e0, e1) { @@ -122,6 +125,7 @@ /// static Pattern tensorPattern(TensorId tid) { return Pattern(tid); } +static Pattern synZeroPattern() { return Pattern(); } #define IMPL_BINOP_PATTERN(OP, KIND) \ LLVM_ATTRIBUTE_UNUSED static Pattern OP##Pattern(PatternRef e0, \ @@ -229,9 +233,12 @@ if (tensorExp.kind != pattern.kind) return false; switch (tensorExp.kind) { - // Leaf. + // Leaf. case TensorExp::Kind::kTensor: return tensorExp.tensor == pattern.tid; + case TensorExp::Kind::kSynZero: + // Kind equivalence has already been checked above. + return true; case TensorExp::Kind::kInvariant: llvm_unreachable("invariant not handled yet"); case TensorExp::Kind::kLoopVar: @@ -289,6 +296,8 @@ case TensorExp::Kind::kAndI: case TensorExp::Kind::kOrI: case TensorExp::Kind::kXorI: + case TensorExp::Kind::kCmpF: + case TensorExp::Kind::kCmpI: case TensorExp::Kind::kShrS: case TensorExp::Kind::kShrU: case TensorExp::Kind::kShlI: @@ -752,6 +761,79 @@ FOREVERY_COMMON_CONJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_CONJ) +/// Vector comparison (disjunction) of 2 vectors, i.e.: +/// a(i) = b(i) cmp c(i) +/// which should form the 3 lattice points +/// { +/// lat( i_00 i_01 / (tensor_0 cmp tensor_1) ) +/// lat( i_00 / tensor_0 cmp 0 ) +/// lat( i_01 / 0 cmp tensor_1 ) +/// } +/// and after optimization, the lattice points do not change (as there is no +/// duplicated point and all input vectors are sparse).
+/// { +/// lat( i_00 i_01 / (tensor_0 cmp tensor_1) ) +/// lat( i_00 / tensor_0 cmp 0 ) +/// lat( i_01 / 0 cmp tensor_1 ) +/// } +TEST_F(MergerTest3T1L, vector_cmp) { + const auto e = cmpiExpr(tensor(0), tensor(1)); + const auto l0 = lid(0); + const auto t0 = tid(0); + const auto t1 = tid(1); + PatternRef zero = synZeroPattern(); + PatternRef p0 = tensorPattern(t0); + PatternRef p1 = tensorPattern(t1); + auto s = merger.buildLattices(e, l0); + expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero), + loopsToBits({{l0, t0}})); + expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + loopsToBits({{l0, t1}})); + s = merger.optimizeSet(s); + expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero), + loopsToBits({{l0, t0}})); + expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + loopsToBits({{l0, t1}})); +} + +/// Vector comparison (disjunction) of a sparse and a dense vector, i.e.: +/// a(i) = b(i) cmp c(i) +/// which should form the 3 lattice points +/// { +/// lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) ) +/// lat( i_00 / sparse_tensor_0 cmp 0 ) +/// lat( i_01 / 0 cmp dense_tensor_1 ) +/// } +/// which should be optimized to +/// { +/// lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) ) +/// lat( i_01 / 0 cmp dense_tensor_1 ) +/// } +/// +/// lat( i_00 / sparse_tensor_0 cmp 0 ) is optimized out because it differs +/// from lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) ) only by the +/// dense bit for tensor_1. +TEST_F(MergerTest3T1LD, vector_cmp) { + const auto e = cmpiExpr(tensor(0), tensor(1)); + const auto l0 = lid(0); + const auto t0 = tid(0); + const auto t1 = tid(1); + PatternRef zero = synZeroPattern(); + PatternRef p0 = tensorPattern(t0); + PatternRef p1 = tensorPattern(t1); + auto s = merger.buildLattices(e, l0); + expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero), + loopsToBits({{l0, t0}})); + expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + loopsToBits({{l0, t1}})); + s = merger.optimizeSet(s); + expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}})); + expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1), + loopsToBits({{l0, t1}})); +} + #undef IMPL_MERGER_TEST_OPTIMIZED_CONJ // TODO: mult-dim tests
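For reference, a kCmpF lattice test could mirror vector_cmp above, since the DO(cmpf, TensorExp::Kind::kCmpF) entry generates the same kind of helpers as the integer case. The sketch below assumes cmpfExpr and cmpfPattern are produced by those helper macros; it is illustrative only, not part of the patch.

// Minimal sketch: kCmpF should build the same lattice shape as kCmpI, with the
// two one-sided points compared against the synthetic zero.
TEST_F(MergerTest3T1L, vector_cmpf) {
  const auto e = cmpfExpr(tensor(0), tensor(1));
  const auto l0 = lid(0);
  const auto t0 = tid(0);
  const auto t1 = tid(1);
  PatternRef zero = synZeroPattern();
  PatternRef p0 = tensorPattern(t0);
  PatternRef p1 = tensorPattern(t1);
  auto s = merger.buildLattices(e, l0);
  // Conjunction of both sparse operands, then the two synthetic-zero points.
  expectLatPoint(s, 0, cmpfPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
  expectLatPointWithinRange(s, 1, 2, cmpfPattern(p0, zero),
                            loopsToBits({{l0, t0}}));
  expectLatPointWithinRange(s, 1, 2, cmpfPattern(zero, p1),
                            loopsToBits({{l0, t1}}));
}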