Diff 475915

mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h

Show First 20 Lines • Show All 142 Lines • ▼ Show 20 Lines
};		};

/// A class to handle all iteration lattice operations. This class abstracts		/// A class to handle all iteration lattice operations. This class abstracts
/// away from some implementation details of storing iteration lattices and		/// away from some implementation details of storing iteration lattices and
/// tensor expressions. This allows for fine-tuning performance characteristics		/// tensor expressions. This allows for fine-tuning performance characteristics
/// independently from the basic algorithm if bottlenecks are identified.		/// independently from the basic algorithm if bottlenecks are identified.
class Merger {		class Merger {
public:		public:
/// Constructs a merger for the given number of tensors and loops. The		/// Constructs a merger for the given number of tensors and loops. The
		aartbikUnsubmitted Not Done Reply Inline Actions comment needs to be expanded also, you can probably separate the merger changes (and some unit testing) in its own revision for easier reviewing aartbik: comment needs to be expanded also, you can probably separate the merger changes (and some unit…
/// user supplies the number of tensors involved in the kernel, with the		/// user supplies the number of tensors involved in the kernel, with the
/// last tensor in this set denoting the output tensor. The merger adds an		/// last tensor in this set denoting the output tensor. The merger adds an
/// additional synthetic tensor at the end of this set to represent all		/// additional synthetic tensor at the end of this set to represent all
/// invariant expressions in the kernel.		/// invariant expressions in the kernel.
Merger(unsigned t, unsigned l)		Merger(unsigned t, unsigned l, unsigned fl)
: outTensor(t - 1), syntheticTensor(t), numTensors(t + 1), numLoops(l),		: outTensor(t - 1), syntheticTensor(t), numTensors(t + 1),
hasSparseOut(false),		numNativeLoops(l), numLoops(l + fl), hasSparseOut(false),
dimTypes(t + 1, std::vector<DimLevelType>(l, DimLevelType::Undef)),		dimTypes(numTensors,
loopIdxToDim(t + 1, std::vector<Optional<unsigned>>(l, llvm::None)) {}		std::vector<DimLevelType>(numLoops, DimLevelType::Undef)),
		loopIdxToDim(numTensors,
		std::vector<Optional<unsigned>>(numLoops, llvm::None)),
		dimToLoopIdx(numTensors,
		std::vector<Optional<unsigned>>(numLoops, llvm::None)) {}

/// Adds a tensor expression. Returns its index.		/// Adds a tensor expression. Returns its index.
unsigned addExp(Kind k, unsigned e0, unsigned e1 = -1u, Value v = Value(),		unsigned addExp(Kind k, unsigned e0, unsigned e1 = -1u, Value v = Value(),
Operation *op = nullptr);		Operation *op = nullptr);
unsigned addExp(Kind k, unsigned e, Value v, Operation *op = nullptr) {		unsigned addExp(Kind k, unsigned e, Value v, Operation *op = nullptr) {
return addExp(k, e, -1u, v, op);		return addExp(k, e, -1u, v, op);
}		}
unsigned addExp(Kind k, Value v, Operation *op = nullptr) {		unsigned addExp(Kind k, Value v, Operation *op = nullptr) {
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	public:
/// Returns true if Li and Lj only differ in dense.		/// Returns true if Li and Lj only differ in dense.
bool onlyDenseDiff(unsigned i, unsigned j);		bool onlyDenseDiff(unsigned i, unsigned j);

/// Bit translation (get tensor ID).		/// Bit translation (get tensor ID).
unsigned tensor(unsigned b) const { return b % numTensors; }		unsigned tensor(unsigned b) const { return b % numTensors; }
/// Bit translation (get loop index).		/// Bit translation (get loop index).
unsigned index(unsigned b) const { return b / numTensors; }		unsigned index(unsigned b) const { return b / numTensors; }

		/// Returns the total loop count, i.e. native loops plus filter loops.
		unsigned getNumLoops() const { return numLoops; }
		/// Returns how many of the loops are native loops.
		unsigned getNumNativeLoops() const { return numNativeLoops; }
		/// Returns how many of the loops are filter loops (total minus native).
		unsigned getNumFilterLoops() const { return numLoops - numNativeLoops; }
		/// Returns the index of the first filter loop; filter loops are numbered
		/// directly after the native loops.
		unsigned getFilterLoopStartingIdx() const { return numNativeLoops; }
		aartbikUnsubmitted Not Done Reply Inline Actions just numNativeLoops? (given that you do that for all others too, it is local enough that this does not go out of sync) aartbik: just numNativeLoops? (given that you do that for all others too, it is local enough that this…

/// Returns true if bit corresponds to index of output tensor.		/// Returns true if bit corresponds to index of output tensor.
bool isOutTensor(unsigned b, unsigned i) const {		bool isOutTensor(unsigned b, unsigned i) const {
return tensor(b) == outTensor && index(b) == i;		return tensor(b) == outTensor && index(b) == i;
}		}

/// Gets tensor ID for the output tensor.		/// Gets tensor ID for the output tensor.
unsigned getOutTensorID() const { return outTensor; }		unsigned getOutTensorID() const { return outTensor; }
/// Gets tensor ID for the synthetic tensor (used for all invariant tensor		/// Gets tensor ID for the synthetic tensor (used for all invariant tensor
/// expressions).		/// expressions).
unsigned getSynTensorID() const { return syntheticTensor; }		unsigned getSynTensorID() const { return syntheticTensor; }

		/// Returns true if loop index `ldx` denotes a filter loop, i.e. it is not
		/// below the number of native loops. `ldx` must be a valid loop index.
		bool isFilterLoop(unsigned ldx) const {
		  assert(ldx < numLoops);
		  return !(ldx < numNativeLoops);
		}

/// Returns true if given tensor iterates only in the given tensor		/// Returns true if given tensor iterates only in the given tensor
/// expression. For the output tensor, this defines a "simply dynamic"		/// expression. For the output tensor, this defines a "simply dynamic"
/// operation [Bik96]. For instance: a(i) *= 2.0 or a(i) += a(i) for		/// operation [Bik96]. For instance: a(i) *= 2.0 or a(i) += a(i) for
/// sparse vector a.		/// sparse vector a.
bool isSingleCondition(unsigned t, unsigned e) const;		bool isSingleCondition(unsigned t, unsigned e) const;

/// Returns true if any set bit corresponds to sparse dimension level type.		/// Returns true if any set bit corresponds to sparse dimension level type.
bool hasAnySparse(const BitVector &bits) const;		bool hasAnySparse(const BitVector &bits) const;

/// Gets the dimension level type of the `t`th tensor on `i`th loop.		/// Gets the dimension level type of the `t`th tensor on `i`th loop.
DimLevelType getDimLevelType(unsigned t, unsigned i) const {		DimLevelType getDimLevelType(unsigned t, unsigned i) const {
assert(t < numTensors && i < numLoops);		assert(t < numTensors && i < numLoops);
return dimTypes[t][i];		return dimTypes[t][i];
}		}

/// Gets the dimension level type of `b`.		/// Gets the dimension level type of `b`.
DimLevelType getDimLevelType(unsigned b) const {		DimLevelType getDimLevelType(unsigned b) const {
return getDimLevelType(tensor(b), index(b));		return getDimLevelType(tensor(b), index(b));
}		}

		/// Gets the loop index recorded for dimension `dim` of the `t`th tensor.
		/// The entry is llvm::None until setDimAndDimLevelType() has stored a loop
		/// for that (tensor, dimension) pair.
		Optional<unsigned> getLoopIdx(unsigned t, unsigned dim) const {
		  assert(t < numTensors && dim < numLoops);
		  const auto &loopsOfTensor = dimToLoopIdx[t];
		  return loopsOfTensor[dim];
		}

/// Gets the dimension number of the `t`th tensor on `i`th loop.		/// Gets the dimension number of the `t`th tensor on `i`th loop.
Optional<unsigned> getDimNum(unsigned t, unsigned i) const {		Optional<unsigned> getDimNum(unsigned t, unsigned i) const {
assert(t < numTensors && i < numLoops);		assert(t < numTensors && i < numLoops);
return loopIdxToDim[t][i];		return loopIdxToDim[t][i];
}		}

/// Gets the dimension number of `b`.		/// Gets the dimension number of `b`.
Optional<unsigned> getDimNum(unsigned b) const {		Optional<unsigned> getDimNum(unsigned b) const {
return getDimNum(tensor(b), index(b));		return getDimNum(tensor(b), index(b));
}		}

/// Sets the dimension and dimension level type of the `t`th tensor on `i`th		/// Sets the dimension and dimension level type of the `t`th tensor on `i`th
/// loop.		/// loop.
void setDimAndDimLevelType(unsigned t, unsigned i, unsigned dim,		void setDimAndDimLevelType(unsigned t, unsigned i, unsigned dim,
DimLevelType dlt) {		DimLevelType dlt) {
assert(isValidDLT(dlt));		assert(isValidDLT(dlt));
dimTypes[t][i] = dlt;		dimTypes[t][i] = dlt;
loopIdxToDim[t][i] = dim;		loopIdxToDim[t][i] = dim;
		assert(dim < numLoops);
		dimToLoopIdx[t][dim] = i;
}		}

// Iterates the bits of a lattice, for each set bit, converts it into the		// Iterates the bits of a lattice, for each set bit, converts it into the
// corresponding tensor dimension and invokes the callback.		// corresponding tensor dimension and invokes the callback.
void foreachTidDimPairInBits(		void foreachTidDimPairInBits(
const BitVector &bits,		const BitVector &bits,
function_ref<void(unsigned b, unsigned tid, Optional<unsigned> dim,		function_ref<void(unsigned b, unsigned tid, Optional<unsigned> dim,
DimLevelType dlt)>		DimLevelType dlt)>
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines	private:

/// Traverses the SSA tree (possibly a DAG) to build a tensor expression.		/// Traverses the SSA tree (possibly a DAG) to build a tensor expression.
Optional<unsigned> buildTensorExp(linalg::GenericOp op, Value v);		Optional<unsigned> buildTensorExp(linalg::GenericOp op, Value v);

/// Merger data structures.		/// Merger data structures.
const unsigned outTensor;		const unsigned outTensor;
const unsigned syntheticTensor;		const unsigned syntheticTensor;
const unsigned numTensors;		const unsigned numTensors;
		const unsigned numNativeLoops;
const unsigned numLoops;		const unsigned numLoops;
bool hasSparseOut;		bool hasSparseOut;
// Map that converts pair<tensor id, loop id> to the corresponding dimension		// Map that converts pair<tensor id, loop id> to the corresponding dimension
// level type.		// level type.
std::vector<std::vector<DimLevelType>> dimTypes;		std::vector<std::vector<DimLevelType>> dimTypes;
// Map that converts pair<tensor id, loop id> to the corresponding dimension.		// Map that converts pair<tensor id, loop id> to the corresponding dimension.
std::vector<std::vector<Optional<unsigned>>> loopIdxToDim;		std::vector<std::vector<Optional<unsigned>>> loopIdxToDim;
		// Map that converts pair<tensor id, dim> to the corresponding loop id.
		std::vector<std::vector<Optional<unsigned>>> dimToLoopIdx;
llvm::SmallVector<TensorExp> tensorExps;		llvm::SmallVector<TensorExp> tensorExps;
llvm::SmallVector<LatPoint> latPoints;		llvm::SmallVector<LatPoint> latPoints;
llvm::SmallVector<SmallVector<unsigned>> latSets;		llvm::SmallVector<SmallVector<unsigned>> latSets;
};		};

} // namespace sparse_tensor		} // namespace sparse_tensor
} // namespace mlir		} // namespace mlir

#endif // MLIR_DIALECT_SPARSETENSOR_UTILS_MERGER_H_		#endif // MLIR_DIALECT_SPARSETENSOR_UTILS_MERGER_H_

mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h

Show First 20 Lines • Show All 312 Lines • ▼ Show 20 Lines
// loopEmiter.enterLoopOverTensorAtDim(T1, 1);		// loopEmiter.enterLoopOverTensorAtDim(T1, 1);
// loopEmiter.exitCurrentLoop();		// loopEmiter.exitCurrentLoop();
// loopEmiter.enterLoopOverTensorAtDim(T2, 1);		// loopEmiter.enterLoopOverTensorAtDim(T2, 1);
// loopEmiter.exitCurrentLoop(); // exit k		// loopEmiter.exitCurrentLoop(); // exit k
// loopEmiter.exitCurrentLoop(); // exit j		// loopEmiter.exitCurrentLoop(); // exit j
// loopEmiter.exitCurrentLoop(); // exit i		// loopEmiter.exitCurrentLoop(); // exit i
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

// TODO: Sparsification should also rely on this class to generate loops.
class SparseTensorLoopEmitter {		class SparseTensorLoopEmitter {
public:		public:
/// Optional callback function to setup dense output tensors when		/// Optional callback function to setup dense output tensors when
/// initializing the loop emitter (e.g., to fill a dense output with zeros).		/// initializing the loop emitter (e.g., to fill a dense output with zeros).
using OutputUpdater = function_ref<Value(OpBuilder &builder, Location loc,		using OutputUpdater = function_ref<Value(OpBuilder &builder, Location loc,
Value memref, Value tensor)>;		Value memref, Value tensor)>;

/// Constructor: take an array of tensors inputs, on which the generated loops		/// Constructor: take an array of tensors inputs, on which the generated loops
/// will iterate on. The index of the tensor in the array is also the		/// will iterate on. The index of the tensor in the array is also the
/// tensor id (tid) used in related functions.		/// tensor id (tid) used in related functions.
/// If isSparseOut is set, the loop emitter assumes that the sparse output tensor		/// If isSparseOut is set, the loop emitter assumes that the sparse output tensor
/// is empty, and will always generate loops on it based on the dim sizes.		/// is empty, and will always generate loops on it based on the dim sizes.
explicit SparseTensorLoopEmitter(ValueRange tensors, bool hasOutput = false,		explicit SparseTensorLoopEmitter(ValueRange tensors, bool hasOutput = false,
bool isSparseOut = false);		bool isSparseOut = false,
		ArrayRef<unsigned> topSort = {});

/// Starts a loop emitting session by generating all the buffers needed to		/// Starts a loop emitting session by generating all the buffers needed to
/// iterate tensors.		/// iterate tensors.
void initializeLoopEmit(OpBuilder &builder, Location loc,		void initializeLoopEmit(OpBuilder &builder, Location loc,
OutputUpdater updater = nullptr);		OutputUpdater updater = nullptr);

		/// Generates a list of operations to compute the affine expression.
		Value genAffine(OpBuilder &builder, AffineExpr a, Location loc);

/// Enters a new loop sequence, the loops within the same sequence starts from		/// Enters a new loop sequence, the loops within the same sequence starts from
/// the break points of previous loop instead of starting over from 0.		/// the break points of previous loop instead of starting over from 0.
/// e.g.,		/// e.g.,
/// {		/// {
/// // loop sequence start.		/// // loop sequence start.
/// p0 = while(xxx)		/// p0 = while(xxx)
/// ...		/// ...
/// break p0		/// break p0
Show All 22 Lines	public:
/// return the reduction variable used inside the generated loop.		/// return the reduction variable used inside the generated loop.
Operation *enterLoopOverTensorAtDim(OpBuilder &builder, Location loc,		Operation *enterLoopOverTensorAtDim(OpBuilder &builder, Location loc,
size_t tid, size_t dim,		size_t tid, size_t dim,
MutableArrayRef<Value> reduc = {},		MutableArrayRef<Value> reduc = {},
bool isParallel = false,		bool isParallel = false,
ArrayRef<size_t> extraTids = {},		ArrayRef<size_t> extraTids = {},
ArrayRef<size_t> extraDims = {});		ArrayRef<size_t> extraDims = {});

		/// Emits a "filter" loop over dimension `dim` of tensor `tid`: an scf.for
		/// over the stored positions of that level whose body is guarded by an
		/// scf.if that only matches when the loaded coordinate equals the value of
		/// `affine`. On return the builder's insertion point is inside the matching
		/// branch and the loop has been pushed onto the loop stack.
		/// `affine` must not be a plain AffineDimExpr and the level must not be
		/// dense. `reduc` is updated in place to the loop's region iteration
		/// arguments. Returns the generated scf.for operation.
		Operation *enterFilterLoopOverTensorAtDim(OpBuilder &builder, Location loc,
		size_t tid, size_t dim,
		AffineExpr affine,
		MutableArrayRef<Value> reduc = {});

		/// Evaluates `affine` with genAffine and records the address that
		/// genAddress derives from it as the position of tensor `tid` at dimension
		/// `dim`. No loop is entered.
		/// NOTE(review): per the name this is presumably meant for dense levels
		/// only; the implementation does not check that -- confirm with callers.
		void genDenseAffineAddressAtCurLevel(OpBuilder &builder, Location loc,
		size_t tid, size_t dim,
		AffineExpr affine);

/// Emits a co-iteration loop over a set of tensors.		/// Emits a co-iteration loop over a set of tensors.
Operation *enterCoIterationOverTensorsAtDims(		Operation *enterCoIterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,		OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc = {},		ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc = {},
ArrayRef<size_t> extraTids = {}, ArrayRef<size_t> extraDims = {});		ArrayRef<size_t> extraTids = {}, ArrayRef<size_t> extraDims = {});

void exitCurrentLoop(RewriterBase &rewriter, Location loc,		void exitCurrentLoop(RewriterBase &rewriter, Location loc,
MutableArrayRef<Value> reduc = {});		MutableArrayRef<Value> reduc = {});

/// Returns the array of coordinate for all the loop generated till now.		/// Returns the array of coordinate for all the loop generated till now.
void getCoordinateArray(SmallVectorImpl<Value> &coords) const {		void getCoordinateArray(SmallVectorImpl<Value> &coords) const {
for (auto &l : loopStack)		for (auto &l : loopStack)
coords.push_back(l.iv);		coords.push_back(l.iv);
}		}

/// Gets loop induction variable at the given level.		/// Gets the current loop depth, i.e. the number of loops on the loop stack.
		unsigned getCurrentDepth() const { return loopStack.size(); }

		/// Gets loop induction variable at the given level.
Value getLoopIV(size_t level) const {		Value getLoopIV(size_t level) const {
if (level < loopStack.size())		if (level < loopStack.size())
return loopStack[level].iv;		return loopStack[level].iv;
return nullptr;		return nullptr;
}		}

///		///
/// Getters.		/// Getters.
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines	private:

// Loop Stack, stores the information of all the nested loops that are alive.		// Loop Stack, stores the information of all the nested loops that are alive.
std::vector<LoopLevelInfo> loopStack;		std::vector<LoopLevelInfo> loopStack;

// Loop Sequence Stack, stores the universal index for the current loop		// Loop Sequence Stack, stores the universal index for the current loop
// sequence.		// sequence.
std::vector<Value> loopSeqStack;		std::vector<Value> loopSeqStack;

		// Inverse of the topologically sorted loop order passed to the constructor:
		// maps the position of an AffineDimExpr to the index of its loop in loopStack.
		aartbikUnsubmitted Not Done Reply Inline Actions This is really just the inverse topsort, right? It makes sense you need this for affine mapping, but the explanation right now seems to explain it in terms of the affine mapping, rather than its true meaning? aartbik: This is really just the inverse topsort, right? It makes sense you need this for affine mapping…
		// TODO: We should probably use a callback function here to make it more
		// general.
		std::vector<unsigned> sparsiferLoopLvlMap;

// TODO: not yet used, it should track the current level for each tensor		// TODO: not yet used, it should track the current level for each tensor
// to help eliminate `dim` parameters from above APIs.		// to help eliminate `dim` parameters from above APIs.
// std::vector<size_t> curLv;		// std::vector<size_t> curLv;
};		};

} // namespace sparse_tensor		} // namespace sparse_tensor
} // namespace mlir		} // namespace mlir

#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENUTILS_H_		#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_CODEGENUTILS_H_

mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp

Show First 20 Lines • Show All 90 Lines • ▼ Show 20 Lines
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Sparse tensor loop emitter class implementations		// Sparse tensor loop emitter class implementations
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors,		SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors,
bool hasOutput,		bool hasOutput,
bool isSparseOut)		bool isSparseOut,
		ArrayRef<unsigned> topSort)
: hasOutput(hasOutput), isSparseOut(isSparseOut),		: hasOutput(hasOutput), isSparseOut(isSparseOut),
tensors(tensors.begin(), tensors.end()), dimTypes(tensors.size()),		tensors(tensors.begin(), tensors.end()), dimTypes(tensors.size()),
pidxs(tensors.size()), coord(tensors.size()), highs(tensors.size()),		pidxs(tensors.size()), coord(tensors.size()), highs(tensors.size()),
ptrBuffer(tensors.size()), idxBuffer(tensors.size()),		ptrBuffer(tensors.size()), idxBuffer(tensors.size()),
valBuffer(tensors.size()), loopStack() {		valBuffer(tensors.size()), loopStack(),
		sparsiferLoopLvlMap(topSort.size(), 0) {
for (size_t tid = 0, e = tensors.size(); tid < e; tid++) {		for (size_t tid = 0, e = tensors.size(); tid < e; tid++) {
auto t = tensors[tid];		auto t = tensors[tid];
// a scalar or 0-dimension tensors		// a scalar or 0-dimension tensors
if (isZeroRankedTensorOrScalar(t.getType()))		if (isZeroRankedTensorOrScalar(t.getType()))
continue;		continue;
auto rtp = t.getType().cast<RankedTensorType>();		auto rtp = t.getType().cast<RankedTensorType>();
auto rank = static_cast<size_t>(rtp.getRank());		auto rank = static_cast<size_t>(rtp.getRank());
auto enc = getSparseTensorEncoding(rtp);		auto enc = getSparseTensorEncoding(rtp);
// We always treat sparse output tensor as dense so that we always iterate		// We always treat sparse output tensor as dense so that we always iterate
// it based on dim size.		// it based on dim size.
if (enc && !(isOutputTensor(tid) && isSparseOut))		if (enc && !(isOutputTensor(tid) && isSparseOut))
for (auto dimTp : enc.getDimLevelType())		for (auto dimTp : enc.getDimLevelType())
dimTypes[tid].push_back(dimTp);		dimTypes[tid].push_back(dimTp);
else		else
dimTypes[tid].assign(rank, DimLevelType::Dense);		dimTypes[tid].assign(rank, DimLevelType::Dense);

// Initialize using empty value.		// Initialize using empty value.
pidxs[tid].assign(rank, Value());		pidxs[tid].assign(rank, Value());
coord[tid].assign(rank, Value());		coord[tid].assign(rank, Value());
highs[tid].assign(rank, Value());		highs[tid].assign(rank, Value());
ptrBuffer[tid].assign(rank, Value());		ptrBuffer[tid].assign(rank, Value());
idxBuffer[tid].assign(rank, Value());		idxBuffer[tid].assign(rank, Value());
}		}

		for (unsigned i = 0, e = topSort.size(); i < e; i++) {
		// This is an inverse map of the topologically sorted loop index from
		// sparsifier. This is needed to map the AffineDimExpr back to the loopStack
		// index used in loop emitter.
		sparsiferLoopLvlMap[topSort[i]] = i;
		}
}		}

void SparseTensorLoopEmitter::initializeLoopEmit(		void SparseTensorLoopEmitter::initializeLoopEmit(
OpBuilder &builder, Location loc,		OpBuilder &builder, Location loc,
SparseTensorLoopEmitter::OutputUpdater updater) {		SparseTensorLoopEmitter::OutputUpdater updater) {
// For every tensor, find lower and upper bound on dimensions, set the		// For every tensor, find lower and upper bound on dimensions, set the
// same bounds on loop indices, and obtain dense or sparse buffer(s).		// same bounds on loop indices, and obtain dense or sparse buffer(s).
for (size_t t = 0, e = tensors.size(); t < e; t++) {		for (size_t t = 0, e = tensors.size(); t < e; t++) {
▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines	Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), loop,		loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), loop,
coord[tid][dim]);		coord[tid][dim]);
// Emit extra locals.		// Emit extra locals.
emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);		emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);

return loop;		return loop;
}		}

		/// Recursively emits the operations that compute the affine expression `a`
		/// and returns the resulting value.
		///
		/// Dimension identifiers resolve to the induction variable of the matching
		/// loop on the loop stack, constants become index constants, and additions
		/// and multiplications become arith.addi / arith.muli. Any other expression
		/// kind is unreachable.
		Value SparseTensorLoopEmitter::genAffine(OpBuilder &builder, AffineExpr a,
		                                         Location loc) {
		  switch (a.getKind()) {
		  case AffineExprKind::DimId: {
		    // Translate the dimension position into a loopStack level through the
		    // inverse topological order built in the constructor.
		    unsigned idx = a.cast<AffineDimExpr>().getPosition();
		    assert(idx < sparsiferLoopLvlMap.size() &&
		           "affine dimension is not covered by the topological order");
		    assert(sparsiferLoopLvlMap[idx] < loopStack.size() &&
		           "loop for affine dimension has not been entered yet");
		    return loopStack[sparsiferLoopLvlMap[idx]].iv;
		  }
		  case AffineExprKind::Add: {
		    auto binOp = a.cast<AffineBinaryOpExpr>();
		    // Materialize the operands in separate statements: the evaluation order
		    // of function arguments is unspecified, so nesting the recursive calls
		    // would let the emitted operation order vary between compilers.
		    Value lhs = genAffine(builder, binOp.getLHS(), loc);
		    Value rhs = genAffine(builder, binOp.getRHS(), loc);
		    return builder.create<arith::AddIOp>(loc, lhs, rhs);
		  }
		  case AffineExprKind::Mul: {
		    auto binOp = a.cast<AffineBinaryOpExpr>();
		    // Same ordering concern as for additions above.
		    Value lhs = genAffine(builder, binOp.getLHS(), loc);
		    Value rhs = genAffine(builder, binOp.getRHS(), loc);
		    return builder.create<arith::MulIOp>(loc, lhs, rhs);
		  }
		  case AffineExprKind::Constant: {
		    int64_t c = a.cast<AffineConstantExpr>().getValue();
		    return constantIndex(builder, loc, c);
		  }
		  default:
		    llvm_unreachable("unexpected affine subscript");
		  }
		}

		/// Emits a filter loop over dimension `dim` of tensor `tid`.
		///
		/// The generated code is an scf.for over the positions [pidxs, highs) of the
		/// level, whose body loads the coordinate at the current position and only
		/// runs user code (inside an scf.if) when that coordinate equals the value of
		/// `affine`. `reduc` is updated in place to the loop's region iteration
		/// arguments. On return the insertion point is at the start of the matching
		/// branch and the loop has been pushed onto loopStack.
		///
		/// Requires that `affine` is not a plain AffineDimExpr, that the level is not
		/// dense, and that the level has not been entered already.
		/// Returns the generated scf.for operation.
		Operation *SparseTensorLoopEmitter::enterFilterLoopOverTensorAtDim(
		    OpBuilder &builder, Location loc, size_t tid, size_t dim, AffineExpr affine,
		    MutableArrayRef<Value> reduc) {
		  // Validate the indices before they are used to subscript dimTypes.
		  assert(tid < dimTypes.size() && dim < dimTypes[tid].size());
		  assert(!affine.isa<AffineDimExpr>() && !isDenseDLT(dimTypes[tid][dim]));
		  // We can not re-enter the same level.
		  assert(!coord[tid][dim]);

		  Value step = constantIndex(builder, loc, 1);

		  Value lo = pidxs[tid][dim];
		  Value hi = highs[tid][dim];

		  // TODO: We should instead use a whileOp for filter loop to allow early
		  // break when exceeding (for ordered dimensions).
		  // TODO: There are many other potential opportunities that we might apply in
		  // the future. E.g., we could use binary search to locate the pointer index.
		  scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);

		  // In-place update on the reduction variable vector.
		  assert(forOp.getNumRegionIterArgs() == reduc.size());
		  for (unsigned i = 0, e = reduc.size(); i < e; i++)
		    reduc[i] = forOp.getRegionIterArg(i);

		  builder.setInsertionPointToStart(forOp.getBody());
		  Value iv = forOp.getInductionVar();

		  pidxs[tid][dim] = iv;
		  // Generating a load on the indices array yields the coordinate.
		  Value ptr = idxBuffer[tid][dim];
		  coord[tid][dim] = genIndexLoad(builder, loc, ptr, iv);

		  // Generate an if condition to filter out coordinates that are not equal to
		  // the result of the affine expression.
		  Value expected = genAffine(builder, affine, loc);
		  auto pred = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
		                                            coord[tid][dim], expected);
		  SmallVector<Type> types;
		  for (Value red : reduc)
		    types.push_back(red.getType());

		  // An else branch is only needed to forward the reduction values when the
		  // coordinate does not match.
		  bool hasReduc = !types.empty();
		  scf::IfOp ifOp = builder.create<scf::IfOp>(loc, types, pred,
		                                             /*withElseRegion=*/hasReduc);
		  if (hasReduc) {
		    // scf.for (a) -> v
		    //   %s = scf.if (a) -> v
		    //     user-generated code.
		    //   else
		    //     yield a
		    //   yield %s
		    builder.create<scf::YieldOp>(loc, ifOp.getResults());
		    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
		    // On mismatch.
		    builder.create<scf::YieldOp>(loc, reduc);
		  }
		  // Set the insert point to matched branch.
		  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());

		  // NOTE: we can also prepare for the next dim here in advance.
		  // Push the loop into stack.
		  loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), forOp,
		                         coord[tid][dim]);
		  return forOp;
		}

		/// Computes the value of `affine` and stores the address genAddress derives
		/// from it as the current position of tensor `tid` at dimension `dim`.
		void SparseTensorLoopEmitter::genDenseAffineAddressAtCurLevel(
		    OpBuilder &builder, Location loc, size_t tid, size_t dim,
		    AffineExpr affine) {
		  pidxs[tid][dim] =
		      genAddress(builder, loc, tid, dim, genAffine(builder, affine, loc));
		}

Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims(		Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,		OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc,		ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc,
ArrayRef<size_t> extraTids, ArrayRef<size_t> extraDims) {		ArrayRef<size_t> extraTids, ArrayRef<size_t> extraDims) {
assert(tids.size() == dims.size());		assert(tids.size() == dims.size());
SmallVector<Type> types;		SmallVector<Type> types;
SmallVector<Value> operands;		SmallVector<Value> operands;
// Construct the while-loop with a parameter for each index.		// Construct the while-loop with a parameter for each index.
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines	void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
MutableArrayRef<Value> reduc) {		MutableArrayRef<Value> reduc) {
LoopLevelInfo &loopInfo = loopStack.back();		LoopLevelInfo &loopInfo = loopStack.back();
auto &dims = loopStack.back().dims;		auto &dims = loopStack.back().dims;
auto &tids = loopStack.back().tids;		auto &tids = loopStack.back().tids;
auto forOp = llvm::dyn_cast<scf::ForOp>(loopInfo.loop);		auto forOp = llvm::dyn_cast<scf::ForOp>(loopInfo.loop);
if (forOp) {		if (forOp) {
if (!reduc.empty()) {		if (!reduc.empty()) {
assert(reduc.size() == forOp.getNumResults());		assert(reduc.size() == forOp.getNumResults());
rewriter.setInsertionPointToEnd(forOp.getBody());
aartbikUnsubmitted Not Done Reply Inline Actions was this already set right? aartbik: was this already set right?
PeimingAuthorUnsubmitted Done Reply Inline Actions Yes, because filter loop will look like for() if (idx == affine) // user-generated so, we need to generate yield in the `ifOp` instead of `forOp` But maybe I should add a doc to indicate that it is the users' responsibility to ensure the insertion point is correct. Peiming: Yes, because filter loop will look like ``` for() if (idx == affine) // user-generated ```…
rewriter.create<scf::YieldOp>(loc, reduc);		rewriter.create<scf::YieldOp>(loc, reduc);
}		}
// Exit the loop.		// Exit the loop.
rewriter.setInsertionPointAfter(forOp);		rewriter.setInsertionPointAfter(forOp);
// In-place update reduction variables.		// In-place update reduction variables.
for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++)		for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++)
reduc[i] = forOp.getResult(i);		reduc[i] = forOp.getResult(i);
} else {		} else {
▲ Show 20 Lines • Show All 545 Lines • Show Last 20 Lines

mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp

Show All 21 Lines
#include "mlir/Dialect/Linalg/Utils/Utils.h"		#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"		#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"		#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"		#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"		#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"		#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/Dialect/SparseTensor/Utils/Merger.h"		#include "mlir/Dialect/SparseTensor/Utils/Merger.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"		#include "mlir/Dialect/Tensor/IR/Tensor.h"
		#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/Matchers.h"		#include "mlir/IR/Matchers.h"
#include "mlir/IR/TensorEncoding.h"		#include "mlir/IR/TensorEncoding.h"
#include "llvm/ADT/SmallBitVector.h"		#include "llvm/ADT/SmallBitVector.h"

using namespace mlir;		using namespace mlir;
using namespace mlir::sparse_tensor;		using namespace mlir::sparse_tensor;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
Show All 14 Lines
enum Reduction { kNoReduc, kSum, kProduct, kAnd, kOr, kXor, kCustom };		enum Reduction { kNoReduc, kSum, kProduct, kAnd, kOr, kXor, kCustom };

// Code generation.		// Code generation.
struct CodeGen {		struct CodeGen {
CodeGen(SparsificationOptions o, ValueRange tensors, unsigned numTensors,		CodeGen(SparsificationOptions o, ValueRange tensors, unsigned numTensors,
unsigned numLoops, OpOperand *op, unsigned nest,		unsigned numLoops, OpOperand *op, unsigned nest,
std::vector<unsigned> &ts)		std::vector<unsigned> &ts)
: options(o), loopEmitter(tensors, /hasOutput=/true,		: options(o), loopEmitter(tensors, /hasOutput=/true,
/isSparseOut=/op != nullptr),		/isSparseOut=/op != nullptr, ts),
sparseOut(op), outerParNest(nest), topSort(ts) {		sparseOut(op), outerParNest(nest), topSort(ts) {
if (op)		if (op)
insChain = op->get();		insChain = op->get();
}		}
/// Sparsification options.		/// Sparsification options.
SparsificationOptions options;		SparsificationOptions options;
/// Loop emitter helper class.		/// Loop emitter helper class.
SparseTensorLoopEmitter loopEmitter;		SparseTensorLoopEmitter loopEmitter;
Show All 11 Lines	struct CodeGen {
Value insChain; // bookkeeping for insertion chain		Value insChain; // bookkeeping for insertion chain
Value expValues;		Value expValues;
Value expFilled;		Value expFilled;
Value expAdded;		Value expAdded;
Value expCount;		Value expCount;
// Topsort (reference should remain in scope).		// Topsort (reference should remain in scope).
std::vector<unsigned> &topSort;		std::vector<unsigned> &topSort;

		/// Returns a view of the leading entries of `topSort`, one entry per
		/// loop level reported by `loopEmitter.getCurrentDepth()`. The view
		/// aliases `topSort` and is only valid while that vector is alive.
		ArrayRef<unsigned> getLoopCurStack() const {
		  return ArrayRef<unsigned>(topSort).slice(0, loopEmitter.getCurrentDepth());
		}

Value getLoopIdxValue(size_t loopIdx) const {		Value getLoopIdxValue(size_t loopIdx) const {
for (unsigned lv = 0; lv < topSort.size(); lv++)		for (unsigned lv = 0; lv < topSort.size(); lv++)
if (topSort[lv] == loopIdx)		if (topSort[lv] == loopIdx)
return loopEmitter.getLoopIV(lv);		return loopEmitter.getLoopIV(lv);

llvm_unreachable("invalid loop index");		llvm_unreachable("invalid loop index");
}		}
};		};

		/// AffineExpr visitor that picks a single AffineDimExpr out of an affine
		/// expression. Among the visited dimensions, the last one whose iterator
		/// type equals the type set through setPickedIterType() wins; if none
		/// matches, the first visited dimension is kept as a fallback.
		class ParallelAffineDimFinder
		    : public AffineExprVisitor<ParallelAffineDimFinder> {
		  /// Dimension picked by the most recent walk; null until a dim is visited.
		  AffineExpr paraDim;
		  /// Preferred iterator type. Default-initialized so that visitDimExpr()
		  /// never reads an indeterminate value when setPickedIterType() was not
		  /// called before walking.
		  utils::IteratorType pickIterType = utils::IteratorType::parallel;
		  /// Iterator types of the op, indexed by dimension position.
		  SmallVector<utils::IteratorType> iterTypes;

		public:
		  explicit ParallelAffineDimFinder(linalg::GenericOp op)
		      : iterTypes(op.getIteratorTypesArray()) {}

		  /// Shadows the base-class walk so that every top-level walk starts with
		  /// no picked dimension. Without the reset, a finder that is reused for
		  /// a second expression could return a dimension that belongs to the
		  /// previously walked expression.
		  /// NOTE(review): assumes the base class recurses through its own
		  /// walkPostOrder, so the reset happens once per top-level walk --
		  /// confirm against AffineExprVisitor.
		  void walkPostOrder(AffineExpr expr) {
		    paraDim = nullptr;
		    AffineExprVisitor<ParallelAffineDimFinder>::walkPostOrder(expr);
		  }

		  /// Records `expr` if nothing was picked yet or if its iterator type is
		  /// the preferred one.
		  void visitDimExpr(AffineDimExpr expr) {
		    if (paraDim == nullptr || pickIterType == iterTypes[expr.getPosition()])
		      paraDim = expr;
		  }

		  /// Sets the iterator type preferred by subsequent walks.
		  void setPickedIterType(utils::IteratorType iterType) {
		    pickIterType = iterType;
		  }

		  /// Returns the picked dimension. Only meaningful after walking an
		  /// expression that contains at least one AffineDimExpr; otherwise
		  /// `paraDim` is still null when it is cast here.
		  AffineDimExpr getDimExpr() const { return paraDim.cast<AffineDimExpr>(); }
		};
} // namespace		} // namespace

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Sparse compiler analysis methods.		// Sparse compiler analysis methods.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

		/// Reports whether every loop index used by affine expression `a` is
		/// either `ldx` itself or listed in `loopStack`; constants always
		/// qualify. `atLevel` is set to true when `ldx` is encountered and is
		/// never cleared here, so callers must initialize it. Because operands
		/// are checked left to right and checking stops at the first failing
		/// operand, `atLevel` may stay unset when false is returned.
		static bool isInvariantAffine(AffineExpr a, ArrayRef<unsigned> loopStack,
		                              unsigned ldx, bool &atLevel) {
		  const AffineExprKind kind = a.getKind();

		  if (kind == AffineExprKind::DimId) {
		    const unsigned pos = a.cast<AffineDimExpr>().getPosition();
		    if (pos == ldx) {
		      // The index of the level being examined counts as invariant.
		      atLevel = true;
		      return true;
		    }
		    // Otherwise the index must be one of the loops on the stack.
		    for (unsigned entered : loopStack)
		      if (entered == pos)
		        return true;
		    return false;
		  }

		  if (kind == AffineExprKind::Add || kind == AffineExprKind::Mul) {
		    auto binary = a.cast<AffineBinaryOpExpr>();
		    if (!isInvariantAffine(binary.getLHS(), loopStack, ldx, atLevel))
		      return false;
		    return isInvariantAffine(binary.getRHS(), loopStack, ldx, atLevel);
		  }

		  // Every remaining kind is expected to be a constant.
		  assert(a.isa<AffineConstantExpr>());
		  return true;
		}

		/// Determines if affine expression is invariant.
		static bool isInvariantAffine(const CodeGen &codegen, AffineExpr a,
		unsigned ldx, bool &atLevel) {
		return isInvariantAffine(a, codegen.getLoopCurStack(), ldx, atLevel);
		}

/// Helper method to construct a permuted dimension ordering		/// Helper method to construct a permuted dimension ordering
/// that adheres to the given topological sort.		/// that adheres to the given topological sort.
static AffineMap permute(MLIRContext *context, AffineMap m,		static AffineMap permute(const Merger &merger, MLIRContext *context,
std::vector<unsigned> &topSort) {		AffineMap m, ArrayRef<unsigned> topSort) {
unsigned sz = topSort.size();		unsigned sz = topSort.size();
assert(m.getNumResults() == sz && "TopoSort/AffineMap size mismatch");		assert(m.getNumDims() + merger.getNumFilterLoops() == sz &&
		"TopoSort/AffineMap size mismatch");
		Peiming (author; unsubmitted, done): This is probably a typo, but I am not sure; it does not actually affect the patch. Let me know whether you want to keep it or not.
		Peiming (author; unsubmitted, done): Although we had an offline discussion on this, I still think it should be `getNumDims()` after taking more time to understand resolveCycles. Also, I believe it currently fails to handle cases where `NumDims != NumResults`, e.g., `AffineMap<(d0, d1, d2, d3) -> (d0, d1)>`.
		aartbik (unsubmitted, not done): Yes, you are absolutely right. I thought we had discussed num inputs vs. num dims (since they should be the same), but num results is of course wrong! The topsort has the size of the "parallel" index space, so either Dims or Inputs should be used.
// Construct the inverse of `m`; to avoid the asymptotic complexity		// Construct the inverse of `m`; to avoid the asymptotic complexity
// of calling `m.getPermutedPosition` repeatedly.		// of calling `m.getPermutedPosition` repeatedly.
SmallVector<unsigned> inv(sz);		SmallVector<unsigned> perm;
for (unsigned i = 0; i < sz; i++)		unsigned numResults = m.getNumResults();
inv[i] = m.getDimPosition(i);		BitVector worklist(numResults, true);
		unsigned loopDepth = 1;

// Construct the permutation.		// Construct the permutation.
SmallVector<unsigned> perm(sz);		while (worklist.any() && loopDepth <= topSort.size()) {
for (unsigned i = 0; i < sz; i++)		unsigned preSize = perm.size();
perm[i] = inv[topSort[i]];		for (auto dim : worklist.set_bits()) {
		bool atLevel = false;
		if (m.getResult(dim).isa<AffineConstantExpr>() \|\|
		(isInvariantAffine(m.getResult(dim), topSort.slice(0, loopDepth),
		topSort[loopDepth - 1], atLevel) &&
		atLevel)) {
		// If the matching affine is constant expression or just become
		// invariant. We can visit the dimension now without breaking the
		// topSort constraint.
		perm.push_back(dim);
		}
		}

		// Removes resolved dimension.
		for (unsigned i = preSize, e = perm.size(); i < e; i++)
		worklist.reset(perm[i]);

		// Tries to entering the next loop level.
		loopDepth += 1;
		}

		assert(perm.size() == numResults);
return AffineMap::getPermutationMap(perm, context);		return AffineMap::getPermutationMap(perm, context);
}		}

/// Helper method to inspect affine expressions. Rejects cases where the		/// Helper method to inspect affine expressions. Rejects cases where the
/// same index is used more than once. Also rejects compound affine		/// same index is used more than once. Also rejects compound affine
/// expressions in sparse dimensions.		/// expressions in sparse dimensions.
static bool findAffine(Merger &merger, unsigned tensor, unsigned dim,		static bool findAffine(Merger &merger, unsigned tensor, unsigned dim,
AffineExpr a, DimLevelType dlt,		AffineExpr a, DimLevelType dlt, unsigned &filterLdx,
bool setLvlFormat = true) {		bool setLvlFormat = true) {
switch (a.getKind()) {		switch (a.getKind()) {
case AffineExprKind::DimId: {		case AffineExprKind::DimId: {
unsigned idx = a.cast<AffineDimExpr>().getPosition();		unsigned idx = a.cast<AffineDimExpr>().getPosition();
if (!isUndefDLT(merger.getDimLevelType(tensor, idx)))		if (!isUndefDLT(merger.getDimLevelType(tensor, idx)))
return false; // used more than once		return false; // used more than once

if (setLvlFormat)		if (setLvlFormat)
merger.setDimAndDimLevelType(tensor, idx, dim, dlt);		merger.setDimAndDimLevelType(tensor, idx, dim, dlt);
return true;		return true;
}		}
case AffineExprKind::Add:		case AffineExprKind::Add:
case AffineExprKind::Mul: {		case AffineExprKind::Mul:
if (!isDenseDLT(dlt))		case AffineExprKind::Constant: {
return false; // compound only in dense dim		if (!isDenseDLT(dlt) && setLvlFormat) {
auto binOp = a.cast<AffineBinaryOpExpr>();		assert(isUndefDLT(merger.getDimLevelType(tensor, filterLdx)));
		// Use a filter loop for sparse affine expression.
		merger.setDimAndDimLevelType(tensor, filterLdx++, dim, dlt);
		}

		if (auto binOp = a.dyn_cast<AffineBinaryOpExpr>()) {
// We do not set dim level format for affine expresssion like d0 + d1 on		// We do not set dim level format for affine expresssion like d0 + d1 on
// both loop index at d0 and d1,		// either loop index at d0 or d1/
return findAffine(merger, tensor, dim, binOp.getLHS(), dlt, false) &&		// We continues the recursion merely to check whether current affine is
findAffine(merger, tensor, dim, binOp.getRHS(), dlt, false);		// admissible or not.
		return findAffine(merger, tensor, dim, binOp.getLHS(), dlt, filterLdx,
		false) &&
		findAffine(merger, tensor, dim, binOp.getRHS(), dlt, filterLdx,
		false);
		}
		// Falls through when it is a constant Affine
		return true;
}		}
case AffineExprKind::Constant:
return isDenseDLT(dlt); // const only in dense dim
default:		default:
return false;		return false;
}		}
}		}

		/// Counts the results of `affineMap` that are not a plain AffineDimExpr
		/// and whose dimension level type in the encoding of `tensor` is not
		/// dense. Returns 0 when `tensor` carries no sparse encoding.
		static unsigned getNumCompoundAffineOnSparseDims(AffineMap affineMap,
		                                                 Value tensor) {
		  auto enc = getSparseTensorEncoding(tensor.getType());
		  if (!enc)
		    return 0;

		  unsigned count = 0;
		  ArrayRef<AffineExpr> results = affineMap.getResults();
		  for (unsigned lvl = 0; lvl < results.size(); ++lvl) {
		    // The level type is looked up with `lvl`, while the matching map
		    // result is looked up with the index returned by toOrigDim.
		    AffineExpr expr = results[toOrigDim(enc, lvl)];
		    const bool isPlainDim = expr.isa<AffineDimExpr>();
		    if (!isPlainDim && !isDenseDLT(getDimLevelType(enc, lvl)))
		      ++count;
		  }
		  return count;
		}

		/// Sums, over all operands of `op`, the per-operand count computed by the
		/// (AffineMap, Value) overload using each operand's matching indexing map.
		static unsigned getNumCompoundAffineOnSparseDims(linalg::GenericOp op) {
		  unsigned total = 0;
		  for (OpOperand &operand : op->getOpOperands()) {
		    auto indexingMap = op.getMatchingIndexingMap(&operand);
		    total += getNumCompoundAffineOnSparseDims(indexingMap, operand.get());
		  }
		  return total;
		}

/// Helper method to inspect sparse encodings in the tensor types.		/// Helper method to inspect sparse encodings in the tensor types.
/// Fills the per-dimension sparsity information for all tensors.		/// Fills the per-dimension sparsity information for all tensors.
/// Returns true if the sparse annotations and affine subscript		/// Returns true if the sparse annotations and affine subscript
/// expressions of all tensors are admissible. Returns false if		/// expressions of all tensors are admissible. Returns false if
/// no annotations are found or inadmissible constructs occur.		/// no annotations are found or inadmissible constructs occur.
static bool findSparseAnnotations(Merger &merger, linalg::GenericOp op) {		static bool findSparseAnnotations(Merger &merger, linalg::GenericOp op) {
bool annotated = false;		bool annotated = false;
		unsigned filterLdx = merger.getFilterLoopStartingIdx();
for (OpOperand &t : op->getOpOperands()) {		for (OpOperand &t : op->getOpOperands()) {
auto map = op.getMatchingIndexingMap(&t);		auto map = op.getMatchingIndexingMap(&t);
auto enc = getSparseTensorEncoding(t.get().getType());		auto enc = getSparseTensorEncoding(t.get().getType());
if (enc)		if (enc)
annotated = true;		annotated = true;
assert(map.getNumResults() == op.getRank(&t));		assert(map.getNumResults() == op.getRank(&t));

for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {		for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
unsigned tensor = t.getOperandNumber();		unsigned tensor = t.getOperandNumber();
AffineExpr a = map.getResult(toOrigDim(enc, d));		AffineExpr a = map.getResult(toOrigDim(enc, d));
if (!findAffine(merger, tensor, d, a, getDimLevelType(enc, d)))		if (!findAffine(merger, tensor, d, a, getDimLevelType(enc, d), filterLdx))
return false; // inadmissible affine expression		return false; // inadmissible affine expression
}		}
}		}
		assert(filterLdx == merger.getNumLoops());
return annotated;		return annotated;
}		}

/// A helper to compute a topological sort. O(n^2) time complexity		/// A helper to compute a topological sort. O(n^2) time complexity
/// as we use adj matrix for the graph.		/// as we use adj matrix for the graph.
/// The sorted result will put the first Reduction iterator to the		/// The sorted result will put the first Reduction iterator to the
/// latest possible index.		/// latest possible index.
static bool topSortOptimal(unsigned n,		static bool topSortOptimal(unsigned n,
ArrayRef<utils::IteratorType> iteratorTypes,		ArrayRef<utils::IteratorType> iteratorTypes,
std::vector<unsigned> &topSort,		const Merger &merger, std::vector<unsigned> &topSort,
std::vector<unsigned> &inDegree,		std::vector<unsigned> &inDegree,
std::vector<std::vector<bool>> &adjM) {		std::vector<std::vector<bool>> &adjM) {
std::vector<unsigned> redIt; // reduce iterator with 0 degree		std::vector<unsigned> redIt; // reduce iterator with 0 degree
std::vector<unsigned> parIt; // parallel iterator with 0 degree		std::vector<unsigned> parIt; // parallel iterator with 0 degree
		std::vector<unsigned> filterIt; // filter loop with 0 degree
for (unsigned i = 0; i < n; i++) {		for (unsigned i = 0; i < n; i++) {
if (inDegree[i] == 0) {		if (inDegree[i] == 0) {
if (linalg::isReductionIterator(iteratorTypes[i]))		if (merger.isFilterLoop(i))
		filterIt.push_back(i);
		else if (linalg::isReductionIterator(iteratorTypes[i]))
redIt.push_back(i);		redIt.push_back(i);
else		else
parIt.push_back(i);		parIt.push_back(i);
}		}
}		}

while (!redIt.empty() \|\| !parIt.empty()) {		while (!redIt.empty() \|\| !parIt.empty() \|\| !filterIt.empty()) {
// We always choose parallel iterator if there is any.		// We always choose in order of filter loop -> parallel loop -> reduction
auto &it = !parIt.empty() ? parIt : redIt;		// loop because
		// 1. Putting reduction loop early might make the loop sequence
		// inadmissible.
		// 2. Filter Loop should be put as early as possible for better performance,
		// since only one (if any) iteration will carry the computation. E.g.,
		// for (1 to N)
		// for (1 to M)
		// for (1 to K)
		// if (xxx)
		// O(X) computation => O(NMK+NMX) time complexity
		//
		// By putting the filter loop one level up, we got
		//
		// for (1 to N)
		// for (1 to K)
		// if (xxx)
		// for (1 to M)
		// O(X) computation => O(NK+NMX) time complexity
		auto &it = !filterIt.empty() ? filterIt : (!parIt.empty() ? parIt : redIt);
auto src = it.back();		auto src = it.back();
topSort.push_back(src);		topSort.push_back(src);
it.pop_back();		it.pop_back();
// Update in-degree, and push 0-degree node into worklist.		// Update in-degree, and push 0-degree node into worklist.
for (unsigned dst = 0; dst < n; dst++)		for (unsigned dst = 0; dst < n; dst++) {
if (adjM[src][dst] && --inDegree[dst] == 0) {		if (adjM[src][dst] && --inDegree[dst] == 0) {
if (linalg::isReductionIterator(iteratorTypes[dst]))		if (merger.isFilterLoop(dst))
		filterIt.push_back(dst);
		else if (linalg::isReductionIterator(iteratorTypes[dst]))
redIt.push_back(dst);		redIt.push_back(dst);
else		else
parIt.push_back(dst);		parIt.push_back(dst);
}		}
}		}
		}
return topSort.size() == n;		return topSort.size() == n;
}		}

/// Helper method to add all constraints from the indices in one affine		/// Helper method to add all constraints from the indices in one affine
/// expression before all indices in the other affine expression. For		/// expression before all indices in the other affine expression. For
/// example i0+i1 < i2+i3+1 yields i0<i2, i0<i3, i1<i2, and i1<i3.		/// example i0+i1 < i2+i3+1 yields i0<i2, i0<i3, i1<i2, and i1<i3.
static void addAffineOrderings(std::vector<std::vector<bool>> &adjM,		static void addAffineOrderings(std::vector<std::vector<bool>> &adjM,
std::vector<unsigned> &inDegree, AffineExpr a,		std::vector<unsigned> &inDegree, AffineExpr a,
AffineExpr b, unsigned fidx) {		AffineExpr b, Optional<unsigned> fidx,
switch (a.getKind()) {		Optional<unsigned> tidx) {
case AffineExprKind::DimId: {		if (!a && !b) {
unsigned idx = a.cast<AffineDimExpr>().getPosition();		// Recursion leaf.
if (b)		assert(fidx && tidx);
addAffineOrderings(adjM, inDegree, b, AffineExpr(), idx);		unsigned f = fidx, t = tidx;
else if (!adjM[fidx][idx]) {		if (!adjM[f][t]) {
adjM[fidx][idx] = true;		adjM[f][t] = true;
inDegree[idx]++;		inDegree[t]++;
}		}
		return;
		}
		auto toExpand = a ? a : b;
		switch (toExpand.getKind()) {
		case AffineExprKind::DimId: {
		auto idx = toExpand.cast<AffineDimExpr>().getPosition();
		if (toExpand == a)
		addAffineOrderings(adjM, inDegree, AffineExpr(), b, idx, tidx);
		else // toExpand == b
		addAffineOrderings(adjM, inDegree, a, AffineExpr(), fidx, idx);
break;		break;
}		}
case AffineExprKind::Add:		case AffineExprKind::Add:
case AffineExprKind::Mul: {		case AffineExprKind::Mul: {
auto binOp = a.cast<AffineBinaryOpExpr>();		auto binOp = toExpand.cast<AffineBinaryOpExpr>();
addAffineOrderings(adjM, inDegree, binOp.getLHS(), b, fidx);		if (toExpand == a) {
addAffineOrderings(adjM, inDegree, binOp.getRHS(), b, fidx);		addAffineOrderings(adjM, inDegree, binOp.getLHS(), b, fidx, tidx);
		addAffineOrderings(adjM, inDegree, binOp.getRHS(), b, fidx, tidx);
		} else {
		addAffineOrderings(adjM, inDegree, a, binOp.getLHS(), fidx, tidx);
		addAffineOrderings(adjM, inDegree, a, binOp.getRHS(), fidx, tidx);
		}
break;		break;
}		}
default:		default:
break;		break;
}		}
}		}

/// Computes a topologically sorted iteration graph for the linalg operation.		/// Computes a topologically sorted iteration graph for the linalg operation.
/// Ensures all tensors are visited in natural index order. This is essential		/// Ensures all tensors are visited in natural index order. This is essential
/// for sparse storage formats since these only support access along fixed		/// for sparse storage formats since these only support access along fixed
/// dimensions. Even for dense storage formats, however, the natural index		/// dimensions. Even for dense storage formats, however, the natural index
/// order yields innermost unit-stride access with better spatial locality.		/// order yields innermost unit-stride access with better spatial locality.
static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,		static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
std::vector<unsigned> &topSort, unsigned mask,		std::vector<unsigned> &topSort, unsigned mask,
OpOperand *skip = nullptr) {		OpOperand *skip = nullptr) {
// Set up an n x n from/to adjacency matrix of the iteration graph		// Set up an n x n from/to adjacency matrix of the iteration graph
// for the implicit loop indices i_0 .. i_n-1.		// for the implicit loop indices i_0 .. i_n-1.
unsigned n = op.getNumLoops();		unsigned n = merger.getNumLoops();
std::vector<std::vector<bool>> adjM(n, std::vector<bool>(n, false));		std::vector<std::vector<bool>> adjM(n, std::vector<bool>(n, false));
std::vector<unsigned> inDegree(n, 0); // in-degree of each node.		std::vector<unsigned> inDegree(n, 0); // in-degree of each node.
auto iteratorTypes = op.getIteratorTypesArray();		auto iteratorTypes = op.getIteratorTypesArray();
// Iterate over the indexing maps of every tensor in the tensor expression.		// Iterate over the indexing maps of every tensor in the tensor expression.
for (OpOperand &t : op->getOpOperands()) {		for (OpOperand &t : op->getOpOperands()) {
// Skip tensor during cycle resolution.
if (&t == skip)
continue;
// Get map and encoding.		// Get map and encoding.
auto map = op.getMatchingIndexingMap(&t);		auto map = op.getMatchingIndexingMap(&t);
auto enc = getSparseTensorEncoding(t.get().getType());		auto enc = getSparseTensorEncoding(t.get().getType());
assert(map.getNumDims() == n);		assert(map.getNumDims() + getNumCompoundAffineOnSparseDims(op) == n);
// Skip dense tensor constraints when not requested.		// Skip dense tensor constraints when not requested.
if (!(mask & SortMask::kIncludeDense) && !enc)		if (!(mask & SortMask::kIncludeDense) && !enc)
continue;		continue;
// Each tensor expression and optional dimension ordering (row-major		// Each tensor expression and optional dimension ordering (row-major
// by default) puts an ordering constraint on the loop indices. For		// by default) puts an ordering constraint on the loop indices. For
// example, the tensor expresion A_ijk forces the ordering i < j < k		// example, the tensor expresion A_ijk forces the ordering i < j < k
// on the loop indices if no explicit dimension ordering is given.		// on the loop indices if no explicit dimension ordering is given.
for (unsigned d = 1, rank = map.getNumResults(); d < rank; d++) {		for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
AffineExpr f = map.getResult(toOrigDim(enc, d - 1));		AffineExpr ta = map.getResult(toOrigDim(enc, d));
AffineExpr t = map.getResult(toOrigDim(enc, d));		Optional<unsigned> tldx = merger.getLoopIdx(t.getOperandNumber(), d);
addAffineOrderings(adjM, inDegree, f, t, 0);
		// Filter loops should be constructed after all the dependent loops,
		// i.e., d0 + d1 < filter_loop(d0 + d1)
		if (tldx && merger.isFilterLoop(tldx.value())) {
		assert(!ta.isa<AffineDimExpr>() &&
		!isDenseDLT(getDimLevelType(enc, d)));
		addAffineOrderings(adjM, inDegree, ta, AffineExpr(), llvm::None, tldx);
		// Now that the ordering of affine expression is captured by filter
		// loop idx, we only need to ensure the affine ordering against filter
		// loop. Thus, we reset the affine express to nil here to mark it as
		// resolved.
		ta = AffineExpr();
		}

		// Skip tensor during cycle resolution, though order between filter loop
		// and dependent loops need to be guaranteed unconditionally.
		if (&t == skip)
		continue;

		if (d > 0) {
		AffineExpr fa = map.getResult(toOrigDim(enc, d - 1));
		Optional<unsigned> fldx =
		merger.getLoopIdx(t.getOperandNumber(), d - 1);

		if (fldx && merger.isFilterLoop(fldx.value())) {
		// This must be a compound affine expression on sparse dimension.
		assert(!fa.isa<AffineDimExpr>() &&
		!isDenseDLT(getDimLevelType(enc, d - 1)));
		// For the same reason above.
		fa = AffineExpr();
		}

		if (!(mask & SortMask::kIncludeDense) && !tldx) {
		ParallelAffineDimFinder finder(op);
		// e.g, for [dense, dense] -> (d0 + d1, d2 + d3)
		// It is totally fine to have loop sequence d0->d2->d1->d3 instead of
		// requiring d0 < d2, d1 < d2, d0 < d3, d1 < d3.
		Peiming (author; unsubmitted, done): @aartbik We should have some discussion on this!
		Peiming (author; unsubmitted, done): The patch is updated per our offline discussion: it now picks only one AffineDimExpr from each compound affine expression to establish the order, instead of adding order constraints between every pair of their sub-AffineDimExprs.
		// We use a heuristic here to only pick one dim expression from each
		// compound affine expression to establish the order between two dense
		// dimensions.
		// NOTE: The ordering can only be loosen when the destination level is
		// dense, for [dense, sparse] -> (d0 + d1, d2), we still require both
		// d0 < d2 and d1 < d2 to ensure correct ordering (i.e., no ordering
		// like d0->d2->d1).
		// TODO: this is obviously a sub optimal solution.
		if (!fldx && fa.isa<AffineConstantExpr>()) {
		assert(isDenseDLT(getDimLevelType(enc, d - 1)) &&
		!fa.isa<AffineDimExpr>());
		// Heuristic: we prefer parallel loop for lhs to reduce the chance
		// we add reduce < parallel ordering.
		finder.setPickedIterType(utils::IteratorType::parallel);
		finder.walkPostOrder(fa);
		fa = finder.getDimExpr();
		fldx = finder.getDimExpr().getPosition();
		}
		if (!ta.isa<AffineConstantExpr>()) {
		// Dense compound affine
		assert(isDenseDLT(getDimLevelType(enc, d)) &&
		!ta.isa<AffineDimExpr>());
		// Heuristic: we prefer reduction loop for rhs to reduce the chance
		// addint reduce < parallel ordering.
		finder.setPickedIterType(utils::IteratorType::reduction);
		finder.walkPostOrder(ta);
		ta = finder.getDimExpr();
		tldx = finder.getDimExpr().getPosition();
		}
		}

		// (d0 + d1) < (d2 + d3), or
		// filter_loop_d-1 < (d2 + d3), or
		// (d0 + d1) < filter_loop_d, or
		// filter_loop_d-1 < filter_loop_d depending on whether fa/ta is reset
		// above.
		addAffineOrderings(adjM, inDegree, fa, ta, fldx, tldx);
		}
}		}
// Push unrelated loops into sparse iteration space, so these		// Push unrelated loops into sparse iteration space, so these
// will be skipped more often.		// will be skipped more often.
if (mask & SortMask::kIncludeUndef) {		if (mask & SortMask::kIncludeUndef) {
unsigned tensor = t.getOperandNumber();		unsigned tensor = t.getOperandNumber();
for (unsigned i = 0; i < n; i++)		for (unsigned i = 0; i < n; i++)
if (isCompressedDLT(merger.getDimLevelType(tensor, i)) \|\|		if (isCompressedDLT(merger.getDimLevelType(tensor, i)) \|\|
isSingletonDLT(merger.getDimLevelType(tensor, i))) {		isSingletonDLT(merger.getDimLevelType(tensor, i))) {
for (unsigned j = 0; j < n; j++)		for (unsigned j = 0; j < n; j++)
if (isUndefDLT(merger.getDimLevelType(tensor, j))) {		if (isUndefDLT(merger.getDimLevelType(tensor, j))) {
adjM[i][j] = true;		adjM[i][j] = true;
inDegree[j]++;		inDegree[j]++;
}		}
} else {		} else {
assert(isDenseDLT(merger.getDimLevelType(tensor, i)) \|\|		assert(isDenseDLT(merger.getDimLevelType(tensor, i)) \|\|
isUndefDLT(merger.getDimLevelType(tensor, i)));		isUndefDLT(merger.getDimLevelType(tensor, i)));
}		}
}		}
}		}
// Topologically sort the iteration graph to determine loop order.		// Topologically sort the iteration graph to determine loop order.
// Report failure for a cyclic iteration graph.		// Report failure for a cyclic iteration graph.
topSort.clear();		topSort.clear();
topSort.reserve(n);		topSort.reserve(n);
return topSortOptimal(n, iteratorTypes, topSort, inDegree, adjM);		return topSortOptimal(n, iteratorTypes, merger, topSort, inDegree, adjM);
}		}

/// Returns true if tensor materializes uninitialized into the computation.		/// Returns true if tensor materializes uninitialized into the computation.
static bool isMaterializing(Value val) {		static bool isMaterializing(Value val) {
return val.getDefiningOp<tensor::EmptyOp>() \|\|		return val.getDefiningOp<tensor::EmptyOp>() \|\|
val.getDefiningOp<bufferization::AllocTensorOp>();		val.getDefiningOp<bufferization::AllocTensorOp>();
}		}

Show All 11 Lines	static bool isAdmissibleTensorExp(Merger &merger, linalg::GenericOp op,
auto enc = getSparseTensorEncoding(lhs->get().getType());		auto enc = getSparseTensorEncoding(lhs->get().getType());
// An non-annotated output tensor is assumed dense, and becomes a random		// An non-annotated output tensor is assumed dense, and becomes a random
// access n-dim memref. Admissible since insertions cannot occur.		// access n-dim memref. Admissible since insertions cannot occur.
if (!enc)		if (!enc)
return true;		return true;
// An all-dense annotated "sparse" output tensor becomes a linearized random		// An all-dense annotated "sparse" output tensor becomes a linearized random
// access 1-dim memref. Also admissible since insertions cannot occur.		// access 1-dim memref. Also admissible since insertions cannot occur.
bool allDense = true;		bool allDense = true;
auto iteratorTypes = op.getIteratorTypesArray();		unsigned numLoops = merger.getNumLoops(); // numNativeLoops + numFilterLoops
unsigned numLoops = iteratorTypes.size();		for (unsigned i = 0; i < merger.getNumLoops(); i++)
for (unsigned i = 0; i < numLoops; i++)
if (isCompressedDLT(merger.getDimLevelType(tensor, i)) \|\|		if (isCompressedDLT(merger.getDimLevelType(tensor, i)) \|\|
isSingletonDLT(merger.getDimLevelType(tensor, i))) {		isSingletonDLT(merger.getDimLevelType(tensor, i))) {
allDense = false;		allDense = false;
break;		break;
} else {		} else {
assert(isDenseDLT(merger.getDimLevelType(tensor, i)) \|\|		assert(isDenseDLT(merger.getDimLevelType(tensor, i)) \|\|
isUndefDLT(merger.getDimLevelType(tensor, i)));		isUndefDLT(merger.getDimLevelType(tensor, i)));
}		}
if (allDense)		if (allDense)
return true;		return true;

		// TODO: support compound affine expression on sparse output.
		if (getNumCompoundAffineOnSparseDims(op.getMatchingIndexingMap(lhs),
		lhs->get()) != 0)
		return false;

// A tensor expression with a sparse output tensor that changes its values		// A tensor expression with a sparse output tensor that changes its values
// but not its nonzero structure, an operation called "simply dynamic" in		// but not its nonzero structure, an operation called "simply dynamic" in
// [Bik96,Ch9], is also admissible without special codegen.		// [Bik96,Ch9], is also admissible without special codegen.
if (merger.isSingleCondition(tensor, exp))		if (merger.isSingleCondition(tensor, exp))
return true;		return true;

// Accept "truly dynamic" if the output tensor materializes uninitialized		// Accept "truly dynamic" if the output tensor materializes uninitialized
// into the computation and insertions occur in lexicographic index order.		// into the computation and insertions occur in lexicographic index order.
if (isMaterializing(lhs->get())) {		if (isMaterializing(lhs->get())) {
		auto iteratorTypes = op.getIteratorTypesArray();
unsigned nest = 0;		unsigned nest = 0;
for (unsigned i = 0; i < numLoops; i++) {		for (unsigned i = 0; i < numLoops; i++) {
		if (!merger.isFilterLoop(topSort[i])) {
		// We only count non-filter loops as filter loops should be considered
		// as a special type of parallel loops.
if (linalg::isReductionIterator(iteratorTypes[topSort[i]]))		if (linalg::isReductionIterator(iteratorTypes[topSort[i]]))
break; // terminate at first reduction		break; // terminate at first reduction
nest++;		nest++;
}		}
		}
// Determine admissible dynamic insertion situations:		// Determine admissible dynamic insertion situations:
// (1) fully injective, since there are no reductions,		// (1) fully injective, since there are no reductions,
// (2) admissible 1-d expansion in innermost dimension.		// (2) admissible 1-d expansion in innermost dimension.
if (nest >= op.getRank(lhs) - 1) {		if (nest >= op.getRank(lhs) - 1) {
*sparseOut = lhs;		*sparseOut = lhs;
outerParNest = nest;		outerParNest = nest;
return true;		return true;
}		}
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder,		static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder,
linalg::GenericOp op) {		linalg::GenericOp op) {
Location loc = op.getLoc();		Location loc = op.getLoc();
assert(op.getNumOperands() == op.getNumDpsInputs() + 1);		assert(op.getNumOperands() == op.getNumDpsInputs() + 1);

codegen.loopEmitter.initializeLoopEmit(		codegen.loopEmitter.initializeLoopEmit(
builder, loc,		builder, loc,
/// Generates buffer for the output tensor.		/// Generates buffer for the output tensor.
/// Note that all sparse kernels assume that when all elements are written		/// Note that all sparse kernels assume that when all elements are
/// to (viz. x(i) = y(i) * z(i)), the output buffer is already initialized		/// written to (viz. x(i) = y(i) * z(i)), the output buffer is already
/// to all zeroes and only nonzeroes values are computed and written out.		/// initialized to all zeroes and only nonzeroes values are computed and
/// For updates (viz. x(i) += y(i) * z(i)), only nonzeroes values are used		/// written out. For updates (viz. x(i) += y(i) * z(i)), only nonzeroes
/// for the updates and no assumption on the original contents of the		/// values are used for the updates and no assumption on the original
/// output buffer is necessary.		/// contents of the output buffer is necessary.
[&op](OpBuilder &builder, Location loc, Value memref,		[&op](OpBuilder &builder, Location loc, Value memref,
Value tensor) -> Value {		Value tensor) -> Value {
// Must not be a sparse tensor.		// Must not be a sparse tensor.
assert(!getSparseTensorEncoding(tensor.getType()));		assert(!getSparseTensorEncoding(tensor.getType()));
OpOperand *lhs = op.getDpsInitOperand(0);		OpOperand *lhs = op.getDpsInitOperand(0);
// Two output tensors references should pointed to the same object.		// Two output tensors references should pointed to the same object.
assert(lhs->get() == tensor);		assert(lhs->get() == tensor);
bool isInit = op.isInitTensor(lhs);		bool isInit = op.isInitTensor(lhs);
// An output tensor can simply materialize from the buffer of the tensor		// An output tensor can simply materialize from the buffer of the
// that appears in the outs() clause. For updates, this has the		// tensor that appears in the outs() clause. For updates, this has the
// advantage that only the nonzero value are involved in the		// advantage that only the nonzero value are involved in the
// computation, keeping the operation O(nnz). In all other cases, we are		// computation, keeping the operation O(nnz). In all other cases, we
// forced to zero out the buffer to enforce the assumption above, which		// are forced to zero out the buffer to enforce the assumption above,
// may negatively impact running complexity (viz. O(n^2 + nnz) vs.		// which may negatively impact running complexity (viz. O(n^2 + nnz)
// O(nnz) for matrices).		// vs. O(nnz) for matrices).
// TODO: use better analysis to avoid zeroing out the buffer?		// TODO: use better analysis to avoid zeroing out the buffer?
Value init = memref;		Value init = memref;
if (!isInit) {		if (!isInit) {
Value zero = constantZero(builder, loc,		Value zero = constantZero(builder, loc,
getElementTypeOrSelf(tensor.getType()));		getElementTypeOrSelf(tensor.getType()));
builder.create<linalg::FillOp>(loc, ValueRange{zero},		builder.create<linalg::FillOp>(loc, ValueRange{zero},
ValueRange{init});		ValueRange{init});
}		}
return init;		return init;
});		});
}		}

/// Generates an affine expression.
///
/// Recursively lowers `a` into a Value:
///  * a dimension id yields `codegen.getLoopIdxValue` for its position,
///  * a constant is materialized through `constantIndex`,
///  * a sum or product becomes `arith::AddIOp` / `arith::MulIOp` applied to
///    the lowered left-hand and right-hand operands.
/// Any other expression kind is treated as unreachable.
//
// TODO: generalize for sparse tensor subscripts
//
static Value genAffine(CodeGen &codegen, OpBuilder &builder, AffineExpr a,
                       Location loc) {
  switch (a.getKind()) {
  case AffineExprKind::DimId: {
    unsigned idx = a.cast<AffineDimExpr>().getPosition();
    return codegen.getLoopIdxValue(idx); // universal dense index
  }
  case AffineExprKind::Add:
  case AffineExprKind::Mul: {
    auto binOp = a.cast<AffineBinaryOpExpr>();
    // Lower the operands in separate statements. When both recursive calls
    // are written as arguments of a single call, C++ leaves their relative
    // evaluation order unspecified, so the order in which the nested ops are
    // created could differ between host compilers. Sequencing LHS before RHS
    // makes the emitted op order deterministic.
    Value lhs = genAffine(codegen, builder, binOp.getLHS(), loc);
    Value rhs = genAffine(codegen, builder, binOp.getRHS(), loc);
    if (a.getKind() == AffineExprKind::Add)
      return builder.create<arith::AddIOp>(loc, lhs, rhs);
    return builder.create<arith::MulIOp>(loc, lhs, rhs);
  }
  case AffineExprKind::Constant: {
    int64_t c = a.cast<AffineConstantExpr>().getValue();
    return constantIndex(builder, loc, c);
  }
  default:
    llvm_unreachable("unexpected affine subscript");
  }
}

/// Generates index for load/store on sparse tensor.		/// Generates index for load/store on sparse tensor.
static Value genIndex(CodeGen &codegen, linalg::GenericOp op, OpOperand *t) {		static Value genIndex(CodeGen &codegen, linalg::GenericOp op, OpOperand *t) {
auto map = op.getMatchingIndexingMap(t);		auto map = op.getMatchingIndexingMap(t);
auto enc = getSparseTensorEncoding(t->get().getType());		auto enc = getSparseTensorEncoding(t->get().getType());
AffineExpr a = map.getResult(toOrigDim(enc, map.getNumResults() - 1));		AffineExpr a = map.getResult(toOrigDim(enc, map.getNumResults() - 1));
assert(a.getKind() == AffineExprKind::DimId);		assert(a.getKind() == AffineExprKind::DimId);
unsigned idx = a.cast<AffineDimExpr>().getPosition();		unsigned idx = a.cast<AffineDimExpr>().getPosition();
return codegen.getLoopIdxValue(idx);		return codegen.getLoopIdxValue(idx);
}		}

/// Generates subscript for load/store on a dense or sparse tensor.		/// Generates subscript for load/store on a dense or sparse tensor.
static Value genSubscript(CodeGen &codegen, OpBuilder &builder,		static Value genSubscript(CodeGen &codegen, OpBuilder &builder,
linalg::GenericOp op, OpOperand *t,		linalg::GenericOp op, OpOperand *t,
SmallVectorImpl<Value> &args) {		SmallVectorImpl<Value> &args) {
unsigned tensor = t->getOperandNumber();		unsigned tensor = t->getOperandNumber();
auto map = op.getMatchingIndexingMap(t);		auto map = op.getMatchingIndexingMap(t);
auto enc = getSparseTensorEncoding(t->get().getType());		auto enc = getSparseTensorEncoding(t->get().getType());
unsigned rank = map.getNumResults();		unsigned rank = map.getNumResults();
if (enc) {		if (enc) {
// Note that currently, all sparse subscripts are simple.		// Note that currently, all sparse subscripts are simple.
// TODO: accept affine too?
assert(map.getResult(toOrigDim(enc, rank - 1)).getKind() ==
AffineExprKind::DimId);
Value pidx = codegen.loopEmitter.getPidxs()[tensor].back();		Value pidx = codegen.loopEmitter.getPidxs()[tensor].back();
assert(pidx);		assert(pidx);
args.push_back(pidx); // position index		args.push_back(pidx); // position index
} else {		} else {
for (unsigned d = 0; d < rank; d++) {		for (unsigned d = 0; d < rank; d++) {
AffineExpr a = map.getResult(d);		AffineExpr a = map.getResult(d);
args.push_back(genAffine(codegen, builder, a, op.getLoc()));		args.push_back(codegen.loopEmitter.genAffine(builder, a, op.getLoc()));
}		}
}		}
return codegen.loopEmitter.getValBuffer()[tensor];		return codegen.loopEmitter.getValBuffer()[tensor];
}		}

/// Generates insertion code to implement dynamic tensor load.		/// Generates insertion code to implement dynamic tensor load.
static Value genInsertionLoad(CodeGen &codegen, OpBuilder &builder,		static Value genInsertionLoad(CodeGen &codegen, OpBuilder &builder,
linalg::GenericOp op, OpOperand *t) {		linalg::GenericOp op, OpOperand *t) {
▲ Show 20 Lines • Show All 111 Lines • ▼ Show 20 Lines	if (t == codegen.sparseOut) {
if (!rhs) {		if (!rhs) {
// Only unary and binary are allowed to return uninitialized rhs		// Only unary and binary are allowed to return uninitialized rhs
// to indicate missing output.		// to indicate missing output.
assert(merger.exp(exp).kind == kUnary \|\| merger.exp(exp).kind == kBinary);		assert(merger.exp(exp).kind == kUnary \|\| merger.exp(exp).kind == kBinary);
} else if (merger.exp(exp).kind == kSelect) {		} else if (merger.exp(exp).kind == kSelect) {
// Select operation insertion.		// Select operation insertion.
Value insChain = codegen.insChain;		Value insChain = codegen.insChain;
assert(insChain);		assert(insChain);
scf::IfOp ifOp = builder.create<scf::IfOp>(		scf::IfOp ifOp = builder.create<scf::IfOp>(loc, insChain.getType(), rhs,
loc, insChain.getType(), rhs, /*else=*/true);		/*else=*/true);
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());		builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
// Existing value was preserved to be used here.		// Existing value was preserved to be used here.
assert(merger.exp(exp).val);		assert(merger.exp(exp).val);
Value v0 = merger.exp(exp).val;		Value v0 = merger.exp(exp).val;
genInsertionStore(codegen, builder, op, t, v0);		genInsertionStore(codegen, builder, op, t, v0);
merger.exp(exp).val = Value();		merger.exp(exp).val = Value();
// Yield modified insertion chain along true branch.		// Yield modified insertion chain along true branch.
builder.create<scf::YieldOp>(op.getLoc(), codegen.insChain);		builder.create<scf::YieldOp>(op.getLoc(), codegen.insChain);
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines	static Value genExp(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
if (merger.exp(exp).kind == Kind::kReduce) {		if (merger.exp(exp).kind == Kind::kReduce) {
assert(codegen.redCustom != -1u);		assert(codegen.redCustom != -1u);
codegen.redCustom = -1u;		codegen.redCustom = -1u;
}		}

return ee;		return ee;
}		}

/// Determines if affine expression is invariant.
static bool isInvariantAffine(const CodeGen &codegen, AffineExpr a,
unsigned ldx, bool &atLevel) {
switch (a.getKind()) {
case AffineExprKind::DimId: {
unsigned idx = a.cast<AffineDimExpr>().getPosition();
if (idx == ldx)
atLevel = true;
return codegen.getLoopIdxValue(idx) != nullptr; // no longer in play?
}
case AffineExprKind::Add:
case AffineExprKind::Mul: {
auto binOp = a.cast<AffineBinaryOpExpr>();
return isInvariantAffine(codegen, binOp.getLHS(), ldx, atLevel) &&
isInvariantAffine(codegen, binOp.getRHS(), ldx, atLevel);
}
default:
return true;
}
}

/// Hoists loop invariant tensor loads for which indices have been exhausted.		/// Hoists loop invariant tensor loads for which indices have been exhausted.
static void genInvariants(Merger &merger, CodeGen &codegen, OpBuilder &builder,		static void genInvariants(Merger &merger, CodeGen &codegen, OpBuilder &builder,
linalg::GenericOp op, unsigned exp, unsigned ldx,		linalg::GenericOp op, unsigned exp, unsigned ldx,
bool atStart, unsigned last = -1u) {		bool atStart, unsigned last = -1u) {
if (exp == -1u)		if (exp == -1u)
return;		return;
if (merger.exp(exp).kind == Kind::kTensor) {		if (merger.exp(exp).kind == Kind::kTensor) {
// Inspect tensor indices.		// Inspect tensor indices.
bool atLevel = ldx == -1u;		bool atLevel = ldx == -1u;
OpOperand &t = op->getOpOperand(merger.exp(exp).tensor);		OpOperand &t = op->getOpOperand(merger.exp(exp).tensor);
auto map = op.getMatchingIndexingMap(&t);		auto map = op.getMatchingIndexingMap(&t);
auto enc = getSparseTensorEncoding(t.get().getType());		auto enc = getSparseTensorEncoding(t.get().getType());
for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {		for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
AffineExpr a = map.getResult(toOrigDim(enc, d));		AffineExpr a = map.getResult(toOrigDim(enc, d));
if (!isInvariantAffine(codegen, a, ldx, atLevel))		Optional<unsigned> sldx = merger.getLoopIdx(t.getOperandNumber(), d);
		if (sldx && merger.isFilterLoop(sldx.value())) {
		if (!codegen.getLoopIdxValue(sldx.value()))
		// The filter loops has not been constructed.
		return;
		if (sldx.value() == ldx)
		atLevel = true;
		} else if (!isInvariantAffine(codegen, a, ldx, atLevel))
return; // still in play		return; // still in play
}		}
// All exhausted at this level (atLevel denotes exactly at this level).		// All exhausted at this level (atLevel denotes exactly at this level).
if (!atLevel)		if (!atLevel)
return;		return;
OpOperand *lhs = op.getDpsInitOperand(0);		OpOperand *lhs = op.getDpsInitOperand(0);
if (lhs == &t) {		if (lhs == &t) {
// Start or end a scalarized reduction		// Start or end a scalarized reduction
▲ Show 20 Lines • Show All 102 Lines • ▼ Show 20 Lines

/// Generates a for-loop on a single index.		/// Generates a for-loop on a single index.
static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder,		static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder,
linalg::GenericOp op, bool isOuter, bool isInner,		linalg::GenericOp op, bool isOuter, bool isInner,
unsigned idx, size_t tid, size_t dim,		unsigned idx, size_t tid, size_t dim,
ArrayRef<size_t> extraTids,		ArrayRef<size_t> extraTids,
ArrayRef<size_t> extraDims) {		ArrayRef<size_t> extraDims) {
Location loc = op.getLoc();		Location loc = op.getLoc();
auto iteratorTypes = op.getIteratorTypesArray();
bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) \|\|		bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) \|\|
isSingletonDLT(merger.getDimLevelType(tid, idx));		isSingletonDLT(merger.getDimLevelType(tid, idx));
bool isParallel = isParallelFor(codegen, isOuter, isSparse);		bool isParallel = isParallelFor(codegen, isOuter, isSparse);

Operation *loop =		Operation *loop =
genLoopBoundary(codegen, merger, [&](MutableArrayRef<Value> reduc) {		genLoopBoundary(codegen, merger, [&](MutableArrayRef<Value> reduc) {
		if (merger.isFilterLoop(idx)) {
		assert(isSparse);
		OpOperand *t = &op->getOpOperand(tid);
		auto enc = getSparseTensorEncoding(t->get().getType());
		// Retrieves the affine expression for the filter loop.
		AffineExpr a =
		op.getMatchingIndexingMap(t).getResult(toOrigDim(enc, dim));
		return codegen.loopEmitter.enterFilterLoopOverTensorAtDim(
		builder, loc, tid, dim, a, reduc);
		}
return codegen.loopEmitter.enterLoopOverTensorAtDim(		return codegen.loopEmitter.enterLoopOverTensorAtDim(
builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims);		builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims);
}).value();		}).value();
assert(loop);		assert(loop);
return loop;		return loop;
}		}

/// Emit a while-loop for co-iteration over multiple indices.		/// Emit a while-loop for co-iteration over multiple indices.
▲ Show 20 Lines • Show All 169 Lines • ▼ Show 20 Lines	for (unsigned i = 1; i < lsize; i++) {
unsigned li = merger.set(lts)[i];		unsigned li = merger.set(lts)[i];
if (!merger.hasAnySparse(merger.lat(li).simple))		if (!merger.hasAnySparse(merger.lat(li).simple))
return true;		return true;
}		}
}		}
return false;		return false;
}		}

static void translateBitsToTidDimPairs(Merger &merger, CodeGen &codegen,		static void translateBitsToTidDimPairs(
unsigned li, unsigned idx,		Merger &merger, CodeGen &codegen, linalg::GenericOp op, unsigned li,
SmallVectorImpl<size_t> &condTids,		unsigned idx, SmallVectorImpl<size_t> &condTids,
SmallVectorImpl<size_t> &condDims,		SmallVectorImpl<size_t> &condDims, SmallVectorImpl<size_t> &extraTids,
SmallVectorImpl<size_t> &extraTids,		SmallVectorImpl<size_t> &extraDims, SmallVectorImpl<size_t> &affineTids,
SmallVectorImpl<size_t> &extraDims) {		SmallVectorImpl<size_t> &affineDims, SmallVectorImpl<AffineExpr> &exps) {

const BitVector &all = merger.lat(li).bits;		const BitVector &all = merger.lat(li).bits;
const BitVector &simple = merger.lat(li).simple;		const BitVector &simple = merger.lat(li).simple;

// Converts bits to array + dim pair		// Converts bits to array + dim pair
merger.foreachTidDimPairInBits(all, [&, idx](unsigned b, unsigned tid,		merger.foreachTidDimPairInBits(all, [&, idx](unsigned b, unsigned tid,
Optional<unsigned> dim,		Optional<unsigned> dim,
DimLevelType dlt) {		DimLevelType dlt) {
if (simple.test(b)) {		if (simple.test(b)) {
Show All 11 Lines	if (simple.test(b)) {
return;		return;
}		}
condTids.push_back(tid);		condTids.push_back(tid);
condDims.push_back(dim.value());		condDims.push_back(dim.value());
} else if (isDenseDLT(dlt)) {		} else if (isDenseDLT(dlt)) {
// TODO: get rid of extraTids and extraDims.		// TODO: get rid of extraTids and extraDims.
extraTids.push_back(tid);		extraTids.push_back(tid);
extraDims.push_back(dim.value());		extraDims.push_back(dim.value());
		} else {
		assert(isUndefDLT(dlt));
		if (tid >= op.getNumDpsInputs())
		// We only handle affine expression on input tensors (for now).
		return;
		OpOperand *operand = &op->getOpOperand(tid);
		auto enc = getSparseTensorEncoding(operand->get().getType());
		if (!enc)
		// Non-annotated dense tensors requires no special handling.
		return;

		ArrayRef<AffineExpr> affines =
		op.getMatchingIndexingMap(operand).getResults();
		assert(affines.size() == enc.getDimLevelType().size());
		for (unsigned i = 0, e = affines.size(); i < e; i++) {
		AffineExpr exp = affines[toOrigDim(enc, i)];
		if (exp.isa<AffineDimExpr>() \|\| !isDenseDLT(getDimLevelType(enc, i)))
		// Skip simple affine expression and non dense dimensions (which has
		// it own filter loop).
		continue;

		// Constant affine expressions on dense level required to be generated
		// when
		// 1. The previous level is an (at-level) invariant compound dense
		// affine (with no corresponding loop idx); or
		// 2. The previous level is being generated right now.
		if (exp.isa<AffineConstantExpr>()) {
		// TODO: Should we come up with a more adhersive way to handle
		// constant expression? We now requires two (somehow ad-hoc) code for
		// it.
		if (i != 0 && // i == 0 cases are handled in genConstantDenseAddress
		((!affineTids.empty() && affineTids.back() == tid &&
		affineDims.back() == i - 1) \|\| // Condition 1
		merger.getLoopIdx(tid, i - 1) == idx)) { // Condition 2
		affineTids.push_back(tid);
		affineDims.push_back(i);
		exps.push_back(exp);
		}
		} else {
		bool atLevel = false;
		if (isInvariantAffine(codegen, exp, idx, atLevel) && atLevel) {
		// If the compound affine is invariant and we are right at the
		// level. We need to generate the address according to the affine
		// expression. This is also the best place we can do it to avoid
		// putting it inside inner loops.
		// NOTE: It assumes that the levels of the input tensor are
		// initialized in order, another more admissible approach might be
		// accepting out-of-order access between consecutive dense levels.
		affineTids.push_back(tid);
		affineDims.push_back(i);
		exps.push_back(exp);
		}
		}
		}
}		}
});		});

if (isDenseDLT(merger.getDimLevelType(merger.getOutTensorID(), idx))) {		if (isDenseDLT(merger.getDimLevelType(merger.getOutTensorID(), idx))) {
// Note that we generate dense indices of the output tensor		// Note that we generate dense indices of the output tensor
// unconditionally, since they may not appear in the lattice, but may be		// unconditionally, since they may not appear in the lattice, but may be
// needed for linearized codegen.		// needed for linearized codegen.
// Only dense dimensions should be optimized from conditions.
auto dim = merger.getDimNum(merger.getOutTensorID(), idx).value();		auto dim = merger.getDimNum(merger.getOutTensorID(), idx).value();
extraTids.push_back(merger.getOutTensorID());		extraTids.push_back(merger.getOutTensorID());
extraDims.push_back(dim);		extraDims.push_back(dim);
}		}
}		}

/// Starts a single loop in current sequence.		/// Starts a single loop in current sequence.
static Operation *startLoop(Merger &merger, CodeGen &codegen,		static Operation *startLoop(Merger &merger, CodeGen &codegen,
OpBuilder &builder, linalg::GenericOp op,		OpBuilder &builder, linalg::GenericOp op,
unsigned at, unsigned li, bool needsUniv) {		unsigned at, unsigned li, bool needsUniv) {
// The set of tensors + dims to generate loops on		// The set of tensors + dims to generate loops on
SmallVector<size_t> condTids, condDims;		SmallVector<size_t> condTids, condDims;
// The set of (dense) tensors that is optimized from condition, yet still		// The set of (dense) tensors that is optimized from condition, yet still
// need extra locals to iterate on them.		// need extra locals to iterate on them.
SmallVector<size_t> extraTids, extraDims;		SmallVector<size_t> extraTids, extraDims;
		// The set of dense tensors with non-trivial affine expression that just
translateBitsToTidDimPairs(merger, codegen, li, codegen.topSort[at], condTids,		// becomes invariant and the address shall now be generated at the current
condDims, extraTids, extraDims);		// level.
		SmallVector<size_t> affineTids, affineDims;
		SmallVector<AffineExpr> affines;

		translateBitsToTidDimPairs(merger, codegen, op, li, codegen.topSort[at],
		condTids, condDims, extraTids, extraDims,
		affineTids, affineDims, affines);
// Emit the for/while-loop control.		// Emit the for/while-loop control.
Operation *loop = genLoop(merger, codegen, builder, op, at, needsUniv,		Operation *loop = genLoop(merger, codegen, builder, op, at, needsUniv,
condTids, condDims, extraTids, extraDims);		condTids, condDims, extraTids, extraDims);

		for (auto [tid, dim, exp] : llvm::zip(affineTids, affineDims, affines)) {
		codegen.loopEmitter.genDenseAffineAddressAtCurLevel(builder, op.getLoc(),
		tid, dim, exp);
		}
return loop;		return loop;
}		}

/// Ends a single loop in current sequence. Returns new values for needsUniv.		/// Ends a single loop in current sequence. Returns new values for needsUniv.
static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,		static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
linalg::GenericOp op, Operation *loop, unsigned idx,		linalg::GenericOp op, Operation *loop, unsigned idx,
unsigned li, bool needsUniv) {		unsigned li, bool needsUniv) {
// End a while-loop.		// End a while-loop.
Show All 19 Lines	static void endLoopSeq(Merger &merger, CodeGen &codegen, OpBuilder &builder,
assert(codegen.getLoopIdxValue(idx) == nullptr);		assert(codegen.getLoopIdxValue(idx) == nullptr);
codegen.loopEmitter.exitCurrentLoopSeq();		codegen.loopEmitter.exitCurrentLoopSeq();
// Unmark bookkeeping of invariants and loop index.		// Unmark bookkeeping of invariants and loop index.
genInvariants(merger, codegen, builder, op, exp, ldx, /*atStart=*/false);		genInvariants(merger, codegen, builder, op, exp, ldx, /*atStart=*/false);
// Finalize access pattern expansion for sparse tensor output.		// Finalize access pattern expansion for sparse tensor output.
genExpansion(merger, codegen, builder, op, at, /*atStart=*/false);		genExpansion(merger, codegen, builder, op, at, /*atStart=*/false);
}		}

		/// Generates the addresses of leading constant dense levels up front.
		///
		/// For every input operand that carries a sparse tensor encoding, walks its
		/// levels from the first one and, while the level is dense and its
		/// subscript is a constant affine expression, asks the loop emitter to
		/// generate the dense address for it. The walk over an operand stops at the
		/// first level that is not both dense and constant. Such addresses do not
		/// depend on any loop, e.g. for [Dense, Dense, Sparse] -> (1, 2, d0) the
		/// first two levels can be resolved before loops are emitted.
		static void genConstantDenseAddress(CodeGen &codegen, RewriterBase &rewriter,
		                                    linalg::GenericOp op) {
		  for (OpOperand *input : op.getDpsInputOperands()) {
		    ArrayRef<AffineExpr> subscripts =
		        op.getMatchingIndexingMap(input).getResults();
		    auto enc = getSparseTensorEncoding(input->get().getType());
		    // Operands without an encoding are not handled here.
		    if (!enc)
		      continue;
		    for (unsigned lvl = 0, rank = subscripts.size(); lvl < rank; lvl++) {
		      AffineExpr affine = subscripts[toOrigDim(enc, lvl)];
		      const bool isConstantDense = isDenseDLT(getDimLevelType(enc, lvl)) &&
		                                   affine.isa<AffineConstantExpr>();
		      // Stop at the first level that is not a constant on a dense level.
		      if (!isConstantDense)
		        break;
		      codegen.loopEmitter.genDenseAffineAddressAtCurLevel(
		          rewriter, op.getLoc(), input->getOperandNumber(), lvl, affine);
		    }
		  }
		}

/// Recursively generates code while computing iteration lattices in order		/// Recursively generates code while computing iteration lattices in order
/// to manage the complexity of implementing co-iteration over unions		/// to manage the complexity of implementing co-iteration over unions
/// and intersections of sparse iterations spaces.		/// and intersections of sparse iterations spaces.
static void genStmt(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,		static void genStmt(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
linalg::GenericOp op, unsigned exp, unsigned at) {		linalg::GenericOp op, unsigned exp, unsigned at) {
// At each leaf, assign remaining tensor (sub)expression to output tensor.		// At each leaf, assign remaining tensor (sub)expression to output tensor.
if (at == codegen.topSort.size()) {		if (at == codegen.topSort.size()) {
unsigned ldx = codegen.topSort[at - 1];		unsigned ldx = codegen.topSort[at - 1];
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines	static void genResult(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
}		}
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Sparse compiler rewriting methods.		// Sparse compiler rewriting methods.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

namespace {		namespace {

/// Sparse rewriting rule for generic Linalg operation.		/// Sparse rewriting rule for generic Linalg operation.
struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> {		struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> {
public:		public:
GenericOpSparsifier(MLIRContext *context, SparsificationOptions o)		GenericOpSparsifier(MLIRContext *context, SparsificationOptions o)
: OpRewritePattern<linalg::GenericOp>(context), options(o) {}		: OpRewritePattern<linalg::GenericOp>(context), options(o) {}

LogicalResult matchAndRewrite(linalg::GenericOp op,		LogicalResult matchAndRewrite(linalg::GenericOp op,
PatternRewriter &rewriter) const override {		PatternRewriter &rewriter) const override {
// Detects sparse annotations and translate the per-dimension sparsity		// Detects sparse annotations and translate the per-dimension sparsity
// information for all tensors to loop indices in the kernel.		// information for all tensors to loop indices in the kernel.
if (op.getNumDpsInits() != 1)		if (op.getNumDpsInits() != 1)
return failure();		return failure();
unsigned numTensors = op->getNumOperands();		unsigned numTensors = op->getNumOperands();
unsigned numLoops = op.getNumLoops();		unsigned numLoops = op.getNumLoops();
Merger merger(numTensors, numLoops);		unsigned numFilterLoops = getNumCompoundAffineOnSparseDims(op);
		Merger merger(numTensors, numLoops, numFilterLoops);
if (!findSparseAnnotations(merger, op))		if (!findSparseAnnotations(merger, op))
return failure();		return failure();

// Builds the tensor expression for the Linalg operation in SSA form.		// Builds the tensor expression for the Linalg operation in SSA form.
Optional<unsigned> optExp = merger.buildTensorExpFromLinalg(op);		Optional<unsigned> optExp = merger.buildTensorExpFromLinalg(op);
if (!optExp.has_value())		if (!optExp.has_value())
return failure();		return failure();

unsigned exp = optExp.value();		unsigned exp = optExp.value();
OpOperand *sparseOut = nullptr;		OpOperand *sparseOut = nullptr;
unsigned outerParNest = 0;		unsigned outerParNest = 0;
// Computes a topologically sorted iteration graph to ensure tensors		// Computes a topologically sorted iteration graph to ensure tensors
// are visited in natural index order. Gradually relaxes the considered		// are visited in natural index order. Gradually relaxes the considered
// constraints until an acyclic iteration graph results, such that sparse		// constraints until an acyclic iteration graph results, such that
// code generation can proceed. As a last resort, an attempt is made		// sparse code generation can proceed. As a last resort, an attempt is
// to resolve cycles by inserting a conversion.		// made to resolve cycles by inserting a conversion.
std::vector<unsigned> topSort;		std::vector<unsigned> topSort;
// Whether the current GenericOp is admissible.		// Whether the current GenericOp is admissible.
bool isAdmissible = false;		bool isAdmissible = false;
bool hasCycle = true;		bool hasCycle = true;
// An const list of all masks that we used for interation graph		// An const list of all masks that we used for interation graph
// computation. Must be ordered from strict -> loose.		// computation. Must be ordered from strict -> loose.
const auto allMask = {SortMask::kIncludeAll, SortMask::kIncludeUndef,		const auto allMask = {SortMask::kIncludeAll, SortMask::kIncludeUndef,
SortMask::kIncludeDense, SortMask::kSparseOnly};		SortMask::kIncludeDense, SortMask::kSparseOnly};
Show All 20 Lines	LogicalResult matchAndRewrite(linalg::GenericOp op,
SmallVector<Value> tensors;		SmallVector<Value> tensors;
for (OpOperand &t : op->getOpOperands())		for (OpOperand &t : op->getOpOperands())
tensors.push_back(t.get());		tensors.push_back(t.get());

// Recursively generates code if admissible.		// Recursively generates code if admissible.
CodeGen codegen(options, tensors, numTensors, numLoops, sparseOut,		CodeGen codegen(options, tensors, numTensors, numLoops, sparseOut,
outerParNest, topSort);		outerParNest, topSort);
genBuffers(merger, codegen, rewriter, op);		genBuffers(merger, codegen, rewriter, op);
		genConstantDenseAddress(codegen, rewriter, op);
genStmt(merger, codegen, rewriter, op, exp, 0);		genStmt(merger, codegen, rewriter, op, exp, 0);
genResult(merger, codegen, rewriter, op);		genResult(merger, codegen, rewriter, op);
return success();		return success();
}		}

private:		private:
// Last resort cycle resolution.		// Last resort cycle resolution.
LogicalResult resolveCycle(Merger &merger, PatternRewriter &rewriter,		LogicalResult resolveCycle(Merger &merger, PatternRewriter &rewriter,
Show All 14 Lines	for (OpOperand *t : op.getDpsInputOperands()) {
// graph order. Also releases the temporary sparse tensor.		// graph order. Also releases the temporary sparse tensor.
//		//
// TODO: investigate fusing the conversion with computation,		// TODO: investigate fusing the conversion with computation,
// especially if it is a direct yield!		// especially if it is a direct yield!
//		//
auto srcTp = tval.getType().cast<RankedTensorType>();		auto srcTp = tval.getType().cast<RankedTensorType>();
auto dstEnc = SparseTensorEncodingAttr::get(		auto dstEnc = SparseTensorEncodingAttr::get(
op->getContext(), srcEnc.getDimLevelType(),		op->getContext(), srcEnc.getDimLevelType(),
permute(getContext(), op.getMatchingIndexingMap(t),		permute(merger, getContext(), op.getMatchingIndexingMap(t),
topSort), // new order		topSort), // new order
srcEnc.getHigherOrdering(), srcEnc.getPointerBitWidth(),		srcEnc.getHigherOrdering(), srcEnc.getPointerBitWidth(),
srcEnc.getIndexBitWidth());		srcEnc.getIndexBitWidth());
auto dstTp = RankedTensorType::get(srcTp.getShape(),		auto dstTp = RankedTensorType::get(srcTp.getShape(),
srcTp.getElementType(), dstEnc);		srcTp.getElementType(), dstEnc);
auto convert = rewriter.create<ConvertOp>(tval.getLoc(), dstTp, tval);		auto convert = rewriter.create<ConvertOp>(tval.getLoc(), dstTp, tval);
op->setOperand(tensor, convert);		op->setOperand(tensor, convert);
rewriter.setInsertionPointAfter(op);		rewriter.setInsertionPointAfter(op);
Show All 20 Lines

mlir/test/Dialect/SparseTensor/sparse_affine.mlir

// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py		// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -sparsification | FileCheck %s		// RUN: mlir-opt %s -sparsification | FileCheck %s

#SpVec = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>		#SpVec = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
		#EncDenseVec = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>
#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>		#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>

#trait1 = {		#trait1 = {
indexing_maps = [		indexing_maps = [
affine_map<(i) -> (i)>, // a		affine_map<(i) -> (i)>, // a
affine_map<(i) -> (3)>, // b		affine_map<(i) -> (3)>, // b
affine_map<(i) -> (i)> // x (out)		affine_map<(i) -> (i)> // x (out)
],		],
Show All 36 Lines	outs(%argx: tensor<32xf32>) {
^bb(%a: f32, %b: f32, %x: f32):		^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32		%0 = arith.mulf %a, %b : f32
%1 = arith.addf %x, %0 : f32		%1 = arith.addf %x, %0 : f32
linalg.yield %1 : f32		linalg.yield %1 : f32
} -> tensor<32xf32>		} -> tensor<32xf32>
return %0 : tensor<32xf32>		return %0 : tensor<32xf32>
}		}

		// CHECK-LABEL: func.func @mul_inv_sparse1d(
		// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
		// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>)
		// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
		// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
		// CHECK: %[[VAL_4:.*]] = arith.constant 3 : index
		// CHECK: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
		// CHECK: %[[VAL_6:.*]] = bufferization.alloc_tensor() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
		// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index} : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
		// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref<?xindex>
		// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
		// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_6]]) -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref<?xindex>
		// CHECK: %[[VAL_18:.*]] = arith.cmpi eq, %[[VAL_17]], %[[VAL_4]] : index
		// CHECK: %[[VAL_19:.*]] = scf.if %[[VAL_18]] -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_15]]] : memref<?xf32>
		// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_2]]] : memref<?xindex>
		// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]]] : memref<?xindex>
		// CHECK: %[[VAL_23:.*]] = scf.for %[[VAL_24:.*]] = %[[VAL_21]] to %[[VAL_22]] step %[[VAL_3]] iter_args(%[[VAL_25:.*]] = %[[VAL_16]]) -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref<?xf32>
		// CHECK: %[[VAL_27:.*]] = arith.mulf %[[VAL_26]], %[[VAL_20]] : f32
		// CHECK: %[[VAL_28:.*]] = arith.addf %[[VAL_27]], %[[VAL_5]] : f32
		// CHECK: %[[VAL_29:.*]] = sparse_tensor.insert %[[VAL_28]] into %[[VAL_25]]{{\[}}%[[VAL_17]]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: scf.yield %[[VAL_29]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: scf.yield %[[VAL_30:.*]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: } else {
		// CHECK: scf.yield %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: scf.yield %[[VAL_31:.*]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: %[[VAL_32:.*]] = sparse_tensor.load %[[VAL_33:.*]] hasInserts : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: return %[[VAL_32]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// Kernel under test: applies #trait1 (a and x indexed by i, b read at the
		// constant index 3) with a body that computes x(i) + a(i) * b(3). Both inputs
		// and the output carry the #SpVec ("compressed") encoding; the output starts
		// as a freshly allocated sparse tensor.
		func.func @mul_inv_sparse1d(%arga: tensor<32xf32, #SpVec>,
		%argb: tensor<4xf32, #SpVec>) -> tensor<32xf32, #SpVec> {
		%argx = bufferization.alloc_tensor() : tensor<32xf32, #SpVec>
		%0 = linalg.generic #trait1
		ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32, #SpVec>)
		outs(%argx: tensor<32xf32, #SpVec>) {
		^bb(%a: f32, %b: f32, %x: f32):
		%0 = arith.mulf %a, %b : f32
		%1 = arith.addf %x, %0 : f32
		linalg.yield %1 : f32
		} -> tensor<32xf32, #SpVec>
		return %0 : tensor<32xf32, #SpVec>
		}


		// CHECK-LABEL: func.func @mul_inv_enc_dense1d(
		// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
		// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> {
		// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index
		// CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
		// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
		// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
		// CHECK: %[[VAL_6:.*]] = bufferization.alloc_tensor() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
		// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
		// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
		// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xf32>
		// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
		// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref<?xf32>
		// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<?xf32>
		// CHECK: %[[VAL_14:.*]] = arith.mulf %[[VAL_13]], %[[VAL_10]] : f32
		// CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_12]], %[[VAL_14]] : f32
		// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref<?xf32>
		// CHECK: }
		// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: return %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// Same #trait1 kernel and body as @mul_inv_sparse1d, but every operand uses
		// the #EncDenseVec encoding, i.e. a sparse_tensor encoding whose single
		// dimension level type is "dense".
		func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>,
		%argb: tensor<4xf32, #EncDenseVec>) -> tensor<32xf32, #EncDenseVec> {
		%argx = bufferization.alloc_tensor() : tensor<32xf32, #EncDenseVec>
		%0 = linalg.generic #trait1
		ins(%arga, %argb: tensor<32xf32, #EncDenseVec>, tensor<4xf32, #EncDenseVec>)
		outs(%argx: tensor<32xf32, #EncDenseVec>) {
		^bb(%a: f32, %b: f32, %x: f32):
		%0 = arith.mulf %a, %b : f32
		%1 = arith.addf %x, %0 : f32
		linalg.yield %1 : f32
		} -> tensor<32xf32, #EncDenseVec>
		return %0 : tensor<32xf32, #EncDenseVec>
		}

#trait2 = {		#trait2 = {
indexing_maps = [		indexing_maps = [
affine_map<(i) -> (i)>, // a		affine_map<(i) -> (i)>, // a
affine_map<(i) -> (i+2)>, // b		affine_map<(i) -> (i+2)>, // b
affine_map<(i) -> (i)> // x (out)		affine_map<(i) -> (i)> // x (out)
],		],
iterator_types = ["parallel"],		iterator_types = ["parallel"],
doc = "x(i) = a(i) & b(i+2)"		doc = "x(i) = a(i) & b(i+2)"
Show All 34 Lines	%0 = linalg.generic #trait2
outs(%argx: tensor<32xi32>) {		outs(%argx: tensor<32xi32>) {
^bb(%a: i32, %b: i32, %x: i32):		^bb(%a: i32, %b: i32, %x: i32):
%0 = arith.andi %a, %b : i32		%0 = arith.andi %a, %b : i32
linalg.yield %0 : i32		linalg.yield %0 : i32
} -> tensor<32xi32>		} -> tensor<32xi32>
return %0 : tensor<32xi32>		return %0 : tensor<32xi32>
}		}

		// CHECK-LABEL: func.func @and_affine_sparse1d(
		// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>,
		// CHECK-SAME: %[[VAL_1:.*]]: tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>>)
		// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
		// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
		// CHECK: %[[VAL_4:.*]] = arith.constant 2 : index
		// CHECK: %[[VAL_5:.*]] = bufferization.alloc_tensor() : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
		// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index} : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
		// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_2]]] : memref<?xindex>
		// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
		// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_5]]) -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
		// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref<?xi32>
		// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref<?xindex>
		// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
		// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_20]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_16]]) -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref<?xindex>
		// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_17]], %[[VAL_4]] : index
		// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_25]] : index
		// CHECK: %[[VAL_27:.*]] = scf.if %[[VAL_26]] -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_22]]] : memref<?xi32>
		// CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_18]], %[[VAL_28]] : i32
		// CHECK: %[[VAL_30:.*]] = sparse_tensor.insert %[[VAL_29]] into %[[VAL_23]]{{\[}}%[[VAL_17]]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: scf.yield %[[VAL_30]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: } else {
		// CHECK: scf.yield %[[VAL_23]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: scf.yield %[[VAL_31:.*]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: scf.yield %[[VAL_32:.*]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: %[[VAL_33:.*]] = sparse_tensor.load %[[VAL_34:.*]] hasInserts : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: return %[[VAL_33]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
		// Kernel under test: x(i) = a(i) & b(i+2) (see #trait2), i.e. the second
		// input is addressed through the affine index expression i+2. All operands,
		// including the freshly allocated output, carry the #SpVec encoding.
		func.func @and_affine_sparse1d(%arga: tensor<32xi32, #SpVec>,
		%argb: tensor<34xi32, #SpVec>) -> tensor<32xi32, #SpVec> {
		%argx = bufferization.alloc_tensor() : tensor<32xi32, #SpVec>
		%0 = linalg.generic #trait2
		ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32, #SpVec>)
		outs(%argx: tensor<32xi32, #SpVec>) {
		^bb(%a: i32, %b: i32, %x: i32):
		%0 = arith.andi %a, %b : i32
		linalg.yield %0 : i32
		} -> tensor<32xi32, #SpVec>
		return %0 : tensor<32xi32, #SpVec>
		}


#trait3 = {		#trait3 = {
indexing_maps = [		indexing_maps = [
affine_map<(i,j) -> (i,j)>, // a		affine_map<(i,j) -> (i,j)>, // a
affine_map<(i,j) -> (i+2,j+3)>, // b		affine_map<(i,j) -> (i+2,j+3)>, // b
affine_map<(i,j) -> (i,j)> // x (out)		affine_map<(i,j) -> (i,j)> // x (out)
],		],
iterator_types = ["parallel","parallel"],		iterator_types = ["parallel","parallel"],
doc = "x(i,j) += a(i,j) * b(i+2,j+3)"		doc = "x(i,j) += a(i,j) * b(i+2,j+3)"
Show All 40 Lines	%0 = linalg.generic #trait3
outs(%argx: tensor<32x16xf64>) {		outs(%argx: tensor<32x16xf64>) {
^bb(%a: f64, %b: f64, %x: f64):		^bb(%a: f64, %b: f64, %x: f64):
%0 = arith.mulf %a, %b : f64		%0 = arith.mulf %a, %b : f64
%1 = arith.addf %x, %0 : f64		%1 = arith.addf %x, %0 : f64
linalg.yield %1 : f64		linalg.yield %1 : f64
} -> tensor<32x16xf64>		} -> tensor<32x16xf64>
return %0 : tensor<32x16xf64>		return %0 : tensor<32x16xf64>
}		}

		// CHECK-LABEL: func.func @mul_affine_sparse2d(
		// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>,
		// CHECK-SAME: %[[VAL_1:.*]]: tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> {
		// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index
		// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
		// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
		// CHECK: %[[VAL_5:.*]] = arith.constant 2 : index
		// CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f64
		// CHECK: %[[VAL_7:.*]] = arith.constant 3 : index
		// CHECK: %[[VAL_8:.*]] = bufferization.alloc_tensor() : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
		// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 1 : index} : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
		// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
		// CHECK: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_17:.*]] = %[[VAL_8]]) -> (tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_16]], %[[VAL_5]] : index
		// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<?xindex>
		// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_16]], %[[VAL_4]] : index
		// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xindex>
		// CHECK: %[[VAL_22:.*]] = scf.for %[[VAL_23:.*]] = %[[VAL_19]] to %[[VAL_21]] step %[[VAL_4]] iter_args(%[[VAL_24:.*]] = %[[VAL_17]]) -> (tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref<?xindex>
		// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xf64>
		// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_18]]] : memref<?xindex>
		// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_18]], %[[VAL_4]] : index
		// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<?xindex>
		// CHECK: %[[VAL_30:.*]]:2 = scf.for %[[VAL_31:.*]] = %[[VAL_27]] to %[[VAL_29]] step %[[VAL_4]] iter_args(%[[VAL_32:.*]] = %[[VAL_6]], %[[VAL_33:.*]] = %[[VAL_24]]) -> (f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_31]]] : memref<?xindex>
		// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_25]], %[[VAL_7]] : index
		// CHECK: %[[VAL_36:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_35]] : index
		// CHECK: %[[VAL_37:.*]]:2 = scf.if %[[VAL_36]] -> (f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
		// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_31]]] : memref<?xf64>
		// CHECK: %[[VAL_39:.*]] = arith.mulf %[[VAL_26]], %[[VAL_38]] : f64
		// CHECK: %[[VAL_40:.*]] = arith.addf %[[VAL_32]], %[[VAL_39]] : f64
		// CHECK: scf.yield %[[VAL_40]], %[[VAL_33]] : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: } else {
		// CHECK: scf.yield %[[VAL_32]], %[[VAL_33]] : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: scf.yield %[[VAL_41:.*]]#0, %[[VAL_41]]#1 : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: %[[VAL_42:.*]] = sparse_tensor.insert %[[VAL_43:.*]]#0 into %[[VAL_43]]#1{{\[}}%[[VAL_16]], %[[VAL_25]]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: scf.yield %[[VAL_42]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: scf.yield %[[VAL_44:.*]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: }
		// CHECK: %[[VAL_45:.*]] = sparse_tensor.load %[[VAL_46:.*]] hasInserts : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// CHECK: return %[[VAL_45]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
		// Kernel under test: x(i,j) += a(i,j) * b(i+2,j+3) (see #trait3), with the
		// second input addressed through affine index expressions in both
		// dimensions. All operands, including the freshly allocated output, carry
		// the #CSR ("dense", "compressed") encoding.
		func.func @mul_affine_sparse2d(%arga: tensor<32x16xf64, #CSR>,
		%argb: tensor<34x19xf64, #CSR>) -> tensor<32x16xf64, #CSR> {
		%argx = bufferization.alloc_tensor() : tensor<32x16xf64, #CSR>
		%0 = linalg.generic #trait3
		ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64, #CSR>)
		outs(%argx: tensor<32x16xf64, #CSR>) {
		^bb(%a: f64, %b: f64, %x: f64):
		%0 = arith.mulf %a, %b : f64
		%1 = arith.addf %x, %0 : f64
		linalg.yield %1 : f64
		} -> tensor<32x16xf64, #CSR>
		return %0 : tensor<32x16xf64, #CSR>
		}

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir

This file was added.

				// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
				// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
				// RUN: mlir-cpu-runner \
				// RUN: -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				#CCC = #sparse_tensor.encoding<{
				dimLevelType = [ "compressed", "compressed", "compressed" ] }>
				PeimingAuthorUnsubmitted Done Reply Inline Actions @aartbik Maybe we can have some discussion on this Peiming: @aartbik Maybe we can have some discussion on this

				#CDC = #sparse_tensor.encoding<{
				dimLevelType = [ "compressed", "dense", "compressed" ]
				// FIXME: Still inadmissible might need investigation
				// dimOrdering = affine_map<(i,j,k) -> (j,k,i)>
				}>

				// Creates and returns a 3-D tensor of size (%s1, %s2, %s3) in which every
				// element is set to the value %f. The tensor is allocated with
				// bufferization.alloc_tensor and then initialized with linalg.fill.
				func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor<?x?x?xf32> {
				%buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor<?x?x?xf32>
				%ret = linalg.fill ins(%f : f32) outs(%buf : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
				return %ret : tensor<?x?x?xf32>
				}

				// Reference kernel on unannotated tensors: linalg.conv_1d_nwc_wcf with
				// unit dilations and strides, taking %arg0 and %arg1 as inputs and
				// %arg2 as the output operand.
				func.func @conv_1d_nwc_wcf(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
				%ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
				strides = dense<1> : tensor<1xi64>}
				ins (%arg0, %arg1: tensor<?x?x?xf32>, tensor<?x?x?xf32>)
				outs (%arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
				return %ret : tensor<?x?x?xf32>
				}

				// Same convolution with both inputs annotated as #CCC (all three
				// dimensions "compressed"). The output is a #CCC tensor allocated here
				// with the fixed runtime size 3x6x1.
				func.func @conv_1d_nwc_wcf_CCC(%arg0: tensor<?x?x?xf32, #CCC>, %arg1: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC> {
				%c1 = arith.constant 1 : index
				%c3 = arith.constant 3 : index
				%c6 = arith.constant 6 : index
				%s = bufferization.alloc_tensor(%c3, %c6, %c1) : tensor<?x?x?xf32, #CCC>
				%ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
				strides = dense<1> : tensor<1xi64>}
				ins (%arg0, %arg1: tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>)
				outs (%s: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC>
				return %ret : tensor<?x?x?xf32, #CCC>
				}

				// Same convolution with both inputs annotated as #CDC ("compressed",
				// "dense", "compressed"). The output is a #CDC tensor allocated here
				// with the fixed runtime size 3x6x1.
				func.func @conv_1d_nwc_wcf_CDC(%arg0: tensor<?x?x?xf32, #CDC>, %arg1: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC> {
				%c1 = arith.constant 1 : index
				%c3 = arith.constant 3 : index
				%c6 = arith.constant 6 : index
				%s = bufferization.alloc_tensor(%c3, %c6, %c1) : tensor<?x?x?xf32, #CDC>
				%ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
				strides = dense<1> : tensor<1xi64>}
				ins (%arg0, %arg1: tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>)
				outs (%s: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC>
				return %ret : tensor<?x?x?xf32, #CDC>
				}

				// Test driver: builds a 3x8x1 input (all 2.0, except 10.0 at [0, 3, 0]), a
				// 3x1x1 filter (all 2.0) and a zero-filled 3x6x1 output, runs the dense,
				// #CCC and #CDC variants of the 1-D convolution, and prints each result as
				// a vector<3x6x1xf32> so FileCheck can compare the sparse results against
				// the dense one.
				func.func @entry() {
				%c0 = arith.constant 0 : index
				%c1 = arith.constant 1 : index
				%c3 = arith.constant 3 : index
				%c6 = arith.constant 6 : index
				%c8 = arith.constant 8 : index
				%f10 = arith.constant 10.00000e+00 : f32
				%val = arith.constant 2.00000e+00 : f32
				%zero = arith.constant 0.00000e+00 : f32

				// Dense operands.
				%in1D_tmp = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
				%in1D_nwc = tensor.insert %f10 into %in1D_tmp[%c0, %c3, %c0] : tensor<?x?x?xf32>
				%filter1D_nwc = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
				%out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (tensor<?x?x?xf32>)

				// Sparse copies of the input and the filter, one pair per encoding.
				%in1D_nwc_CCC = sparse_tensor.convert %in1D_nwc
				: tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>
				%filter1D_nwc_CCC = sparse_tensor.convert %filter1D_nwc
				: tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>

				%in1D_nwc_CDC = sparse_tensor.convert %in1D_nwc
				: tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>
				%filter1D_nwc_CDC = sparse_tensor.convert %filter1D_nwc
				: tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>

				// Call the kernels.
				%dense_ret = call @conv_1d_nwc_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>)
				%CCC_ret = call @conv_1d_nwc_wcf_CCC(%in1D_nwc_CCC, %filter1D_nwc_CCC) : (tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>) -> (tensor<?x?x?xf32, #CCC>)
				%CDC_ret = call @conv_1d_nwc_wcf_CDC(%in1D_nwc_CDC, %filter1D_nwc_CDC) : (tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>) -> (tensor<?x?x?xf32, #CDC>)

				// CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ),
				// CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ),
				// CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) )
				%dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0], %zero
				: tensor<?x?x?xf32>, vector<3x6x1xf32>
				vector.print %dense_v : vector<3x6x1xf32>

				// CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ),
				// CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ),
				// CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) )
				%1 = sparse_tensor.convert %CCC_ret
				: tensor<?x?x?xf32, #CCC> to tensor<?x?x?xf32>
				%v1 = vector.transfer_read %1[%c0, %c0, %c0], %zero
				: tensor<?x?x?xf32>, vector<3x6x1xf32>
				vector.print %v1 : vector<3x6x1xf32>

				// CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ),
				// CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ),
				// CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) )
				%2 = sparse_tensor.convert %CDC_ret
				: tensor<?x?x?xf32, #CDC> to tensor<?x?x?xf32>
				%v2 = vector.transfer_read %2[%c0, %c0, %c0], %zero
				: tensor<?x?x?xf32>, vector<3x6x1xf32>
				vector.print %v2 : vector<3x6x1xf32>

				// Release the sparse tensors once every result has been printed, so the
				// test does not leak their storage (mirrors the cleanup at the end of
				// the sparse_conv_2d.mlir test).
				bufferization.dealloc_tensor %in1D_nwc_CCC : tensor<?x?x?xf32, #CCC>
				bufferization.dealloc_tensor %filter1D_nwc_CCC : tensor<?x?x?xf32, #CCC>
				bufferization.dealloc_tensor %in1D_nwc_CDC : tensor<?x?x?xf32, #CDC>
				bufferization.dealloc_tensor %filter1D_nwc_CDC : tensor<?x?x?xf32, #CDC>

				bufferization.dealloc_tensor %CCC_ret : tensor<?x?x?xf32, #CCC>
				bufferization.dealloc_tensor %CDC_ret : tensor<?x?x?xf32, #CDC>

				return
				}

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir

This file was added.

				// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
				// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
				// RUN: mlir-cpu-runner \
				// RUN: -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				#DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
				#CSR = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}>
				#CSC = #sparse_tensor.encoding<{
				dimLevelType = [ "dense", "compressed" ],
				dimOrdering = affine_map<(i,j) -> (j,i)>
				}>

				// An example of a 2D convolution with a sparse filter.
				module {

				// linalg.conv_2d with a dense 8x8 input, a #DCSR-annotated 3x3 filter and
				// a dense 6x6 output operand supplied by the caller.
				func.func @conv2d(%input: tensor<8x8xi32>,
				%filter: tensor<3x3xi32, #DCSR>,
				%output: tensor<6x6xi32>) -> tensor<6x6xi32> {
				%0 = linalg.conv_2d
				ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
				outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
				return %0 : tensor<6x6xi32>
				}

				// linalg.conv_2d with a dense 8x8 input and a #DCSR 3x3 filter, writing
				// into a #DCSR 6x6 output tensor that is allocated inside this function.
				func.func @conv2d_sparse_out(%input: tensor<8x8xi32>,
				%filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> {
				%s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
				%0 = linalg.conv_2d
				ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
				outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
				return %0 : tensor<6x6xi32, #DCSR>
				}

				// linalg.conv_2d where the input, the filter and the locally allocated
				// output all carry the #DCSR ("compressed", "compressed") encoding.
				func.func @conv2d_all_sparse_DCSR(%input: tensor<8x8xi32, #DCSR>,
				%filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> {
				%s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
				%0 = linalg.conv_2d
				ins (%input, %filter: tensor<8x8xi32, #DCSR>, tensor<3x3xi32, #DCSR>)
				outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
				return %0 : tensor<6x6xi32, #DCSR>
				}

				// linalg.conv_2d where the input, the filter and the locally allocated
				// output all carry the #CSR ("dense", "compressed") encoding.
				func.func @conv2d_all_sparse_CSR(%input: tensor<8x8xi32, #CSR>,
				%filter: tensor<3x3xi32, #CSR>) -> tensor<6x6xi32, #CSR> {
				%s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSR>
				%0 = linalg.conv_2d
				ins (%input, %filter: tensor<8x8xi32, #CSR>, tensor<3x3xi32, #CSR>)
				outs (%s: tensor<6x6xi32, #CSR>) -> tensor<6x6xi32, #CSR>
				return %0 : tensor<6x6xi32, #CSR>
				}

				// linalg.conv_2d where the input, the filter and the locally allocated
				// output all carry the #CSC encoding ("dense", "compressed" levels with
				// the dimension ordering (i,j) -> (j,i)).
				func.func @conv2d_all_sparse_CSC(%input: tensor<8x8xi32, #CSC>,
				%filter: tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC> {
				%s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSC>
				%0 = linalg.conv_2d
				ins (%input, %filter: tensor<8x8xi32, #CSC>, tensor<3x3xi32, #CSC>)
				outs (%s: tensor<6x6xi32, #CSC>) -> tensor<6x6xi32, #CSC>
				return %0 : tensor<6x6xi32, #CSC>
				}

				// Test driver: converts a constant 3x3 filter and a constant 8x8 input to
				// the #DCSR, #CSR and #CSC encodings, runs the five convolution variants
				// defined above, prints each 6x6 result as a vector for FileCheck, and
				// finally deallocates the sparse tensors.
				func.func @entry() {
				%c0 = arith.constant 0 : index
				%i0 = arith.constant 0 : i32

				// A typical edge detection filter.
				%filter = arith.constant dense<[
				[ 1, 0, -1 ],
				[ 0, 0, 0 ],
				[ -1, 0, 1 ]
				]> : tensor<3x3xi32>
				%sparse_filter_DCSR = sparse_tensor.convert %filter
				: tensor<3x3xi32> to tensor<3x3xi32, #DCSR>
				%sparse_filter_CSR = sparse_tensor.convert %filter
				: tensor<3x3xi32> to tensor<3x3xi32, #CSR>
				%sparse_filter_CSC = sparse_tensor.convert %filter
				: tensor<3x3xi32> to tensor<3x3xi32, #CSC>


				// The 8x8 input, in dense form and in each sparse encoding.
				%input = arith.constant dense<[
				[ 1, 2, 3, 4, 0, 6, 7, 8 ],
				[ 2, 2, 4, 4, 0, 0, 6, 8 ],
				[ 2, 2, 4, 4, 0, 0, 6, 8 ],
				[ 2, 2, 3, 4, 0, 0, 7, 8 ],
				[ 1, 3, 3, 4, 0, 0, 6, 8 ],
				[ 3, 2, 3, 4, 0, 0, 7, 8 ],
				[ 1, 3, 3, 4, 3, 6, 6, 8 ],
				[ 1, 3, 3, 4, 3, 0, 7, 8 ]
				]> : tensor<8x8xi32>
				%sparse_input_DCSR = sparse_tensor.convert %input
				: tensor<8x8xi32> to tensor<8x8xi32, #DCSR>
				%sparse_input_CSR = sparse_tensor.convert %input
				: tensor<8x8xi32> to tensor<8x8xi32, #CSR>
				%sparse_input_CSC = sparse_tensor.convert %input
				: tensor<8x8xi32> to tensor<8x8xi32, #CSC>

				// Call the kernel.
				%output = arith.constant dense<0> : tensor<6x6xi32>
				%0 = call @conv2d(%input, %sparse_filter_DCSR, %output)
				: (tensor<8x8xi32>,
				tensor<3x3xi32, #DCSR>, tensor<6x6xi32>) -> tensor<6x6xi32>
				%1 = call @conv2d_sparse_out(%input, %sparse_filter_DCSR)
				: (tensor<8x8xi32>,
				tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
				%2 = call @conv2d_all_sparse_DCSR(%sparse_input_DCSR, %sparse_filter_DCSR)
				: (tensor<8x8xi32, #DCSR>,
				tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
				%3 = call @conv2d_all_sparse_CSR(%sparse_input_CSR, %sparse_filter_CSR)
				: (tensor<8x8xi32, #CSR>,
				tensor<3x3xi32, #CSR>) -> tensor<6x6xi32, #CSR>
				%4 = call @conv2d_all_sparse_CSC(%sparse_input_CSC, %sparse_filter_CSC)
				: (tensor<8x8xi32, #CSC>,
				tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC>


				// Verify the output.
				//
				// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
				// CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
				// CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
				// CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
				// CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
				// CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
				//
				%v = vector.transfer_read %0[%c0, %c0], %i0
				: tensor<6x6xi32>, vector<6x6xi32>
				vector.print %v : vector<6x6xi32>

				//
				// Should be the same as dense output
				// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
				// CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
				// CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
				// CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
				// CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
				// CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
				//
				%sparse_ret = sparse_tensor.convert %1
				: tensor<6x6xi32, #DCSR> to tensor<6x6xi32>
				%v1 = vector.transfer_read %sparse_ret[%c0, %c0], %i0
				: tensor<6x6xi32>, vector<6x6xi32>
				vector.print %v1 : vector<6x6xi32>

				//
				// Should be the same as dense output
				// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
				// CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
				// CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
				// CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
				// CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
				// CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
				//
				%all_sparse_DCSR = sparse_tensor.convert %2
				: tensor<6x6xi32, #DCSR> to tensor<6x6xi32>
				%v2 = vector.transfer_read %all_sparse_DCSR[%c0, %c0], %i0
				: tensor<6x6xi32>, vector<6x6xi32>
				vector.print %v2 : vector<6x6xi32>

				//
				// Should be the same as dense output
				// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
				// CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
				// CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
				// CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
				// CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
				// CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
				//
				%all_sparse_CSR = sparse_tensor.convert %3
				: tensor<6x6xi32, #CSR> to tensor<6x6xi32>
				%v3 = vector.transfer_read %all_sparse_CSR[%c0, %c0], %i0
				: tensor<6x6xi32>, vector<6x6xi32>
				vector.print %v3 : vector<6x6xi32>

				//
				// Should be the same as dense output
				// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
				// CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
				// CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
				// CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
				// CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
				// CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
				//
				%all_sparse_CSC = sparse_tensor.convert %4
				: tensor<6x6xi32, #CSC> to tensor<6x6xi32>
				%v4 = vector.transfer_read %all_sparse_CSC[%c0, %c0], %i0
				: tensor<6x6xi32>, vector<6x6xi32>
				vector.print %v4 : vector<6x6xi32>

				// Release the resources.
				bufferization.dealloc_tensor %sparse_filter_DCSR : tensor<3x3xi32, #DCSR>
				bufferization.dealloc_tensor %sparse_filter_CSR : tensor<3x3xi32, #CSR>
				bufferization.dealloc_tensor %sparse_filter_CSC : tensor<3x3xi32, #CSC>

				bufferization.dealloc_tensor %sparse_input_DCSR : tensor<8x8xi32, #DCSR>
				bufferization.dealloc_tensor %sparse_input_CSR : tensor<8x8xi32, #CSR>
				bufferization.dealloc_tensor %sparse_input_CSC : tensor<8x8xi32, #CSC>

				bufferization.dealloc_tensor %1 : tensor<6x6xi32, #DCSR>
				bufferization.dealloc_tensor %2 : tensor<6x6xi32, #DCSR>
				bufferization.dealloc_tensor %3 : tensor<6x6xi32, #CSR>
				bufferization.dealloc_tensor %4 : tensor<6x6xi32, #CSC>
				return
				}
				}

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir

This file was added.

				// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
				// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
				// RUN: mlir-cpu-runner \
				// RUN: -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// Sparse encoding for 4-D tensors in which every dimension level is
				// "compressed".
				#CCCC = #sparse_tensor.encoding<{
				dimLevelType = [ "compressed", "dense", "compressed", "dense" ]
				}>

				// Allocates a dense 4-D f32 tensor with dynamic extents (%s1, %s2, %s3, %s4)
				// and returns it with every element set to %f.
				func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor<?x?x?x?xf32> {
				  %empty = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor<?x?x?x?xf32>
				  %filled = linalg.fill
				    ins(%f : f32)
				    outs(%empty : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
				  return %filled : tensor<?x?x?x?xf32>
				}

				// Dense reference: 2-D NHWC/HWCF convolution of %arg0 with filter %arg1
				// (unit strides and dilations), written into %arg2.
				func.func @conv_2d_nhwc_hwcf(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor<?x?x?x?xf32>, %arg2: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
				  %conv = linalg.conv_2d_nhwc_hwcf {strides = dense<1> : tensor<2xi64>,
				                                    dilations = dense<1> : tensor<2xi64>}
				    ins (%arg0, %arg1: tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
				    outs (%arg2: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
				  return %conv : tensor<?x?x?x?xf32>
				}

				// Same convolution with #CCCC-encoded input, filter and result. The output
				// is allocated here with extents (3, 6, 6, 1).
				func.func @conv_2d_nhwc_hwcf_CCCC(%arg0: tensor<?x?x?x?xf32, #CCCC>, %arg1: tensor<?x?x?x?xf32, #CCCC>) -> tensor<?x?x?x?xf32, #CCCC> {
				  %c6 = arith.constant 6 : index
				  %c3 = arith.constant 3 : index
				  %c1 = arith.constant 1 : index
				  %out = bufferization.alloc_tensor(%c3, %c6, %c6, %c1) : tensor<?x?x?x?xf32, #CCCC>
				  %conv = linalg.conv_2d_nhwc_hwcf {strides = dense<1> : tensor<2xi64>,
				                                    dilations = dense<1> : tensor<2xi64>}
				    ins (%arg0, %arg1: tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>)
				    outs (%out: tensor<?x?x?x?xf32, #CCCC>) -> tensor<?x?x?x?xf32, #CCCC>
				  return %conv : tensor<?x?x?x?xf32, #CCCC>
				}

				// Same convolution with #CDCD-encoded input, filter and result. The output
				// is allocated here with extents (3, 6, 6, 1).
				func.func @conv_2d_nhwc_hwcf_CDCD(%arg0: tensor<?x?x?x?xf32, #CDCD>, %arg1: tensor<?x?x?x?xf32, #CDCD>) -> tensor<?x?x?x?xf32, #CDCD> {
				  %c6 = arith.constant 6 : index
				  %c3 = arith.constant 3 : index
				  %c1 = arith.constant 1 : index
				  %out = bufferization.alloc_tensor(%c3, %c6, %c6, %c1) : tensor<?x?x?x?xf32, #CDCD>
				  %conv = linalg.conv_2d_nhwc_hwcf {strides = dense<1> : tensor<2xi64>,
				                                    dilations = dense<1> : tensor<2xi64>}
				    ins (%arg0, %arg1: tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32, #CDCD>)
				    outs (%out: tensor<?x?x?x?xf32, #CDCD>) -> tensor<?x?x?x?xf32, #CDCD>
				  return %conv : tensor<?x?x?x?xf32, #CDCD>
				}

				// Test driver: runs the same 2-D NHWC/HWCF convolution on dense, #CCCC and
				// #CDCD operands and prints each result so the three outputs can be compared.
				func.func @entry() {
				  %c0 = arith.constant 0 : index
				  %c1 = arith.constant 1 : index
				  %c3 = arith.constant 3 : index
				  %c6 = arith.constant 6 : index
				  %c8 = arith.constant 8 : index
				  %f10 = arith.constant 10.00000e+00 : f32
				  %val = arith.constant 2.00000e+00 : f32
				  %zero = arith.constant 0.00000e+00 : f32

				  // Filter is 3x3x3x1 and input is 3x8x8x3, both filled with 2.0; a single
				  // input element at [0, 0, 3, 0] is overwritten with 10.0. The dense output
				  // buffer is 3x6x6x1 and starts at zero.
				  %filter2D_nhwc = call @alloc_4d_filled_f32(%c3, %c3, %c3, %c1, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
				  %in2D_tmp = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
				  %in2D_nhwc = tensor.insert %f10 into %in2D_tmp[%c0, %c0, %c3, %c0] : tensor<?x?x?x?xf32>
				  %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)

				  // Sparse copies of the input and the filter, one pair per encoding.
				  %in2D_nhwc_CCCC = sparse_tensor.convert %in2D_nhwc
				    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>
				  %filter2D_nhwc_CCCC = sparse_tensor.convert %filter2D_nhwc
				    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>

				  %in2D_nhwc_CDCD = sparse_tensor.convert %in2D_nhwc
				    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CDCD>
				  %filter2D_nhwc_CDCD = sparse_tensor.convert %filter2D_nhwc
				    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CDCD>

				  %dense_ret = call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
				  %CCCC_ret = call @conv_2d_nhwc_hwcf_CCCC(%in2D_nhwc_CCCC, %filter2D_nhwc_CCCC) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>) -> (tensor<?x?x?x?xf32, #CCCC>)
				  %CDCD_ret = call @conv_2d_nhwc_hwcf_CDCD(%in2D_nhwc_CDCD, %filter2D_nhwc_CDCD) : (tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32, #CDCD>) -> (tensor<?x?x?x?xf32, #CDCD>)

				  // Dense reference output. A window of 3 * 3 * 3 taps of 2.0 * 2.0 sums to
				  // 108; the three windows that cover the 10.0 element gain (10 - 2) * 2 = 16.
				  //
				  // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				  // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				  // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) )
				  %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0], %zero
				    : tensor<?x?x?x?xf32>, vector<3x6x6x1xf32>
				  vector.print %dense_v : vector<3x6x6x1xf32>

				  // The #CCCC result, converted back to dense, must print the same values.
				  //
				  // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				  // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				  // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) )
				  %1 = sparse_tensor.convert %CCCC_ret
				    : tensor<?x?x?x?xf32, #CCCC> to tensor<?x?x?x?xf32>
				  %v1 = vector.transfer_read %1[%c0, %c0, %c0, %c0], %zero
				    : tensor<?x?x?x?xf32>, vector<3x6x6x1xf32>
				  vector.print %v1 : vector<3x6x6x1xf32>

				  // The #CDCD result, converted back to dense, must print the same values.
				  //
				  // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				  // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				  // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				  // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) )
				  %2 = sparse_tensor.convert %CDCD_ret
				    : tensor<?x?x?x?xf32, #CDCD> to tensor<?x?x?x?xf32>
				  %v2 = vector.transfer_read %2[%c0, %c0, %c0, %c0], %zero
				    : tensor<?x?x?x?xf32>, vector<3x6x6x1xf32>
				  vector.print %v2 : vector<3x6x6x1xf32>

				  // Release the sparse operands and results once everything is printed.
				  bufferization.dealloc_tensor %in2D_nhwc_CCCC : tensor<?x?x?x?xf32, #CCCC>
				  bufferization.dealloc_tensor %filter2D_nhwc_CCCC : tensor<?x?x?x?xf32, #CCCC>
				  bufferization.dealloc_tensor %in2D_nhwc_CDCD : tensor<?x?x?x?xf32, #CDCD>
				  bufferization.dealloc_tensor %filter2D_nhwc_CDCD : tensor<?x?x?x?xf32, #CDCD>

				  bufferization.dealloc_tensor %CCCC_ret : tensor<?x?x?x?xf32, #CCCC>
				  bufferization.dealloc_tensor %CDCD_ret : tensor<?x?x?x?xf32, #CDCD>
				  return
				}

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir

This file was added.

				// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
				// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
				// RUN: mlir-cpu-runner \
				// RUN: -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// Sparse encoding for 3-D tensors in which every dimension level is
				// "compressed".
				#CCC = #sparse_tensor.encoding<{
				dimLevelType = [ "compressed", "compressed", "compressed" ]
				}>

				// Sparse encoding for 3-D tensors with a "dense" middle level between two
				// "compressed" levels.
				#CDC = #sparse_tensor.encoding<{
				dimLevelType = [ "compressed", "dense", "compressed" ]
				}>

				// Allocates a dense 3-D f32 tensor with dynamic extents (%s1, %s2, %s3) and
				// returns it with every element set to %f.
				func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor<?x?x?xf32> {
				  %empty = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor<?x?x?xf32>
				  %filled = linalg.fill
				    ins(%f : f32)
				    outs(%empty : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
				  return %filled : tensor<?x?x?xf32>
				}

				// Dense reference: 3-D convolution of %arg0 with filter %arg1, written into
				// %arg2.
				func.func @conv_3d(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
				  %conv = linalg.conv_3d
				    ins (%arg0, %arg1: tensor<?x?x?xf32>, tensor<?x?x?xf32>)
				    outs (%arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
				  return %conv : tensor<?x?x?xf32>
				}

				// Same convolution with #CCC-encoded input, filter and result. The output
				// is allocated here with extents (6, 6, 6).
				func.func @conv_3d_CCC(%arg0: tensor<?x?x?xf32, #CCC>, %arg1: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC> {
				  %extent = arith.constant 6 : index
				  %out = bufferization.alloc_tensor(%extent, %extent, %extent) : tensor<?x?x?xf32, #CCC>
				  %conv = linalg.conv_3d
				    ins (%arg0, %arg1: tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>)
				    outs (%out: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC>
				  return %conv : tensor<?x?x?xf32, #CCC>
				}

				// Same convolution with #CDC-encoded input, filter and result. The output
				// is allocated here with extents (6, 6, 6).
				func.func @conv_3d_CDC(%arg0: tensor<?x?x?xf32, #CDC>, %arg1: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC> {
				  %extent = arith.constant 6 : index
				  %out = bufferization.alloc_tensor(%extent, %extent, %extent) : tensor<?x?x?xf32, #CDC>
				  %conv = linalg.conv_3d
				    ins (%arg0, %arg1: tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>)
				    outs (%out: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC>
				  return %conv : tensor<?x?x?xf32, #CDC>
				}

				// Test driver: runs the same 3-D convolution on dense, #CCC and #CDC
				// operands and prints each result so the three outputs can be compared.
				func.func @entry() {
				  %c0 = arith.constant 0 : index
				  %c1 = arith.constant 1 : index
				  %c3 = arith.constant 3 : index
				  %c6 = arith.constant 6 : index
				  %c8 = arith.constant 8 : index
				  %f10 = arith.constant 10.00000e+00 : f32
				  %val = arith.constant 2.00000e+00 : f32
				  %zero = arith.constant 0.00000e+00 : f32

				  // Filter is 3x3x3 and input is 8x8x8, both filled with 2.0; a single input
				  // element at [0, 3, 0] is overwritten with 10.0. The dense output buffer is
				  // 6x6x6 and starts at zero.
				  %filter3D = call @alloc_3d_filled_f32(%c3, %c3, %c3, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
				  %in3D_tmp = call @alloc_3d_filled_f32(%c8, %c8, %c8, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
				  %in3D = tensor.insert %f10 into %in3D_tmp[%c0, %c3, %c0] : tensor<?x?x?xf32>
				  %out3D = call @alloc_3d_filled_f32(%c6, %c6, %c6, %zero) : (index, index, index, f32) -> (tensor<?x?x?xf32>)

				  // Sparse copies of the input and the filter, one pair per encoding.
				  %in3D_CCC = sparse_tensor.convert %in3D
				    : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>
				  %filter3D_CCC = sparse_tensor.convert %filter3D
				    : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>

				  %in3D_CDC = sparse_tensor.convert %in3D
				    : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>
				  %filter3D_CDC = sparse_tensor.convert %filter3D
				    : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>

				  %dense_ret = call @conv_3d(%in3D, %filter3D, %out3D) : (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>)
				  %CCC_ret = call @conv_3d_CCC(%in3D_CCC, %filter3D_CCC) : (tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>) -> (tensor<?x?x?xf32, #CCC>)
				  %CDC_ret = call @conv_3d_CDC(%in3D_CDC, %filter3D_CDC) : (tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>) -> (tensor<?x?x?xf32, #CDC>)

				  // Dense reference output. A window of 3 * 3 * 3 taps of 2.0 * 2.0 sums to
				  // 108; the three windows that cover the 10.0 element gain (10 - 2) * 2 = 16.
				  //
				  // CHECK:( ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) )
				  %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0], %zero
				    : tensor<?x?x?xf32>, vector<6x6x6xf32>
				  vector.print %dense_v : vector<6x6x6xf32>

				  // The #CCC result, converted back to dense, must print the same values.
				  //
				  // CHECK-NEXT:( ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) )
				  %1 = sparse_tensor.convert %CCC_ret
				    : tensor<?x?x?xf32, #CCC> to tensor<?x?x?xf32>
				  %v1 = vector.transfer_read %1[%c0, %c0, %c0], %zero
				    : tensor<?x?x?xf32>, vector<6x6x6xf32>
				  vector.print %v1 : vector<6x6x6xf32>

				  // The #CDC result, converted back to dense, must print the same values.
				  // This block has to read %CDC_ret; reading %CCC_ret again would leave the
				  // #CDC kernel unverified.
				  //
				  // CHECK-NEXT:( ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
				  // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
				  // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) )
				  %2 = sparse_tensor.convert %CDC_ret
				    : tensor<?x?x?xf32, #CDC> to tensor<?x?x?xf32>
				  %v2 = vector.transfer_read %2[%c0, %c0, %c0], %zero
				    : tensor<?x?x?xf32>, vector<6x6x6xf32>
				  vector.print %v2 : vector<6x6x6xf32>

				  // Release the sparse operands and results once everything is printed.
				  bufferization.dealloc_tensor %in3D_CCC : tensor<?x?x?xf32, #CCC>
				  bufferization.dealloc_tensor %filter3D_CCC : tensor<?x?x?xf32, #CCC>
				  bufferization.dealloc_tensor %in3D_CDC : tensor<?x?x?xf32, #CDC>
				  bufferization.dealloc_tensor %filter3D_CDC : tensor<?x?x?xf32, #CDC>

				  bufferization.dealloc_tensor %CCC_ret : tensor<?x?x?xf32, #CCC>
				  bufferization.dealloc_tensor %CDC_ret : tensor<?x?x?xf32, #CDC>
				  return
				}

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir

This file was added.

				// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
				// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
				// RUN: mlir-cpu-runner \
				PeimingAuthorUnsubmitted Done Reply Inline Actions we can now also support codegen path for sparse convolution. Peiming: we can now also support codegen path for sparse convolution.
				// RUN: -e entry -entry-point-result=void \
				// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
				// RUN: FileCheck %s

				// Sparse encoding for 5-D tensors in which every dimension level is
				// "compressed".
				#CCCCC = #sparse_tensor.encoding<{
				dimLevelType = [ "compressed", "compressed", "compressed", "compressed", "compressed" ]
				}>

				// Sparse encoding for 5-D tensors whose dimension levels alternate between
				// "compressed" and "dense", starting and ending with "compressed".
				#CDCDC = #sparse_tensor.encoding<{
				dimLevelType = [ "compressed", "dense", "compressed", "dense", "compressed"]
				}>

				// Allocates a dense 5-D f32 tensor with dynamic extents
				// (%s1, %s2, %s3, %s4, %s5) and returns it with every element set to %f.
				func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> tensor<?x?x?x?x?xf32> {
				  %empty = bufferization.alloc_tensor(%s1, %s2, %s3, %s4, %s5) : tensor<?x?x?x?x?xf32>
				  %filled = linalg.fill
				    ins(%f : f32)
				    outs(%empty : tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
				  return %filled : tensor<?x?x?x?x?xf32>
				}

				// Dense reference: 3-D NDHWC/DHWCF convolution of %arg0 with filter %arg1
				// (unit strides and dilations), written into %arg2.
				func.func @conv_3d_ndhwc_dhwcf(%arg0: tensor<?x?x?x?x?xf32>,
				%arg1: tensor<?x?x?x?x?xf32>,
				%arg2: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32> {
				  %conv = linalg.conv_3d_ndhwc_dhwcf {strides = dense<1> : tensor<3xi64>,
				                                      dilations = dense<1> : tensor<3xi64>}
				    ins (%arg0, %arg1: tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>)
				    outs (%arg2: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
				  return %conv : tensor<?x?x?x?x?xf32>
				}

				// Same convolution with #CCCCC-encoded input, filter and result. The output
				// is allocated here with extents (1, 6, 6, 6, 1).
				func.func @conv_3d_ndhwc_dhwcf_CCCCC(%arg0: tensor<?x?x?x?x?xf32, #CCCCC>,
				%arg1: tensor<?x?x?x?x?xf32, #CCCCC>)
				-> tensor<?x?x?x?x?xf32, #CCCCC> {
				  %c6 = arith.constant 6 : index
				  %c1 = arith.constant 1 : index
				  %out = bufferization.alloc_tensor(%c1, %c6, %c6, %c6, %c1)
				    : tensor<?x?x?x?x?xf32, #CCCCC>
				  %conv = linalg.conv_3d_ndhwc_dhwcf {strides = dense<1> : tensor<3xi64>,
				                                      dilations = dense<1> : tensor<3xi64>}
				    ins (%arg0, %arg1: tensor<?x?x?x?x?xf32, #CCCCC>, tensor<?x?x?x?x?xf32, #CCCCC>)
				    outs (%out: tensor<?x?x?x?x?xf32, #CCCCC>) -> tensor<?x?x?x?x?xf32, #CCCCC>
				  return %conv : tensor<?x?x?x?x?xf32, #CCCCC>
				}

				// Same convolution with #CDCDC-encoded input, filter and result. The output
				// is allocated here with extents (1, 6, 6, 6, 1).
				func.func @conv_3d_ndhwc_dhwcf_CDCDC(%arg0: tensor<?x?x?x?x?xf32, #CDCDC>,
				%arg1: tensor<?x?x?x?x?xf32, #CDCDC>)
				-> tensor<?x?x?x?x?xf32, #CDCDC> {
				  %c6 = arith.constant 6 : index
				  %c1 = arith.constant 1 : index
				  %out = bufferization.alloc_tensor(%c1, %c6, %c6, %c6, %c1)
				    : tensor<?x?x?x?x?xf32, #CDCDC>
				  %conv = linalg.conv_3d_ndhwc_dhwcf {strides = dense<1> : tensor<3xi64>,
				                                      dilations = dense<1> : tensor<3xi64>}
				    ins (%arg0, %arg1: tensor<?x?x?x?x?xf32, #CDCDC>, tensor<?x?x?x?x?xf32, #CDCDC>)
				    outs (%out: tensor<?x?x?x?x?xf32, #CDCDC>) -> tensor<?x?x?x?x?xf32, #CDCDC>
				  return %conv : tensor<?x?x?x?x?xf32, #CDCDC>
				}

				func.func @entry() {
				%c0 = arith.constant 0 : index
				%c1 = arith.constant 1 : index
				%c3 = arith.constant 3 : index
				%c6 = arith.constant 6 : index
				%c8 = arith.constant 8 : index
				%f10 = arith.constant 10.00000e+00 : f32
				%val = arith.constant 2.00000e+00 : f32
				%zero = arith.constant 0.00000e+00 : f32

				%in3D_tmp = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
				%in3D_ndhwc = tensor.insert %f10 into %in3D_tmp[%c0, %c0, %c0, %c3, %c0] : tensor<?x?x?x?x?xf32>

				%filter3D_ndhwc = call @alloc_5d_filled_f32(%c3, %c3, %c3, %c1, %c1, %val) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
				%out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)

				%in3D_ndhwc_CCCCC = sparse_tensor.convert %in3D_ndhwc
				: tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CCCCC>
				%filter3D_ndhwc_CCCCC = sparse_tensor.convert %filter3D_ndhwc
				: tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CCCCC>

				%in3D_ndhwc_CDCDC = sparse_tensor.convert %in3D_ndhwc
				: tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CDCDC>
				%filter3D_ndhwc_CDCDC = sparse_tensor.convert %filter3D_ndhwc
				: tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CDCDC>

				// CHECK:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) )
				%dense_ret = call @conv_3d_ndhwc_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc)
				: (tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>) -> (tensor<?x?x?x?x?xf32>)
				%dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0, %c0], %zero
				: tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
				vector.print %dense_v : vector<1x6x6x6x1xf32>

				%CCCCC_ret = call @conv_3d_ndhwc_dhwcf_CCCCC(%in3D_ndhwc_CCCCC, %filter3D_ndhwc_CCCCC)
				: (tensor<?x?x?x?x?xf32, #CCCCC>,
				tensor<?x?x?x?x?xf32, #CCCCC>) -> (tensor<?x?x?x?x?xf32, #CCCCC>)

				// CHECK-NEXT:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) )
				%1 = sparse_tensor.convert %CCCCC_ret
				: tensor<?x?x?x?x?xf32, #CCCCC> to tensor<?x?x?x?x?xf32>
				%v1 = vector.transfer_read %1[%c0, %c0, %c0, %c0, %c0], %zero
				: tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
				vector.print %v1 : vector<1x6x6x6x1xf32>

				%CDCDC_ret = call @conv_3d_ndhwc_dhwcf_CDCDC(%in3D_ndhwc_CDCDC, %filter3D_ndhwc_CDCDC)
				: (tensor<?x?x?x?x?xf32, #CDCDC>,
				tensor<?x?x?x?x?xf32, #CDCDC>) -> (tensor<?x?x?x?x?xf32, #CDCDC>)

				// CHECK-NEXT:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
				// CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
				// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) )
				%2 = sparse_tensor.convert %CDCDC_ret
				: tensor<?x?x?x?x?xf32, #CDCDC> to tensor<?x?x?x?x?xf32>
				%v2 = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0, %c0], %zero
				: tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
				vector.print %v2 : vector<1x6x6x6x1xf32>

				return
				}

mlir/unittests/Dialect/SparseTensor/MergerTest.cpp

	Show First 20 Lines • Show All 121 Lines • ▼ Show 20 Lines
	FOREVERY_BINOP(IMPL_BINOP_PATTERN)			FOREVERY_BINOP(IMPL_BINOP_PATTERN)

	#undef IMPL_BINOP_PATTERN			#undef IMPL_BINOP_PATTERN

	class MergerTestBase : public ::testing::Test {			class MergerTestBase : public ::testing::Test {
	protected:			protected:
	MergerTestBase(unsigned numTensors, unsigned numLoops)			MergerTestBase(unsigned numTensors, unsigned numLoops)
	: numTensors(numTensors), numLoops(numLoops),			: numTensors(numTensors), numLoops(numLoops),
	merger(numTensors, numLoops) {}			merger(numTensors, numLoops, /*numFilterLoops=*/0) {}

	///			///
	/// Expression construction helpers.			/// Expression construction helpers.
	///			///

	unsigned tensor(unsigned tensor) {			unsigned tensor(unsigned tensor) {
	return merger.addExp(Kind::kTensor, tensor);			return merger.addExp(Kind::kTensor, tensor);
	}			}
	▲ Show 20 Lines • Show All 642 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][sparse] support affine expression on sparse dimensions.
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 475915

mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h

mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h

mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp

mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp

mlir/test/Dialect/SparseTensor/sparse_affine.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir

mlir/unittests/Dialect/SparseTensor/MergerTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][sparse] support affine expression on sparse dimensions.
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 475915

mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h

mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h

mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp

mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp

mlir/test/Dialect/SparseTensor/sparse_affine.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir

mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir

mlir/unittests/Dialect/SparseTensor/MergerTest.cpp

[mlir][sparse] support affine expression on sparse dimensions.
AbandonedPublic