Previously, we relied on the InsertOp to gradually grow the storage for all sparse tensors. We now allocate the full-size values buffer for annotated all-dense tensors when the tensor is first allocated. This avoids the cost of repeatedly resizing the buffer and allows the values buffer to be accessed as if it were a dense tensor.
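For illustration only, below is a minimal, self-contained C++ sketch of the sizing policy described above; it is not the actual MLIR codegen code, and the names `TensorSpec`, `isAllDense`, `initialValuesCapacity`, and the fallback capacity of 16 are all hypothetical. The idea: when every level of the encoding is dense, the linearized size of the values buffer is known up front, so it can be allocated in full instead of grown by insertions.

```cpp
#include <cstdint>
#include <vector>

// Hypothetical level formats, loosely mirroring sparse-tensor level types.
enum class LevelFormat { Dense, Compressed };

// Minimal stand-in for a sparse tensor's storage specification.
struct TensorSpec {
  std::vector<int64_t> dimSizes;        // static dimension sizes
  std::vector<LevelFormat> levelTypes;  // per-level storage format
};

// True when every level is annotated dense, i.e. the tensor is "all dense"
// and its values buffer has a statically known size.
static bool isAllDense(const TensorSpec &spec) {
  for (LevelFormat f : spec.levelTypes)
    if (f != LevelFormat::Dense)
      return false;
  return true;
}

// Chooses the initial capacity of the values buffer. For an all-dense tensor
// we allocate the full linearized size up front, so later inserts never have
// to grow the buffer and values can be addressed like a dense tensor.
// Otherwise we fall back to a small initial capacity (an assumed default of
// 16 here) that insertions grow on demand, matching the previous behavior.
static int64_t initialValuesCapacity(const TensorSpec &spec,
                                     int64_t smallInitialCapacity = 16) {
  if (!isAllDense(spec))
    return smallInitialCapacity;
  int64_t total = 1;
  for (int64_t d : spec.dimSizes)
    total *= d;
  return total;
}
```

For example, a 100x100 tensor whose two levels are both dense would get a values buffer of 10000 elements at allocation time, while a compressed tensor would still start from the small capacity and grow on insert.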
Diff Detail
- Repository: rG LLVM Github Monorepo
Event Timeline
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp (lines 244–247)
- LG, but you can probably do the same optimization for idxMemRef at the last level, right?
- NVM, you are doing this for all-dense tensors. Probably we should extend the alloc tensor to take some heuristic; there are many times when the NNZ can be computed.
- Right, there is already a TODO in the code for improving the heuristic.