Diff 524974

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Show First 20 Lines • Show All 1,896 Lines • ▼ Show 20 Lines	def GPU_SpMMBufferSizeOp : GPU_Op<"spmm_buffer_size", [GPU_AsyncOpInterface]> {
let results = (outs Res<Index>:$bufferSz, Optional<GPU_AsyncToken>:$asyncToken);		let results = (outs Res<Index>:$bufferSz, Optional<GPU_AsyncToken>:$asyncToken);

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$env `,` $spmatA `,` $dnmatB `,` $dnmatC attr-dict		$env `,` $spmatA `,` $dnmatB `,` $dnmatC attr-dict
}];		}];
}		}

		// TODO: add GPU_SDDMMOp

def GPU_SpMMOp : GPU_Op<"spmm", [GPU_AsyncOpInterface]> {		def GPU_SpMMOp : GPU_Op<"spmm", [GPU_AsyncOpInterface]> {
let summary = "SpMM operation";		let summary = "SpMM operation";
let description = [{		let description = [{
The `gpu.spmm` operation performs the SpMM operation on the given sparse and		The `gpu.spmm` operation performs the SpMM operation on the given sparse and
dense matrix, and buffer. The operation expects handles returned by previous		dense matrix, and buffer. The operation expects handles returned by previous
sparse operations to construct an environment and the operands for SpMM. The		sparse operations to construct an environment and the operands for SpMM. The
buffer must have been allocated on the device.		buffer must have been allocated on the device.

Show All 17 Lines	def GPU_SpMMOp : GPU_Op<"spmm", [GPU_AsyncOpInterface]> {
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);		let results = (outs Optional<GPU_AsyncToken>:$asyncToken);

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$env `,` $spmatA `,` $dnmatB `,` $dnmatC `,` $buffer attr-dict `:` type($buffer)		$env `,` $spmatA `,` $dnmatB `,` $dnmatC `,` $buffer attr-dict `:` type($buffer)
}];		}];
}		}

#endif // GPU_OPS		#endif // GPU_OPS
		Peiming (Unsubmitted, Done): bad indentation here
		K-Wu (Author, Unsubmitted, Done): Good catch. Thank you!
		aartbik (Unsubmitted, Not Done): matrix -> matrices (since we have dense A, B now)

mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp

Show First 20 Lines • Show All 590 Lines • ▼ Show 20 Lines	public:
}		}

private:		private:
LogicalResult		LogicalResult
matchAndRewrite(gpu::SpMMBufferSizeOp op, OpAdaptor adaptor,		matchAndRewrite(gpu::SpMMBufferSizeOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override;		ConversionPatternRewriter &rewriter) const override;
};		};

		// TODO: impl SDDMM Op lowering pass here

class ConvertSpMMOpToGpuRuntimeCallPattern		class ConvertSpMMOpToGpuRuntimeCallPattern
: public ConvertOpToGpuRuntimeCallPattern<gpu::SpMMOp> {		: public ConvertOpToGpuRuntimeCallPattern<gpu::SpMMOp> {
public:		public:
ConvertSpMMOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)		ConvertSpMMOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
: ConvertOpToGpuRuntimeCallPattern<gpu::SpMMOp>(typeConverter) {}		: ConvertOpToGpuRuntimeCallPattern<gpu::SpMMOp>(typeConverter) {}

private:		private:
LogicalResult		LogicalResult
▲ Show 20 Lines • Show All 873 Lines • ▼ Show 20 Lines	void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
});		});
converter.addConversion([&converter](gpu::SparseHandleType type) -> Type {		converter.addConversion([&converter](gpu::SparseHandleType type) -> Type {
return converter.getPointerType(		return converter.getPointerType(
IntegerType::get(&converter.getContext(), 8));		IntegerType::get(&converter.getContext(), 8));
});		});

patterns.add<ConvertAllocOpToGpuRuntimeCallPattern,		patterns.add<ConvertAllocOpToGpuRuntimeCallPattern,
ConvertDeallocOpToGpuRuntimeCallPattern,		ConvertDeallocOpToGpuRuntimeCallPattern,
ConvertHostRegisterOpToGpuRuntimeCallPattern,		ConvertHostRegisterOpToGpuRuntimeCallPattern,
		aartbik (Unsubmitted, Not Done): note that we have this test + convert test so many times now that perhaps a helper will be useful (next revision is fine)
ConvertHostUnregisterOpToGpuRuntimeCallPattern,		ConvertHostUnregisterOpToGpuRuntimeCallPattern,
ConvertMemcpyOpToGpuRuntimeCallPattern,		ConvertMemcpyOpToGpuRuntimeCallPattern,
ConvertMemsetOpToGpuRuntimeCallPattern,		ConvertMemsetOpToGpuRuntimeCallPattern,
ConvertSetDefaultDeviceOpToGpuRuntimeCallPattern,		ConvertSetDefaultDeviceOpToGpuRuntimeCallPattern,
ConvertWaitAsyncOpToGpuRuntimeCallPattern,		ConvertWaitAsyncOpToGpuRuntimeCallPattern,
ConvertWaitOpToGpuRuntimeCallPattern,		ConvertWaitOpToGpuRuntimeCallPattern,
ConvertAsyncYieldToGpuRuntimeCallPattern,		ConvertAsyncYieldToGpuRuntimeCallPattern,
ConvertCreateSparseEnvOpToGpuRuntimeCallPattern,		ConvertCreateSparseEnvOpToGpuRuntimeCallPattern,
Show All 16 Lines

mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp

Show First 20 Lines • Show All 494 Lines • ▼ Show 20 Lines	static LogicalResult rewriteSpMV(PatternRewriter &rewriter,
genBlockingWait(rewriter, loc, tokens);		genBlockingWait(rewriter, loc, tokens);
tokens.clear();		tokens.clear();

// Done.		// Done.
rewriter.replaceOp(op, op.getDpsInitOperand(0)->get());		rewriter.replaceOp(op, op.getDpsInitOperand(0)->get());
return success();		return success();
}		}

		// TODO: implement SDDMM rewriter here

/// Match and rewrite SpMM kernel.		/// Match and rewrite SpMM kernel.
static LogicalResult rewriteSpMM(PatternRewriter &rewriter,		static LogicalResult rewriteSpMM(PatternRewriter &rewriter,
linalg::GenericOp op, bool enableRT) {		linalg::GenericOp op, bool enableRT) {
Location loc = op.getLoc();		Location loc = op.getLoc();
Value a = op.getOperand(0);		Value a = op.getOperand(0);
Value b = op.getOperand(1);		Value b = op.getOperand(1);
Value c = op.getOperand(2); // we have C = AB		Value c = op.getOperand(2); // we have C = AB
SmallVector<Value> tokens;		SmallVector<Value> tokens;
▲ Show 20 Lines • Show All 174 Lines • ▼ Show 20 Lines	LogicalResult matchAndRewrite(scf::ParallelOp forallOp,
tokens.clear();		tokens.clear();
Value kernelToken =		Value kernelToken =
genLaunchGPUFunc(rewriter, gpuFunc, args, tokens, numThreads);		genLaunchGPUFunc(rewriter, gpuFunc, args, tokens, numThreads);
// Finalize the outlined arguments.		// Finalize the outlined arguments.
genParametersOut(rewriter, loc, out, kernelToken, scalars, buffers, args,		genParametersOut(rewriter, loc, out, kernelToken, scalars, buffers, args,
tokens);		tokens);
genBlockingWait(rewriter, loc, tokens);		genBlockingWait(rewriter, loc, tokens);
rewriter.eraseOp(forallOp);		rewriter.eraseOp(forallOp);
return success();		return success();
		aartbik (Unsubmitted, Not Done): Note that I made a small refinement in https://reviews.llvm.org/D151404 which seems safer: keep them all on the same stream (so no blocking wait here) and move the copy after the copy-in only and buffer are released
		K-Wu (Author, Unsubmitted, Done): Noted. I will update this and mark done once I incorporate the rebased-pull into this diff.
}		}

private:		private:
// Helper method to see if block appears in given loop.		// Helper method to see if block appears in given loop.
		aartbik (Unsubmitted, Not Done): sparse output will be more complex than this btw
static bool isNestedIn(Block *block, scf::ParallelOp forallOp) {		static bool isNestedIn(Block *block, scf::ParallelOp forallOp) {
for (Operation *o = block->getParentOp(); o; o = o->getParentOp()) {		for (Operation *o = block->getParentOp(); o; o = o->getParentOp()) {
if (o == forallOp)		if (o == forallOp)
return true;		return true;
}		}
return false;		return false;
}		}

▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][gpu][sparse] adding initial cusparse sddmm libgen support
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 524974

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp

mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][gpu][sparse] adding initial cusparse sddmm libgen support
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 524974

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp

mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp

[mlir][gpu][sparse] adding initial cusparse sddmm libgen support
ClosedPublic