This is an archive of the discontinued LLVM Phabricator instance.

[mlir][vector-to-gpu] Fix the Transpose Check in `mma.sync` VectorToGPU Lowering Path
ClosedPublic

Authored by manishucsd on Apr 6 2023, 5:13 PM.

Details

Summary

For the mma.sync Tensor Core lowering path with split-k reduction and multi-buffering, the mainloop's vector.transfer_read ops read from 4-D memref instances.

See example:

%27 = vector.transfer_read %alloc_1[%20, %26, %c0, %c0], %cst_0 {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d3)>} : memref<4x32x1x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
%28 = vector.transfer_read %alloc_1[%20, %26, %c0, %c16], %cst_0 {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d3)>} : memref<4x32x1x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>

%30 = vector.transfer_read %alloc_2[%20, %c0, %c0, %29], %cst_0 {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)>} : memref<4x1x32x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
%31 = vector.transfer_read %alloc_2[%20, %c0, %c16, %29], %cst_0 {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)>} : memref<4x1x32x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>

Without this change, all four vector.transfer_read ops above are marked as requiring a transpose, resulting in ldmatrix.trans for both matrixA and matrixB in a row-row matmul, while only matrixB needs a transpose. This patch fixes the issue.
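For illustration only, here is a minimal sketch of a rank-aware transpose check. The function name isTransposedLoad and its exact structure are assumptions rather than the actual patch code; it relies only on standard MLIR APIs (vector::TransferReadOp::getPermutationMap, Builder::getAffineDimExpr, AffineMap::get). The idea is to build the expected transposed map from the permutation map's own dimension count instead of comparing against a fixed 2-D map.

#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"

// Hypothetical helper: returns true only when a 2-D vector.transfer_read swaps
// the two innermost dimensions of its (possibly higher-rank) source memref.
static bool isTransposedLoad(mlir::OpBuilder &b, mlir::vector::TransferReadOp op) {
  mlir::AffineMap map = op.getPermutationMap();
  if (map.getNumResults() != 2 || map.getNumDims() < 2)
    return false;
  unsigned nDim = map.getNumDims();
  mlir::AffineExpr outerDim = b.getAffineDimExpr(nDim - 2);
  mlir::AffineExpr innerDim = b.getAffineDimExpr(nDim - 1);
  // (d0, ..., dN-2, dN-1) -> (dN-1, dN-2) is the transposed minor map.
  mlir::AffineMap transposedMap = mlir::AffineMap::get(
      nDim, /*symbolCount=*/0, {innerDim, outerDim}, op.getContext());
  return map == transposedMap;
}

With the maps from the example above, such a check returns false for the matrixA reads ((d0, d1, d2, d3) -> (d1, d3)) and true for the matrixB reads ((d0, d1, d2, d3) -> (d3, d2)), so only matrixB lowers to ldmatrix.trans.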

Diff Detail

Event Timeline

manishucsd created this revision.Apr 6 2023, 5:13 PM
Herald added a project: Restricted Project.
manishucsd requested review of this revision.Apr 6 2023, 5:13 PM

Make sure to add a test.

mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
661

nit: remove empty line

664–666

Either make it an assert or return false. We should not be emitting an error and then just continuing.

669

The rule is that you should use auto only when it is a cast where the type is explicitly stated, or when the type is complex (iterator-like), so here you need to spell out the type.

676

Same as above: emitting an error here doesn't sound right. Either we should have code preventing such a case earlier and then assert, or we should return false or propagate an error.

704

nit: /*transpose=*/ doesn't add much anymore; I would remove it.

ThomasRaoux added inline comments.Apr 7 2023, 10:16 AM
mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir
257 (On Diff #511714)

This comment doesn't really have meaning in the context of the upstream flow. It would be better to describe what problem the test is trying to catch.

271–309 (On Diff #511714)

Can you minimize the test? It should be as simple as possible while still catching the bug you are fixing.

Applied comments.

manishucsd added inline comments.Apr 7 2023, 12:16 PM
mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir
271–309 (On Diff #511714)

Updated the comment for the test.

Also, pruned the test so that it only checks the transpose flags on multi-dimensional vector.transfer_read ops:
vector.transfer_read for A (transpose=false), vector.transfer_read for B (transpose=true), and vector.transfer_read for C (transpose=false).
The mma.sync op is kept so that DCE does not remove the transfer_read ops; see the illustrative sketch below.
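For reference, an illustrative shape of such checks for the A and B operands could look roughly like the lines below (not the committed test; it assumes the transpose flag surfaces on the generated nvgpu.ldmatrix ops):

// A-operand reads lower to non-transposed loads.
// CHECK: nvgpu.ldmatrix {{.*}} transpose = false
// B-operand reads lower to transposed loads.
// CHECK: nvgpu.ldmatrix {{.*}} transpose = true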

manishucsd marked 6 inline comments as done.Apr 7 2023, 12:16 PM

Looks good, one last nit

mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir
278 (On Diff #511743)

Replace %c0 so the checks do not assume specific SSA value naming, here and below.
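For example (FileCheck technique only; the op and operands below are placeholders, not the actual test content), the constant can be captured once and the capture reused instead of matching the literal name:

// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]]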

manishucsd updated this revision to Diff 511796.Apr 7 2023, 2:13 PM
manishucsd marked 2 inline comments as done.Apr 7 2023, 3:17 PM
ThomasRaoux accepted this revision.Apr 9 2023, 10:52 PM
This revision is now accepted and ready to land.Apr 9 2023, 10:52 PM
This revision was automatically updated to reflect the committed changes.