Index: mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
===================================================================
--- mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
+++ mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
@@ -650,6 +650,42 @@
   return success();
 }
 
+/// Returns true when the 2-D result of `op`'s permutation map is transposed
+/// with respect to the order of the source dimensions.
+///
+/// Both results of the map must be plain dimension expressions, i.e. the map
+/// has the generalized form
+///   (d0, d1, ..., dn, ..., dm, ...) -> (dm, dn)
+/// and the operand is transposed iff m > n. For example
+///   (d0, d1, d2, d3) -> (d3, d2)
+/// is transposed, while (d0, d1, d2, d3) -> (d2, d3) is not.
+///
+/// If the map does not have exactly two results, or a result is not an
+/// AffineDimExpr (e.g. `d0 * 2`), an error is emitted on `op` and false is
+/// returned, since the transpose cannot be determined.
+static bool isTransposed(vector::TransferReadOp op) {
+  mlir::AffineMap map = op.getPermutationMap();
+  if (map.getNumResults() != 2) {
+    op->emitError("Expected 2D transfer read");
+    // Bail out instead of reading results that may not exist.
+    return false;
+  }
+
+  // Output 2D matrix dimensions in the order of d0, d1.
+  auto dM = map.getResult(0);
+  auto dN = map.getResult(1);
+
+  // Find the position of these expressions in the input.
+  auto exprM = dM.dyn_cast<AffineDimExpr>();
+  auto exprN = dN.dyn_cast<AffineDimExpr>();
+  if (!exprM || !exprN) {
+    op->emitError("Expected to find AffineDimExpr in vector::TransferReadOp");
+    // Bail out: getPosition() must not be called on a null expression.
+    return false;
+  }
+  return exprM.getPosition() > exprN.getPosition();
+}
+
 static LogicalResult
 creatLdMatrixCompatibleLoads(RewriterBase &rewriter, vector::TransferReadOp op,
                              llvm::DenseMap &valueMapping) {
@@ -671,9 +707,10 @@
     return rewriter.notifyMatchFailure(op, "not mma sync reg info");
   }
 
-  FailureOr params = nvgpu::getLdMatrixParams(
-      *warpMatrixInfo,
-      /*transpose=*/!op.getPermutationMap().isMinorIdentity());
+  FailureOr params =
+      nvgpu::getLdMatrixParams(*warpMatrixInfo,
+                               /*transpose=*/isTransposed(op));
+
   if (failed(params)) {
     LLVM_DEBUG(
         DBGS()
@@ -700,7 +737,7 @@
       indices);
   nvgpu::LdMatrixOp newOp = rewriter.create(
       loc, vectorType, op.getSource(), indices,
-      !op.getPermutationMap().isMinorIdentity(), params->numTiles);
+      /*transpose=*/isTransposed(op), params->numTiles);
   valueMapping[op] = newOp->getResult(0);
   return success();
 }