This is an archive of the discontinued LLVM Phabricator instance.

mlir/test/Dialect/Linalg/vectorization.mlir
320	Missing `d0 + s0` expansion :). It would be good to add more tests if the bug is not specific to symbols. I would also add cases for something like `(d0...) -> (0)` or even `(d0...) -> ()`

awarzynski added inline comments.Feb 7 2023, 3:14 AM

mlir/test/Dialect/Linalg/vectorization.mlir

320

> Missing `d0 + s0` expansion :)

I think that it's actually there.

INPUT

#map0 = affine_map<(d0) -> (d0)>

func.func @vectorize_affine_apply(%arg0: tensor<32xf32>, %arg3: index) -> tensor<32xi32> {
  %0 = tensor.empty() : tensor<32xi32>
  %1 = linalg.generic {indexing_maps = [#map0, #map0],
                       iterator_types = ["parallel"]}
    ins(%arg0 : tensor<32xf32>)
    outs(%0 : tensor<32xi32>) {
  ^bb0(%arg1: f32, %arg2: i32):
    %2 = linalg.index 0 : index
    %12 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg3)
    %13 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%12)[%arg3]
    %3 = arith.index_cast %13 : index to i32
    linalg.yield %3 : i32
  } -> tensor<32xi32>
  return %1 : tensor<32xi32>
}

OUTPUT (with comments marking where each expansion happens):

module {
  func.func @vectorize_affine_apply_3(%arg0: tensor<32xf32>, %arg1: index) -> tensor<32xi32> {
    %cst = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]> : vector<32xindex>
    %c0 = arith.constant 0 : index
   %0 = tensor.empty() : tensor<32xi32>
    %1 = vector.broadcast %arg1 : index to vector<32xindex>
    // EXPANSION 1: %12 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg3)
    %2 = arith.addi %1, %cst : vector<32xindex>
    %3 = vector.broadcast %arg1 : index to vector<32xindex>
    // EXPANSION 2: %13 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%12)[%arg3]
    %4 = arith.addi %2, %3 : vector<32xindex>
    %5 = arith.index_cast %4 : vector<32xindex> to vector<32xi32>
    %6 = vector.transfer_write %5, %0[%c0] {in_bounds = [true]} : vector<32xi32>, tensor<32xi32>
    return %6 : tensor<32xi32>
  }
  transform.sequence  failures(propagate) {
  ^bb0(%arg0: !pdl.operation):
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!pdl.operation) -> !pdl.operation
    %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation
    %2 = transform.structured.vectorize %1 {vectorize_nd_extract}
  }
}

> It would be good to add more tests if the bug is not specific to symbols.
> I would also add cases for something like `(d0...) -> (0)` or even `(d0...) -> ()`.

Added in https://reviews.llvm.org/D143429. Here's one other case that's not yet tested, though present in your example in https://reviews.llvm.org/D142371: (d0, d1, d2) -> (d1 + d2 + d3). That also seems to work 🤔 .

Revision Contents

Path

Size

mlir/

lib/

Dialect/

Linalg/

Transforms/

Vectorization.cpp

14 lines

test/

Dialect/

Linalg/

vectorization.mlir

7 lines

Diff 494530

mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp

Show First 20 Lines • Show All 954 Lines • ▼ Show 20 Lines	static LogicalResult reductionPreconditions(LinalgOp op) {
return success();		return success();
}		}

static LogicalResult vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op) {		static LogicalResult vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op) {
// TODO: Masking only supports dynamic generic ops for now.		// TODO: Masking only supports dynamic generic ops for now.
if (!isa<linalg::GenericOp>(op))		if (!isa<linalg::GenericOp>(op))
return failure();		return failure();

		// TODO: Index vectorization assumes static shape.
		if (op.hasIndexSemantics())
		return failure();

// TODO: 0-d vectors are not supported yet.		// TODO: 0-d vectors are not supported yet.
if (llvm::any_of(op.getIndexingMapsArray(), [](AffineMap map) {		if (llvm::any_of(op.getIndexingMapsArray(), [](AffineMap map) {
return map.isEmpty() || map.getResults().empty();		return map.isEmpty() || map.getResults().empty();
}))		}))
return failure();		return failure();

LDBG("Dynamically-shaped op meets vectorization pre-conditions\n");		LDBG("Dynamically-shaped op meets vectorization pre-conditions\n");
return success();		return success();
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines	if (failed(reductionPreconditions(linalgOp))) {
LDBG("precondition failed: reduction preconditions\n");		LDBG("precondition failed: reduction preconditions\n");
return failure();		return failure();
}		}
return success();		return success();
}		}

/// Converts affine.apply Ops to arithmetic operations.		/// Converts affine.apply Ops to arithmetic operations.
static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) {		static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) {
auto &newIP = linalgOp.getBlock()->front();
OpBuilder::InsertionGuard g(rewriter);		OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPointAfter(&newIP);
auto toReplace = linalgOp.getBlock()->getOps<AffineApplyOp>();		auto toReplace = linalgOp.getBlock()->getOps<AffineApplyOp>();

for (auto op : make_early_inc_range(toReplace)) {		for (auto op : make_early_inc_range(toReplace)) {
auto expanded =		rewriter.setInsertionPoint(op);
expandAffineExpr(rewriter, op->getLoc(), op.getAffineMap().getResult(0),		auto expanded = expandAffineExpr(
op.getOperands(), ValueRange{});		rewriter, op->getLoc(), op.getAffineMap().getResult(0),
		op.getOperands().take_front(op.getAffineMap().getNumDims()),
		op.getOperands().take_back(op.getAffineMap().getNumSymbols()));
rewriter.replaceOp(op, expanded);		rewriter.replaceOp(op, expanded);
}		}
}		}

/// Emit a suitable vector form for a Linalg op. If provided, `inputVectorSizes`		/// Emit a suitable vector form for a Linalg op. If provided, `inputVectorSizes`
/// are used to vectorize this operation. `inputVectorSizes` must match the rank		/// are used to vectorize this operation. `inputVectorSizes` must match the rank
/// of the iteration space of the operation and the input vector sizes must be		/// of the iteration space of the operation and the input vector sizes must be
/// greater than or equal to their counterpart iteration space sizes, if static.		/// greater than or equal to their counterpart iteration space sizes, if static.
▲ Show 20 Lines • Show All 1,426 Lines • Show Last 20 Lines

mlir/test/Dialect/Linalg/vectorization.mlir

Show First 20 Lines • Show All 295 Lines • ▼ Show 20 Lines	func.func @vectorize_affine_apply(%arg0: tensor<32xf32>, %arg3: index) -> tensor<32xi32> {
%0 = tensor.empty() : tensor<32xi32>		%0 = tensor.empty() : tensor<32xi32>
%1 = linalg.generic {indexing_maps = [#map0, #map0],		%1 = linalg.generic {indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]}		iterator_types = ["parallel"]}
ins(%arg0 : tensor<32xf32>)		ins(%arg0 : tensor<32xf32>)
outs(%0 : tensor<32xi32>) {		outs(%0 : tensor<32xi32>) {
^bb0(%arg1: f32, %arg2: i32):		^bb0(%arg1: f32, %arg2: i32):
%2 = linalg.index 0 : index		%2 = linalg.index 0 : index
%12 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg3)		%12 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg3)
%3 = arith.index_cast %12 : index to i32		%13 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%12)[%arg3]
		%3 = arith.index_cast %13 : index to i32
linalg.yield %3 : i32		linalg.yield %3 : i32
} -> tensor<32xi32>		} -> tensor<32xi32>
return %1 : tensor<32xi32>		return %1 : tensor<32xi32>
}		}

// CHECK-LABEL: func.func @vectorize_affine_apply		// CHECK-LABEL: func.func @vectorize_affine_apply
// CHECK-SAME: %arg0: tensor<32xf32>		// CHECK-SAME: %arg0: tensor<32xf32>
// CHECK-SAME: %[[ARG1:.*]]: index		// CHECK-SAME: %[[ARG1:.*]]: index
// CHECK: %[[CST:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]> : vector<32xindex>		// CHECK: %[[CST:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]> : vector<32xindex>
// CHECK: %[[C0:.*]] = arith.constant 0 : index		// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<32xi32>		// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<32xi32>
// CHECK: %[[BCAST:.*]] = vector.broadcast %[[ARG1]] : index to vector<32xindex>		// CHECK: %[[BCAST:.*]] = vector.broadcast %[[ARG1]] : index to vector<32xindex>
// CHECK: %[[ADDI:.*]] = arith.addi %[[BCAST]], %[[CST]] : vector<32xindex>		// CHECK: %[[ADDI:.*]] = arith.addi %[[BCAST]], %[[CST]] : vector<32xindex>
// CHECK: %[[CAST:.*]] = arith.index_cast %[[ADDI]] : vector<32xindex> to vector<32xi32>		// CHECK: %[[BCAST2:.*]] = vector.broadcast %[[ARG1]] : index to vector<32xindex>
		// CHECK: %[[ADDI2:.*]] = arith.addi %[[ADDI]], %[[BCAST2]] : vector<32xindex>
		dcaballeUnsubmitted Not Done Reply Inline Actions Missing `d0 + s0` expansion :). It would be good to add more tests if the bug is not specific to symbols. I would also add cases for something like `(d0...) -> (0)` or even `(d0...) -> ()` dcaballe: Missing `d0 + s0` expansion :). It would be good to add more tests if the bug is not specific…
		awarzynskiUnsubmitted Not Done Reply Inline Actions Missing d0 + s0 expansion :) I think that it's actually there. INPUT #map0 = affine_map<(d0) -> (d0)> func.func @vectorize_affine_apply(%arg0: tensor<32xf32>, %arg3: index) -> tensor<32xi32> { %0 = tensor.empty() : tensor<32xi32> %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} ins(%arg0 : tensor<32xf32>) outs(%0 : tensor<32xi32>) { ^bb0(%arg1: f32, %arg2: i32): %2 = linalg.index 0 : index %12 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg3) %13 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%12)[%arg3] %3 = arith.index_cast %13 : index to i32 linalg.yield %3 : i32 } -> tensor<32xi32> return %1 : tensor<32xi32> } OUTPUT (with comments where the expansions happens): module { func.func @vectorize_affine_apply_3(%arg0: tensor<32xf32>, %arg1: index) -> tensor<32xi32> { %cst = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]> : vector<32xindex> %c0 = arith.constant 0 : index %0 = tensor.empty() : tensor<32xi32> %1 = vector.broadcast %arg1 : index to vector<32xindex> // EXPANSION 1: %12 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg3) %2 = arith.addi %1, %cst : vector<32xindex> %3 = vector.broadcast %arg1 : index to vector<32xindex> // EXPANSION 2: %13 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%12)[%arg3] %4 = arith.addi %2, %3 : vector<32xindex> %5 = arith.index_cast %4 : vector<32xindex> to vector<32xi32> %6 = vector.transfer_write %5, %0[%c0] {in_bounds = [true]} : vector<32xi32>, tensor<32xi32> return %6 : tensor<32xi32> } transform.sequence failures(propagate) { ^bb0(%arg0: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation %2 = transform.structured.vectorize %1 {vectorize_nd_extract} } } It would be good to add more 
tests if the bug is not specific to symbols I would also add cases for something like (d0...) -> (0) or even (d0...) -> () Added in https://reviews.llvm.org/D143429. Here's one other case that's not yet tested, though present in your example in https://reviews.llvm.org/D142371: `(d0, d1, d2) -> (d1 + d2 + d3)`. That also seems to work 🤔 . awarzynski: > Missing d0 + s0 expansion :) I think that it's actually there. INPUT ```lang=cpp #map0…
		// CHECK: %[[CAST:.*]] = arith.index_cast %[[ADDI2]] : vector<32xindex> to vector<32xi32>
// CHECK: vector.transfer_write %[[CAST]], %[[EMPTY]][%[[C0:.*]]] {in_bounds = [true]} : vector<32xi32>, tensor<32xi32>		// CHECK: vector.transfer_write %[[CAST]], %[[EMPTY]][%[[C0:.*]]] {in_bounds = [true]} : vector<32xi32>, tensor<32xi32>

transform.sequence failures(propagate) {		transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):		^bb1(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation		%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation		%1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation
%2 = transform.structured.vectorize %1 { vectorize_nd_extract }		%2 = transform.structured.vectorize %1 { vectorize_nd_extract }
}		}
▲ Show 20 Lines • Show All 1,717 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][linalg] Fix crash in vectorizer when expanding affine apply — Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 494530

mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp

mlir/test/Dialect/Linalg/vectorization.mlir

[mlir][linalg] Fix crash in vectorizer when expanding affine apply
Closed, Public