Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
mlir/test/Dialect/Linalg/vectorization.mlir
Show First 20 Lines • Show All 284 Lines • ▼ Show 20 Lines | |||||
^bb1(%arg1: !pdl.operation): | ^bb1(%arg1: !pdl.operation): | ||||
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation | %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation | ||||
%1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation | %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation | ||||
%2 = transform.structured.vectorize %1 | %2 = transform.structured.vectorize %1 | ||||
} | } | ||||
// ----- | // ----- | ||||
// Tests that a `linalg.generic` whose body contains `linalg.index` followed by
// an `affine.apply` (here: index + %arg3) can be vectorized. The CHECK lines
// below expect the affine.apply to be rewritten into vector arithmetic: a
// constant iota vector<32xindex>, a broadcast of %arg3, and an arith.addi.
#map0 = affine_map<(d0) -> (d0)>
func.func @vectorize_affine_apply(%arg0: tensor<32xf32>, %arg3: index) -> tensor<32xi32> {
  %0 = tensor.empty() : tensor<32xi32>
  // Single parallel dimension; %arg0 is only used to drive the iteration
  // space — the yielded value is computed purely from the index.
  %1 = linalg.generic {indexing_maps = [#map0, #map0],
                       iterator_types = ["parallel"]}
    ins(%arg0 : tensor<32xf32>)
    outs(%0 : tensor<32xi32>) {
  ^bb0(%arg1: f32, %arg2: i32):
    // Current iteration index along d0.
    %2 = linalg.index 0 : index
    // Offset the index by the dynamic operand %arg3; this is the op the
    // vectorizer must decompose (see the arith.addi in the CHECK lines).
    %12 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg3)
    %3 = arith.index_cast %12 : index to i32
    linalg.yield %3 : i32
  } -> tensor<32xi32>
  return %1 : tensor<32xi32>
}
// CHECK-LABEL: func.func @vectorize_affine_apply | |||||
// CHECK-SAME: %arg0: tensor<32xf32> | |||||
// CHECK-SAME: %[[ARG1:.*]]: index | |||||
// CHECK: %[[CST:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]> : vector<32xindex> | |||||
// CHECK: %[[C0:.*]] = arith.constant 0 : index | |||||
// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<32xi32> | |||||
// CHECK: %[[BCAST:.*]] = vector.broadcast %[[ARG1]] : index to vector<32xindex> | |||||
// CHECK: %[[ADDI:.*]] = arith.addi %[[BCAST]], %[[CST]] : vector<32xindex> | |||||
// CHECK: %[[CAST:.*]] = arith.index_cast %[[ADDI]] : vector<32xindex> to vector<32xi32> | |||||
// CHECK: vector.transfer_write %[[CAST]], %[[EMPTY]][%[[C0:.*]]] {in_bounds = [true]} : vector<32xi32>, tensor<32xi32> | |||||
// Driver: match the linalg.generic above, walk up to its isolated-from-above
// parent (the func), and vectorize it.
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
  %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation
  %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation
  // NOTE(review): vectorize_nd_extract targets tensor.extract inside the
  // payload, but this test's generic contains none — confirm the flag is
  // actually needed here, or drop it to match the sequences used by the
  // neighboring test cases in this file.
  %2 = transform.structured.vectorize %1 { vectorize_nd_extract }
}
// ----- | |||||
// CHECK-LABEL: func @test_vectorize_fill | // CHECK-LABEL: func @test_vectorize_fill | ||||
func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { | func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { | ||||
// CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> | // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> | ||||
// CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> | // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> | ||||
linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) | linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) | ||||
return | return | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,697 Lines • ▼ Show 20 Lines | |||||
^bb1(%arg1: !pdl.operation): | ^bb1(%arg1: !pdl.operation): | ||||
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation | %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation | ||||
%1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation | %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation | ||||
%2 = transform.structured.vectorize %1 | %2 = transform.structured.vectorize %1 | ||||
} | } | ||||
// CHECK-LABEL: @wrong_reduction_detection | // CHECK-LABEL: @wrong_reduction_detection | ||||
// CHECK: vector.broadcast | // CHECK: vector.broadcast | ||||
// CHECK: vector.transfer_write | // CHECK: vector.transfer_write | ||||
dcaballe: This looks like a new feature to me more than a regression. I think we should match the… |
This looks like a new feature to me more than a regression. I think we should match the decomposed ops and make sure they are vectorized accordingly.