diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -1,158 +1,120 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-FILL
 
-// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 12)>
-// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)>
-#map = affine_map<(d0) -> (7, -d0 + 12)>
+// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (7, -s0 + 12)>
+// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
+#map = affine_map<()[s0] -> (7, -s0 + 12)>
 
 // CHECK: static_sizes_output_divisible
 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>
+// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
+// CHECK-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+// CHECK-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
+// CHECK-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
 func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
                                     %arg1: tensor<12x25xf32>,
-                                    %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
+                                    %arg2: tensor<24x25xf32>,
+                                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[C7:.*]] = arith.constant 7
-  %c0 = arith.constant 0 : index
-  %c12 = arith.constant 12 : index
-  %c25 = arith.constant 25 : index
-  %c24 = arith.constant 24 : index
-  %c7 = arith.constant 7 : index
-  %c5 = arith.constant 5 : index
-  %c4 = arith.constant 4 : index
-
-  // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
-  %0 = scf.for %arg3 = %c0 to %c24 step %c4 iter_args(%arg4 = %arg2) -> (tensor<24x25xf32>) {
-
-    // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
-    %1 = scf.for %arg5 = %c0 to %c25 step %c5 iter_args(%arg6 = %arg4) -> (tensor<24x25xf32>) {
-
-      // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] =
-      %2 = scf.for %arg7 = %c0 to %c12 step %c7 iter_args(%arg8 = %arg6) -> (tensor<24x25xf32>) {
-
-        // CHECK: %[[TS2:.*]] = affine.min #[[MAP0]](%[[IV2]])
-        %3 = affine.min #map(%arg7)
-
-        // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
-        // CHECK: %[[T1:.*]] = tensor.extract_slice %[[ARG1]]
-        // CHECK: %[[T2:.*]] = tensor.extract_slice %[[ARG4]]
-        %4 = tensor.extract_slice %arg0[%arg3, %arg7] [4, %3] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
-        %5 = tensor.extract_slice %arg1[%arg7, %arg5] [%3, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
-        %6 = tensor.extract_slice %arg8[%arg3, %arg5] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
-
-        // Check statically sized matmul inputs with partially divisible sizes are padded.
-        // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]](%[[TS2]])
-        // CHECK: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold
-        // CHECK-SAME: [%[[C0]], %[[C0]]]
-        // CHECK-SAME: [%[[C0]], %[[V0]]
-        // CHECK: %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold
-
-        // Check the statically sized matmul output with fully divisible sizes is not padded.
-        // CHECK: %[[T5:.*]] = linalg.matmul
-        // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
-        // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
-        // CHECK: %[[T6:.*]] = tensor.insert_slice %[[T5]]
-        %7 = linalg.matmul ins(%4, %5 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%6 : tensor<4x5xf32>) -> tensor<4x5xf32>
-        %8 = tensor.insert_slice %7 into %arg8[%arg3, %arg5] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-
-        // CHECK: scf.yield %[[T6]]
-        scf.yield %8 : tensor<24x25xf32>
-      }
-      scf.yield %2 : tensor<24x25xf32>
-    }
-    scf.yield %1 : tensor<24x25xf32>
-  }
-  return %0 : tensor<24x25xf32>
+
+  // CHECK: %[[TS2:.*]] = affine.min #[[MAP0]]()[%[[IV2]]]
+  %0 = affine.min #map()[%iv2]
+
+  // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
+  // CHECK: %[[T1:.*]] = tensor.extract_slice %[[ARG1]]
+  // CHECK: %[[T2:.*]] = tensor.extract_slice %[[ARG2]]
+  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
+  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
+  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
+
+  // Check statically sized matmul inputs with partially divisible sizes are padded.
+  // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS2]]]
+  // CHECK: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold
+  // CHECK-SAME: [%[[C0]], %[[C0]]]
+  // CHECK-SAME: [%[[C0]], %[[V0]]
+  // CHECK: %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold
+
+  // Check the statically sized matmul output with fully divisible sizes is not padded.
+  // CHECK: %[[T5:.*]] = linalg.matmul
+  // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
+  // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
+  // CHECK: %[[T6:.*]] = tensor.insert_slice %[[T5]]
+  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
+  return %5 : tensor<24x25xf32>
 }
 
 // -----
 
-// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 25)>
-// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)>
-#map = affine_map<(d0) -> (7, -d0 + 25)>
+// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (7, -s0 + 25)>
+// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
+#map = affine_map<()[s0] -> (7, -s0 + 25)>
 
 // CHECK: static_sizes_input_divisible
+// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
+// CHECK-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+// CHECK-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
+// CHECK-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
 func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
                                    %arg1: tensor<12x25xf32>,
-                                   %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
+                                   %arg2: tensor<24x25xf32>,
+                                   %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[C7:.*]] = arith.constant 7
-  %c0 = arith.constant 0 : index
-  %c12 = arith.constant 12 : index
-  %c25 = arith.constant 25 : index
-  %c24 = arith.constant 24 : index
-  %c6 = arith.constant 6 : index
-  %c7 = arith.constant 7 : index
-  %c4 = arith.constant 4 : index
-
-  // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
-  %0 = scf.for %arg3 = %c0 to %c24 step %c4 iter_args(%arg4 = %arg2) -> (tensor<24x25xf32>) {
-
-    // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
-    %1 = scf.for %arg5 = %c0 to %c25 step %c7 iter_args(%arg6 = %arg4) -> (tensor<24x25xf32>) {
-
-      // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] =
-      %2 = scf.for %arg7 = %c0 to %c12 step %c6 iter_args(%arg8 = %arg6) -> (tensor<24x25xf32>) {
-        %3 = tensor.extract_slice %arg0[%arg3, %arg7] [4, 6] [1, 1] : tensor<24x12xf32> to tensor<4x6xf32>
-
-        // CHECK: %[[TS1:.*]] = affine.min #[[MAP0]](%[[IV1]])
-        %4 = affine.min #map(%arg5)
-        %5 = tensor.extract_slice %arg1[%arg7, %arg5] [6, %4] [1, 1] : tensor<12x25xf32> to tensor<6x?xf32>
-
-        // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG4]]
-        %6 = tensor.extract_slice %arg8[%arg3, %arg5] [4, %4] [1, 1] : tensor<24x25xf32> to tensor<4x?xf32>
-
-        // Check the statically sized matmul output with partially divisible sizes is padded.
-        // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]](%[[TS1]])
-        // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low
-        // CHECK-SAME: [%[[C0]], %[[C0]]]
-        // CHECK-SAME: [%[[C0]], %[[V0]]
-
-        // CHECK: %[[T2:.*]] = linalg.matmul
-        // CHECK-SAME: outs(%[[T1]] : tensor<4x7xf32>)
-        // CHECK: %[[T3:.*]] = tensor.extract_slice %[[T2]]
-        // CHECK: %[[T4:.*]] = tensor.insert_slice %[[T3]]
-        %7 = linalg.matmul ins(%3, %5 : tensor<4x6xf32>, tensor<6x?xf32>) outs(%6 : tensor<4x?xf32>) -> tensor<4x?xf32>
-        %8 = tensor.insert_slice %7 into %arg8[%arg3, %arg5] [4, %4] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
-
-        // CHECK: scf.yield %[[T4]]
-        scf.yield %8 : tensor<24x25xf32>
-      }
-      scf.yield %2 : tensor<24x25xf32>
-    }
-    scf.yield %1 : tensor<24x25xf32>
-  }
-  return %0 : tensor<24x25xf32>
+
+  %3 = tensor.extract_slice %arg0[%iv0, %iv2] [4, 6] [1, 1] : tensor<24x12xf32> to tensor<4x6xf32>
+
+  // CHECK: %[[TS1:.*]] = affine.min #[[MAP0]]()[%[[IV1]]]
+  %4 = affine.min #map()[%iv1]
+  %5 = tensor.extract_slice %arg1[%iv2, %iv1] [6, %4] [1, 1] : tensor<12x25xf32> to tensor<6x?xf32>
+
+  // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG2]]
+  %6 = tensor.extract_slice %arg2[%iv0, %iv1] [4, %4] [1, 1] : tensor<24x25xf32> to tensor<4x?xf32>
+
+  // Check the statically sized matmul output with partially divisible sizes is padded.
+  // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS1]]]
+  // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low
+  // CHECK-SAME: [%[[C0]], %[[C0]]]
+  // CHECK-SAME: [%[[C0]], %[[V0]]
+
+  // CHECK: %[[T2:.*]] = linalg.matmul
+  // CHECK-SAME: outs(%[[T1]] : tensor<4x7xf32>)
+  // CHECK: %[[T3:.*]] = tensor.extract_slice %[[T2]]
+  // CHECK: %[[T4:.*]] = tensor.insert_slice %[[T3]]
+  %7 = linalg.matmul ins(%3, %5 : tensor<4x6xf32>, tensor<6x?xf32>) outs(%6 : tensor<4x?xf32>) -> tensor<4x?xf32>
+  %8 = tensor.insert_slice %7 into %arg2[%iv0, %iv1] [4, %4] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
+
+  // CHECK: return %[[T4]]
+  return %8 : tensor<24x25xf32>
 }
 
 // -----
 
-// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0)[s0] -> (5, -d0 + s0)>
-// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0)[s0] -> (7, -d0 + s0)>
-// CHECK-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<(d0)[s0] -> (6, -d0 + s0)>
-// CHECK-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 5)>
-// CHECK-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 6)>
+// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0, s1] -> (5, -s0 + s1)>
+// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0, s1] -> (7, -s0 + s1)>
+// CHECK-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<()[s0, s1] -> (6, -s0 + s1)>
+// CHECK-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 5)>
+// CHECK-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 6)>
 
-#map0 = affine_map<(d0)[s0] -> (5, -d0 + s0)>
-#map1 = affine_map<(d0)[s0] -> (6, -d0 + s0)>
-#map2 = affine_map<(d0)[s0] -> (7, -d0 + s0)>
+#map0 = affine_map<()[s0, s1] -> (5, -s0 + s1)>
+#map1 = affine_map<()[s0, s1] -> (6, -s0 + s1)>
+#map2 = affine_map<()[s0, s1] -> (7, -s0 + s1)>
 
 // CHECK: dynamic_sizes
 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x?xf32>
 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<?x?xf32>
 // CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<?x?xf32>
+// CHECK-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+// CHECK-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
+// CHECK-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
 func @dynamic_sizes(%arg0: tensor<?x?xf32>,
                     %arg1: tensor<?x?xf32>,
-                    %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
+                    %arg2: tensor<?x?xf32>,
+                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
   // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
   // CHECK-DAG: %[[C1:.*]] = arith.constant 1
-  // CHECK-DAG: %[[C5:.*]] = arith.constant 5
-  // CHECK-DAG: %[[C6:.*]] = arith.constant 6
   %c1 = arith.constant 1 : index
   %c0 = arith.constant 0 : index
-  %c6 = arith.constant 6 : index
-  %c7 = arith.constant 7 : index
-  %c5 = arith.constant 5 : index
 
   // CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]]
   // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG0]], %[[C1]]
@@ -161,54 +123,39 @@
   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
   %2 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
 
-  // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
-  %3 = scf.for %arg3 = %c0 to %0 step %c5 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) {
-
-    // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
-    %4 = scf.for %arg5 = %c0 to %2 step %c7 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) {
-
-      // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] =
-      %5 = scf.for %arg7 = %c0 to %1 step %c6 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) {
-
-        // CHECK: %[[TS0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[D0]]]
-        // CHECK: %[[TS2:.*]] = affine.min #[[MAP2]](%[[IV2]])[%[[D2]]]
-        // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]])[%[[D1]]]
-        %6 = affine.min #map0(%arg3)[%0]
-        %7 = affine.min #map1(%arg7)[%1]
-        %8 = tensor.extract_slice %arg0[%arg3, %arg7] [%6, %7] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-        %9 = affine.min #map2(%arg5)[%2]
-        %10 = tensor.extract_slice %arg1[%arg7, %arg5] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-        %11 = tensor.extract_slice %arg8[%arg3, %arg5] [%6, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-
-        // Check all matmul operands are padded.
-        // CHECK: %[[V0:.*]] = affine.apply #[[MAP3]](%[[TS0]])
-        // CHECK: %[[V1:.*]] = affine.apply #[[MAP4]](%[[TS2]])
-        // CHECK: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold
-        // CHECK-SAME: [%[[C0]], %[[C0]]]
-        // CHECK-SAME: [%[[V0]], %[[V1]]
-        // CHECK: %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold
-        // CHECK: %[[T5:.*]] = linalg.pad_tensor %{{.*}} low
-
-        // Check the dynamic matmul has been erased.
-        // CHECK-NOT: = linalg.matmul {{.*}} tensor<?x?xf32>
-
-        // Check all padded matmul operands are statically sized.
-        // CHECK: %[[T6:.*]] = linalg.matmul
-        // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<5x6xf32>, tensor<6x7xf32>)
-        // CHECK-SAME: outs(%[[T5]] : tensor<5x7xf32>)
-        // CHECK: %[[T7:.*]] = tensor.extract_slice %[[T6]][0, 0] [%[[TS0]], %[[TS1]]]
-        // CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]]
-        %12 = linalg.matmul ins(%8, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%11 : tensor<?x?xf32>) -> tensor<?x?xf32>
-        %13 = tensor.insert_slice %12 into %arg8[%arg3, %arg5] [%6, %9] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
-
-        // CHECK: scf.yield %[[T8]]
-        scf.yield %13 : tensor<?x?xf32>
-      }
-      scf.yield %5 : tensor<?x?xf32>
-    }
-    scf.yield %4 : tensor<?x?xf32>
-  }
-  return %3 : tensor<?x?xf32>
+  // CHECK: %[[TS0:.*]] = affine.min #[[MAP0]]()[%[[IV0]], %[[D0]]]
+  // CHECK: %[[TS2:.*]] = affine.min #[[MAP2]]()[%[[IV2]], %[[D2]]]
+  // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]]()[%[[IV1]], %[[D1]]]
+  %6 = affine.min #map0()[%iv0, %0]
+  %7 = affine.min #map1()[%iv2, %1]
+  %8 = tensor.extract_slice %arg0[%iv0, %iv2] [%6, %7] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+  %9 = affine.min #map2()[%iv1, %2]
+  %10 = tensor.extract_slice %arg1[%iv2, %iv1] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+  %11 = tensor.extract_slice %arg2[%iv0, %iv1] [%6, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+
+  // Check all matmul operands are padded.
+  // CHECK: %[[V0:.*]] = affine.apply #[[MAP3]]()[%[[TS0]]]
+  // CHECK: %[[V1:.*]] = affine.apply #[[MAP4]]()[%[[TS2]]]
+  // CHECK: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold
+  // CHECK-SAME: [%[[C0]], %[[C0]]]
+  // CHECK-SAME: [%[[V0]], %[[V1]]
+  // CHECK: %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold
+  // CHECK: %[[T5:.*]] = linalg.pad_tensor %{{.*}} low
+
+  // Check the dynamic matmul has been erased.
+  // CHECK-NOT: = linalg.matmul {{.*}} tensor<?x?xf32>
+
+  // Check all padded matmul operands are statically sized.
+  // CHECK: %[[T6:.*]] = linalg.matmul
+  // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<5x6xf32>, tensor<6x7xf32>)
+  // CHECK-SAME: outs(%[[T5]] : tensor<5x7xf32>)
+  // CHECK: %[[T7:.*]] = tensor.extract_slice %[[T6]][0, 0] [%[[TS0]], %[[TS1]]]
+  // CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]]
+  %12 = linalg.matmul ins(%8, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%11 : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %13 = tensor.insert_slice %12 into %arg2[%iv0, %iv1] [%6, %9] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+
+  // CHECK: return %[[T8]]
+  return %13 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -330,36 +277,23 @@
 
 // -----
 
-#map = affine_map<(d0) -> (7, -d0 + 12)>
+#map = affine_map<(d0) -> (7, d0)>
 
 // CHECK-FILL: scalar_operand
 // CHECK-FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: f32
 // CHECK-FILL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<24x12xf32>
-func @scalar_operand(%arg0: f32, %arg1: tensor<24x12xf32>) -> tensor<24x12xf32> {
-  %c0 = arith.constant 0 : index
-  %c12 = arith.constant 12 : index
-  %c24 = arith.constant 24 : index
-  %c7 = arith.constant 7 : index
-  %c4 = arith.constant 4 : index
-
-  // CHECK-FILL: scf.for %[[IV0:[0-9a-zA-Z]*]] =
-  %0 = scf.for %arg2 = %c0 to %c24 step %c4 iter_args(%arg3 = %arg1) -> (tensor<24x12xf32>) {
-
-    // CHECK-FILL: scf.for %[[IV1:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG2:.*]] =
-    %1 = scf.for %arg4 = %c0 to %c12 step %c7 iter_args(%arg5 = %arg3) -> (tensor<24x12xf32>) {
-      %2 = affine.min #map(%arg4)
-
-      // CHECK-FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG2]]
-      // CHECK-FILL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold
-      %3 = tensor.extract_slice %arg5[%arg2, %arg4] [4, %2] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
-
-      // Check only the fill output operand is padded.
-      // CHECK-FILL: %[[T6:.*]] = linalg.fill(%[[ARG0]], %[[T1]]
-      %4 = linalg.fill(%arg0, %3) : f32, tensor<4x?xf32> -> tensor<4x?xf32>
-      %5 = tensor.insert_slice %4 into %arg5[%arg2, %arg4] [4, %2] [1, 1] : tensor<4x?xf32> into tensor<24x12xf32>
-      scf.yield %5 : tensor<24x12xf32>
-    }
-    scf.yield %1 : tensor<24x12xf32>
-  }
-  return %0 : tensor<24x12xf32>
+func @scalar_operand(%arg0: f32,
+                     %arg1: tensor<24x12xf32>,
+                     %iv0 : index) -> tensor<24x12xf32> {
+  %0 = affine.min #map(%iv0)
+
+  // CHECK-FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
+  // CHECK-FILL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold
+  %1 = tensor.extract_slice %arg1[0, 0] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
+
+  // Check only the fill output operand is padded.
+  // CHECK-FILL: %[[T6:.*]] = linalg.fill(%[[ARG0]], %[[T1]]
+  %2 = linalg.fill(%arg0, %1) : f32, tensor<4x?xf32> -> tensor<4x?xf32>
+  %3 = tensor.insert_slice %2 into %arg1[0, 0] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x12xf32>
+  return %3 : tensor<24x12xf32>
 }