diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
@@ -30,6 +30,7 @@
 // An example of vector reductions.
 module {
 
+  // Custom prod reduction: stored i32 elements only.
   func.func @prod_dreduction_i32(%arga: tensor<32xi32, #DV>,
                                  %argx: tensor<i32>) -> tensor<i32> {
     %c = tensor.extract %argx[] : tensor<i32>
@@ -47,6 +48,7 @@
     return %0 : tensor<i32>
   }
 
+  // Custom prod reduction: stored f32 elements only.
   func.func @prod_dreduction_f32(%arga: tensor<32xf32, #DV>,
                                  %argx: tensor<f32>) -> tensor<f32> {
     %c = tensor.extract %argx[] : tensor<f32>
@@ -64,6 +66,7 @@
     return %0 : tensor<f32>
   }
 
+  // Custom prod reduction: stored i32 elements only.
   func.func @prod_sreduction_i32(%arga: tensor<32xi32, #SV>,
                                  %argx: tensor<i32>) -> tensor<i32> {
     %c = tensor.extract %argx[] : tensor<i32>
@@ -81,6 +84,7 @@
     return %0 : tensor<i32>
   }
 
+  // Custom prod reduction: stored f32 elements only.
   func.func @prod_sreduction_f32(%arga: tensor<32xf32, #SV>,
                                  %argx: tensor<f32>) -> tensor<f32> {
     %c = tensor.extract %argx[] : tensor<f32>
@@ -98,6 +102,42 @@
     return %0 : tensor<f32>
   }
 
+  // Custom prod reduction: stored i32 elements and implicit zeros.
+  //
+  // NOTE: this is a somewhat strange operation, since for most sparse
+  //       situations the outcome would always be zero; it is added
+  //       to test full functionality and illustrate the subtle differences
+  //       between the various custom operations; it would make a bit more
+  //       sense for e.g. a min/max reductions, although it still would
+  //       "densify" the iteration space.
+  //
+  func.func @prod_xreduction_i32(%arga: tensor<32xi32, #SV>,
+                                 %argx: tensor<i32>) -> tensor<i32> {
+    %c = tensor.extract %argx[] : tensor<i32>
+    %0 = linalg.generic #trait_reduction
+      ins(%arga: tensor<32xi32, #SV>)
+      outs(%argx: tensor<i32>) {
+      ^bb(%a: i32, %b: i32):
+        %u = sparse_tensor.unary %a : i32 to i32
+          present={
+            ^bb0(%x: i32):
+              sparse_tensor.yield %x : i32
+          } absent={
+            ^bb0:
+              %c0 = arith.constant 0 : i32
+              sparse_tensor.yield %c0 : i32
+          }
+        %1 = sparse_tensor.reduce %u, %b, %c : i32 {
+          ^bb0(%x: i32, %y: i32):
+            %2 = arith.muli %x, %y : i32
+            sparse_tensor.yield %2 : i32
+        }
+        linalg.yield %1 : i32
+    } -> tensor<i32>
+    return %0 : tensor<i32>
+  }
+
+
   func.func @dump_i32(%arg0 : tensor<i32>) {
     %v = tensor.extract %arg0[] : tensor<i32>
     vector.print %v : i32
@@ -174,6 +214,7 @@
     %6 = call @prod_sreduction_i32(%s1_i32, %ri) : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
     %7 = call @prod_sreduction_f32(%s1_f32, %rf) : (tensor<32xf32, #SV>, tensor<f32>) -> tensor<f32>
     %8 = call @prod_sreduction_i32(%s0, %ri) : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
+    %9 = call @prod_xreduction_i32(%s0_i32, %ri) : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
 
     // Verify results. Note that the custom reduction gave permission
     // to treat an explicit vs implicit zero differently to compute the
@@ -190,6 +231,7 @@
     // CHECK: 3087
     // CHECK: 168
     // CHECK: 0
+    // CHECK: 0
     //
     call @dump_i32(%0) : (tensor<i32>) -> ()
     call @dump_f32(%1) : (tensor<f32>) -> ()
@@ -200,6 +242,7 @@
     call @dump_i32(%6) : (tensor<i32>) -> ()
     call @dump_f32(%7) : (tensor<f32>) -> ()
     call @dump_i32(%8) : (tensor<i32>) -> ()
+    call @dump_i32(%9) : (tensor<i32>) -> ()
 
     // Release the resources.
     bufferization.dealloc_tensor %d0_i32 : tensor<32xi32, #DV>