diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -332,7 +332,68 @@ Operation *SparseTensorLoopEmitter::enterFilterLoopOverTensorAtDim( OpBuilder &builder, Location loc, size_t tid, size_t dim, AffineExpr affine, MutableArrayRef reduc) { - llvm_unreachable("need to be implemented"); + assert(!affine.isa() && !isDenseDLT(dimTypes[tid][dim])); + assert(dimTypes[tid].size() > dim); + // We can not re-enter the same level. + assert(!coord[tid][dim]); + + Value step = constantIndex(builder, loc, 1); + + Value lo = pidxs[tid][dim]; + Value hi = highs[tid][dim]; + + // TODO: We should instead use a whileOp for filter loop to allow early + // break when exceeding (for ordered dimensions). + // TODO: There are many other potiential opportunities that we might apply in + // the future. E.g., we could use binary search to located the pointer index. + scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); + + // In-place update on the reduction variable vector. + assert(forOp.getNumRegionIterArgs() == reduc.size()); + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = forOp.getRegionIterArg(i); + + builder.setInsertionPointToStart(forOp.getBody()); + Value iv = forOp.getInductionVar(); + + pidxs[tid][dim] = iv; + // Generating a load on the indices array yields the coordinate. + Value ptr = idxBuffer[tid][dim]; + coord[tid][dim] = genIndexLoad(builder, loc, ptr, iv); + + // Generate a if condition to filter out indices that is not equal to the + // result of the affine expression. + Value expected = genAffine(builder, affine, loc); + auto pred = builder.create(loc, arith::CmpIPredicate::eq, + coord[tid][dim], expected); + SmallVector types; + for (Value red : reduc) { + types.push_back(red.getType()); + } + + bool hasReduc = !types.empty(); + scf::IfOp ifOp = + builder.create(loc, types, pred, /*else*/ hasReduc); + if (hasReduc) { + // scf.for (a) -> v + // %s = scf.if (a) -> v + // user-generated code. + // else + // yield a + // yield %s + builder.create(loc, ifOp.getResults()); + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + // On mismatch. + builder.create(loc, reduc); + } + // Set the insert point to matched branch. + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + + // NOTE: we can also prepares for next dim here in advance + // Push the loop into stack + loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), forOp, + coord[tid][dim], nullptr); + return forOp; } void SparseTensorLoopEmitter::genDenseAffineAddressAtCurLevel( @@ -520,7 +581,6 @@ if (forOp) { if (!reduc.empty()) { assert(reduc.size() == forOp.getNumResults()); - rewriter.setInsertionPointToEnd(forOp.getBody()); rewriter.create(loc, reduc); } // Exit the loop. diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -4,6 +4,7 @@ #SpVec = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }> #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }> #Row = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }> +#EncDenseVec = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }> #trait1 = { indexing_maps = [ @@ -55,6 +56,94 @@ return %0 : tensor<32xf32> } +// CHECK-LABEL: func.func @mul_inv_sparse1d( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>) +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_6:.*]] = bufferization.alloc_tensor() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index} : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_6]]) -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_18:.*]] = arith.cmpi eq, %[[VAL_17]], %[[VAL_4]] : index +// CHECK: %[[VAL_19:.*]] = scf.if %[[VAL_18]] -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_23:.*]] = scf.for %[[VAL_24:.*]] = %[[VAL_21]] to %[[VAL_22]] step %[[VAL_3]] iter_args(%[[VAL_25:.*]] = %[[VAL_16]]) -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_27:.*]] = arith.mulf %[[VAL_26]], %[[VAL_20]] : f32 +// CHECK: %[[VAL_28:.*]] = arith.addf %[[VAL_27]], %[[VAL_5]] : f32 +// CHECK: %[[VAL_29:.*]] = sparse_tensor.insert %[[VAL_28]] into %[[VAL_25]]{{\[}}%[[VAL_17]]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_29]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: scf.yield %[[VAL_30:.*]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } else { +// CHECK: scf.yield %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: scf.yield %[[VAL_31:.*]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: %[[VAL_32:.*]] = sparse_tensor.load %[[VAL_33:.*]] hasInserts : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[VAL_32]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +func.func @mul_inv_sparse1d(%arga: tensor<32xf32, #SpVec>, + %argb: tensor<4xf32, #SpVec>) -> tensor<32xf32, #SpVec> { + %argx = bufferization.alloc_tensor() : tensor<32xf32, #SpVec> + %0 = linalg.generic #trait1 + ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32, #SpVec>) + outs(%argx: tensor<32xf32, #SpVec>) { + ^bb(%a: f32, %b: f32, %x: f32): + %0 = arith.mulf %a, %b : f32 + %1 = arith.addf %x, %0 : f32 + linalg.yield %1 : f32 + } -> tensor<32xf32, #SpVec> + return %0 : tensor<32xf32, #SpVec> +} + + +// CHECK-LABEL: func.func @mul_inv_enc_dense1d( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = bufferization.alloc_tensor() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] { +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_14:.*]] = arith.mulf %[[VAL_13]], %[[VAL_10]] : f32 +// CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_12]], %[[VAL_14]] : f32 +// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref +// CHECK: } +// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>, + %argb: tensor<4xf32, #EncDenseVec>) -> tensor<32xf32, #EncDenseVec> { + %argx = bufferization.alloc_tensor() : tensor<32xf32, #EncDenseVec> + %0 = linalg.generic #trait1 + ins(%arga, %argb: tensor<32xf32, #EncDenseVec>, tensor<4xf32, #EncDenseVec>) + outs(%argx: tensor<32xf32, #EncDenseVec>) { + ^bb(%a: f32, %b: f32, %x: f32): + %0 = arith.mulf %a, %b : f32 + %1 = arith.addf %x, %0 : f32 + linalg.yield %1 : f32 + } -> tensor<32xf32, #EncDenseVec> + return %0 : tensor<32xf32, #EncDenseVec> +} + #trait2 = { indexing_maps = [ affine_map<(i) -> (i)>, // a @@ -105,6 +194,57 @@ return %0 : tensor<32xi32> } +// CHECK-LABEL: func.func @and_affine_sparse1d( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>>) +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = bufferization.alloc_tensor() : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index} : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_5]]) -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_20]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_16]]) -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_17]], %[[VAL_4]] : index +// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_25]] : index +// CHECK: %[[VAL_27:.*]] = scf.if %[[VAL_26]] -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_18]], %[[VAL_28]] : i32 +// CHECK: %[[VAL_30:.*]] = sparse_tensor.insert %[[VAL_29]] into %[[VAL_23]]{{\[}}%[[VAL_17]]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_30]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } else { +// CHECK: scf.yield %[[VAL_23]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: scf.yield %[[VAL_31:.*]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: scf.yield %[[VAL_32:.*]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: %[[VAL_33:.*]] = sparse_tensor.load %[[VAL_34:.*]] hasInserts : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[VAL_33]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +func.func @and_affine_sparse1d(%arga: tensor<32xi32, #SpVec>, + %argb: tensor<34xi32, #SpVec>) -> tensor<32xi32, #SpVec> { + %argx = bufferization.alloc_tensor() : tensor<32xi32, #SpVec> + %0 = linalg.generic #trait2 + ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32, #SpVec>) + outs(%argx: tensor<32xi32, #SpVec>) { + ^bb(%a: i32, %b: i32, %x: i32): + %0 = arith.andi %a, %b : i32 + linalg.yield %0 : i32 + } -> tensor<32xi32, #SpVec> + return %0 : tensor<32xi32, #SpVec> +} + #trait3 = { indexing_maps = [ affine_map<(i,j) -> (i,j)>, // a @@ -162,6 +302,69 @@ return %0 : tensor<32x16xf64> } + +// CHECK-LABEL: func.func @mul_affine_sparse2d( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[VAL_7:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_8:.*]] = bufferization.alloc_tensor() : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 1 : index} : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_17:.*]] = %[[VAL_8]]) -> (tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_16]], %[[VAL_5]] : index +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_16]], %[[VAL_4]] : index +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_22:.*]] = scf.for %[[VAL_23:.*]] = %[[VAL_19]] to %[[VAL_21]] step %[[VAL_4]] iter_args(%[[VAL_24:.*]] = %[[VAL_17]]) -> (tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_18]], %[[VAL_4]] : index +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_30:.*]]:2 = scf.for %[[VAL_31:.*]] = %[[VAL_27]] to %[[VAL_29]] step %[[VAL_4]] iter_args(%[[VAL_32:.*]] = %[[VAL_6]], %[[VAL_33:.*]] = %[[VAL_24]]) -> (f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_25]], %[[VAL_7]] : index +// CHECK: %[[VAL_36:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_35]] : index +// CHECK: %[[VAL_37:.*]]:2 = scf.if %[[VAL_36]] -> (f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_39:.*]] = arith.mulf %[[VAL_26]], %[[VAL_38]] : f64 +// CHECK: %[[VAL_40:.*]] = arith.addf %[[VAL_32]], %[[VAL_39]] : f64 +// CHECK: scf.yield %[[VAL_40]], %[[VAL_33]] : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } else { +// CHECK: scf.yield %[[VAL_32]], %[[VAL_33]] : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: scf.yield %[[VAL_41:.*]]#0, %[[VAL_41]]#1 : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: %[[VAL_42:.*]] = sparse_tensor.insert %[[VAL_43:.*]]#0 into %[[VAL_43]]#1{{\[}}%[[VAL_16]], %[[VAL_25]]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_42]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: scf.yield %[[VAL_44:.*]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: } +// CHECK: %[[VAL_45:.*]] = sparse_tensor.load %[[VAL_46:.*]] hasInserts : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[VAL_45]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +func.func @mul_affine_sparse2d(%arga: tensor<32x16xf64, #CSR>, + %argb: tensor<34x19xf64, #CSR>) -> tensor<32x16xf64, #CSR> { + %argx = bufferization.alloc_tensor() : tensor<32x16xf64, #CSR> + %0 = linalg.generic #trait3 + ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64, #CSR>) + outs(%argx: tensor<32x16xf64, #CSR>) { + ^bb(%a: f64, %b: f64, %x: f64): + %0 = arith.mulf %a, %b : f64 + %1 = arith.addf %x, %0 : f64 + linalg.yield %1 : f64 + } -> tensor<32x16xf64, #CSR> + return %0 : tensor<32x16xf64, #CSR> +} + #trait4 = { indexing_maps = [ affine_map<(i,j) -> (i+2,j)>, // a @@ -286,3 +489,4 @@ } -> tensor<32x16xf64> return %0 : tensor<32x16xf64> } + diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir @@ -0,0 +1,128 @@ +// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#CCC = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed", "compressed" ] }> + +#CDC = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "dense", "compressed" ] + // FIXME: Still inadmissible might need investigation + // dimOrdering = affine_map<(i,j,k) -> (j,k,i)> +}> + +// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor { + %buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor + %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor + return %ret : tensor +} + +func.func @conv_1d_nwc_wcf(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, + strides = dense<1> : tensor<1xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%arg2: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_1d_nwc_wcf_CCC(%arg0: tensor, %arg1: tensor) -> tensor { + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c3, %c6, %c1) : tensor + %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, + strides = dense<1> : tensor<1xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_1d_nwc_wcf_CDC(%arg0: tensor, %arg1: tensor) -> tensor { + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c3, %c6, %c1) : tensor + %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, + strides = dense<1> : tensor<1xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @entry() { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %c8 = arith.constant 8 : index + %f10 = arith.constant 10.00000e+00 : f32 + %val = arith.constant 2.00000e+00 : f32 + %zero = arith.constant 0.00000e+00 : f32 + + %in1D_tmp = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (tensor) + %in1D_nwc = tensor.insert %f10 into %in1D_tmp[%c0, %c3, %c0] : tensor + %filter1D_nwc = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (tensor) + %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (tensor) + + %in1D_nwc_CCC = sparse_tensor.convert %in1D_nwc + : tensor to tensor + %filter1D_nwc_CCC = sparse_tensor.convert %filter1D_nwc + : tensor to tensor + + %in1D_nwc_CDC = sparse_tensor.convert %in1D_nwc + : tensor to tensor + %filter1D_nwc_CDC = sparse_tensor.convert %filter1D_nwc + : tensor to tensor + + %dense_ret = call @conv_1d_nwc_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (tensor, tensor, tensor) -> (tensor) + %CCC_ret = call @conv_1d_nwc_wcf_CCC(%in1D_nwc_CCC, %filter1D_nwc_CCC) : (tensor, tensor) -> (tensor) + %CDC_ret = call @conv_1d_nwc_wcf_CDC(%in1D_nwc_CDC, %filter1D_nwc_CDC) : (tensor, tensor) -> (tensor) + + // CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ), + // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ), + // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) ) + %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0], %zero + : tensor, vector<3x6x1xf32> + vector.print %dense_v : vector<3x6x1xf32> + + // CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ), + // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ), + // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) ) + %1 = sparse_tensor.convert %CCC_ret + : tensor to tensor + %v1 = vector.transfer_read %1[%c0, %c0, %c0], %zero + : tensor, vector<3x6x1xf32> + vector.print %v1 : vector<3x6x1xf32> + + // CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ), + // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ), + // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) ) + %2 = sparse_tensor.convert %CDC_ret + : tensor to tensor + %v2 = vector.transfer_read %2[%c0, %c0, %c0], %zero + : tensor, vector<3x6x1xf32> + vector.print %v2 : vector<3x6x1xf32> + + // Free the resources + bufferization.dealloc_tensor %in1D_nwc : tensor + bufferization.dealloc_tensor %filter1D_nwc : tensor + bufferization.dealloc_tensor %out1D_nwc : tensor + + bufferization.dealloc_tensor %in1D_nwc_CDC : tensor + bufferization.dealloc_tensor %filter1D_nwc_CDC : tensor + bufferization.dealloc_tensor %in1D_nwc_CCC : tensor + bufferization.dealloc_tensor %filter1D_nwc_CCC : tensor + + bufferization.dealloc_tensor %CCC_ret : tensor + bufferization.dealloc_tensor %CDC_ret : tensor + + return +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir @@ -0,0 +1,209 @@ +// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> +#CSR = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}> +#CSC = #sparse_tensor.encoding<{ + dimLevelType = [ "dense", "compressed" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +// An example of a 2D convolution with a sparse filter. +module { + + func.func @conv2d(%input: tensor<8x8xi32>, + %filter: tensor<3x3xi32, #DCSR>, + %output: tensor<6x6xi32>) -> tensor<6x6xi32> { + %0 = linalg.conv_2d + ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) + outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32> + return %0 : tensor<6x6xi32> + } + + func.func @conv2d_sparse_out(%input: tensor<8x8xi32>, + %filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> { + %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR> + %0 = linalg.conv_2d + ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) + outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> + return %0 : tensor<6x6xi32, #DCSR> + } + + func.func @conv2d_all_sparse_DCSR(%input: tensor<8x8xi32, #DCSR>, + %filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> { + %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR> + %0 = linalg.conv_2d + ins (%input, %filter: tensor<8x8xi32, #DCSR>, tensor<3x3xi32, #DCSR>) + outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> + return %0 : tensor<6x6xi32, #DCSR> + } + + func.func @conv2d_all_sparse_CSR(%input: tensor<8x8xi32, #CSR>, + %filter: tensor<3x3xi32, #CSR>) -> tensor<6x6xi32, #CSR> { + %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSR> + %0 = linalg.conv_2d + ins (%input, %filter: tensor<8x8xi32, #CSR>, tensor<3x3xi32, #CSR>) + outs (%s: tensor<6x6xi32, #CSR>) -> tensor<6x6xi32, #CSR> + return %0 : tensor<6x6xi32, #CSR> + } + + func.func @conv2d_all_sparse_CSC(%input: tensor<8x8xi32, #CSC>, + %filter: tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC> { + %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSC> + %0 = linalg.conv_2d + ins (%input, %filter: tensor<8x8xi32, #CSC>, tensor<3x3xi32, #CSC>) + outs (%s: tensor<6x6xi32, #CSC>) -> tensor<6x6xi32, #CSC> + return %0 : tensor<6x6xi32, #CSC> + } + + func.func @entry() { + %c0 = arith.constant 0 : index + %i0 = arith.constant 0 : i32 + + // A typical edge detection filter. + %filter = arith.constant dense<[ + [ 1, 0, -1 ], + [ 0, 0, 0 ], + [ -1, 0, 1 ] + ]> : tensor<3x3xi32> + %sparse_filter_DCSR = sparse_tensor.convert %filter + : tensor<3x3xi32> to tensor<3x3xi32, #DCSR> + %sparse_filter_CSR = sparse_tensor.convert %filter + : tensor<3x3xi32> to tensor<3x3xi32, #CSR> + %sparse_filter_CSC = sparse_tensor.convert %filter + : tensor<3x3xi32> to tensor<3x3xi32, #CSC> + + + %input = arith.constant dense<[ + [ 1, 2, 3, 4, 0, 6, 7, 8 ], + [ 2, 2, 4, 4, 0, 0, 6, 8 ], + [ 2, 2, 4, 4, 0, 0, 6, 8 ], + [ 2, 2, 3, 4, 0, 0, 7, 8 ], + [ 1, 3, 3, 4, 0, 0, 6, 8 ], + [ 3, 2, 3, 4, 0, 0, 7, 8 ], + [ 1, 3, 3, 4, 3, 6, 6, 8 ], + [ 1, 3, 3, 4, 3, 0, 7, 8 ] + ]> : tensor<8x8xi32> + %sparse_input_DCSR = sparse_tensor.convert %input + : tensor<8x8xi32> to tensor<8x8xi32, #DCSR> + %sparse_input_CSR = sparse_tensor.convert %input + : tensor<8x8xi32> to tensor<8x8xi32, #CSR> + %sparse_input_CSC = sparse_tensor.convert %input + : tensor<8x8xi32> to tensor<8x8xi32, #CSC> + + // Call the kernel. + %output = arith.constant dense<0> : tensor<6x6xi32> + %0 = call @conv2d(%input, %sparse_filter_DCSR, %output) + : (tensor<8x8xi32>, + tensor<3x3xi32, #DCSR>, tensor<6x6xi32>) -> tensor<6x6xi32> + %1 = call @conv2d_sparse_out(%input, %sparse_filter_DCSR) + : (tensor<8x8xi32>, + tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> + %2 = call @conv2d_all_sparse_DCSR(%sparse_input_DCSR, %sparse_filter_DCSR) + : (tensor<8x8xi32, #DCSR>, + tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> + %3 = call @conv2d_all_sparse_CSR(%sparse_input_CSR, %sparse_filter_CSR) + : (tensor<8x8xi32, #CSR>, + tensor<3x3xi32, #CSR>) -> tensor<6x6xi32, #CSR> + %4 = call @conv2d_all_sparse_CSC(%sparse_input_CSC, %sparse_filter_CSC) + : (tensor<8x8xi32, #CSC>, + tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC> + + + // Verify the output. + // + // CHECK: ( ( 0, 0, -1, -6, -1, 6 ), + // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ), + // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ), + // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ), + // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ), + // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) ) + // + %v = vector.transfer_read %0[%c0, %c0], %i0 + : tensor<6x6xi32>, vector<6x6xi32> + vector.print %v : vector<6x6xi32> + + // + // Should be the same as dense output + // CHECK: ( ( 0, 0, -1, -6, -1, 6 ), + // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ), + // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ), + // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ), + // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ), + // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) ) + // + %sparse_ret = sparse_tensor.convert %1 + : tensor<6x6xi32, #DCSR> to tensor<6x6xi32> + %v1 = vector.transfer_read %sparse_ret[%c0, %c0], %i0 + : tensor<6x6xi32>, vector<6x6xi32> + vector.print %v1 : vector<6x6xi32> + + // + // Should be the same as dense output + // CHECK: ( ( 0, 0, -1, -6, -1, 6 ), + // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ), + // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ), + // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ), + // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ), + // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) ) + // + %all_sparse_DCSR = sparse_tensor.convert %2 + : tensor<6x6xi32, #DCSR> to tensor<6x6xi32> + %v2 = vector.transfer_read %all_sparse_DCSR[%c0, %c0], %i0 + : tensor<6x6xi32>, vector<6x6xi32> + vector.print %v2 : vector<6x6xi32> + + // + // Should be the same as dense output + // CHECK: ( ( 0, 0, -1, -6, -1, 6 ), + // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ), + // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ), + // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ), + // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ), + // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) ) + // + %all_sparse_CSR = sparse_tensor.convert %3 + : tensor<6x6xi32, #CSR> to tensor<6x6xi32> + %v3 = vector.transfer_read %all_sparse_CSR[%c0, %c0], %i0 + : tensor<6x6xi32>, vector<6x6xi32> + vector.print %v3 : vector<6x6xi32> + + // + // Should be the same as dense output + // CHECK: ( ( 0, 0, -1, -6, -1, 6 ), + // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ), + // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ), + // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ), + // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ), + // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) ) + // + %all_sparse_CSC = sparse_tensor.convert %4 + : tensor<6x6xi32, #CSC> to tensor<6x6xi32> + %v4 = vector.transfer_read %all_sparse_CSC[%c0, %c0], %i0 + : tensor<6x6xi32>, vector<6x6xi32> + vector.print %v4 : vector<6x6xi32> + + // Release the resources. + bufferization.dealloc_tensor %sparse_filter_DCSR : tensor<3x3xi32, #DCSR> + bufferization.dealloc_tensor %sparse_filter_CSR : tensor<3x3xi32, #CSR> + bufferization.dealloc_tensor %sparse_filter_CSC : tensor<3x3xi32, #CSC> + + bufferization.dealloc_tensor %sparse_input_DCSR : tensor<8x8xi32, #DCSR> + bufferization.dealloc_tensor %sparse_input_CSR : tensor<8x8xi32, #CSR> + bufferization.dealloc_tensor %sparse_input_CSC : tensor<8x8xi32, #CSC> + + bufferization.dealloc_tensor %1 : tensor<6x6xi32, #DCSR> + bufferization.dealloc_tensor %2 : tensor<6x6xi32, #DCSR> + bufferization.dealloc_tensor %3 : tensor<6x6xi32, #CSR> + bufferization.dealloc_tensor %4 : tensor<6x6xi32, #CSC> + return + } +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir @@ -0,0 +1,172 @@ +// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#CCCC = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed", "compressed", "compressed" ] +}> + +#CDCD = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "dense", "compressed", "dense" ] +}> + +// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f +func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor { + %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor + %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor + return %ret : tensor +} + +func.func @conv_2d_nhwc_hwcf(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%arg2: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_2d_nhwc_hwcf_CCCC(%arg0: tensor, %arg1: tensor) -> tensor { + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c3, %c6, %c6, %c1) : tensor + %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_2d_nhwc_hwcf_CDCD(%arg0: tensor, %arg1: tensor) -> tensor { + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c3, %c6, %c6, %c1) : tensor + %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @entry() { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %c8 = arith.constant 8 : index + %f10 = arith.constant 10.00000e+00 : f32 + %val = arith.constant 2.00000e+00 : f32 + %zero = arith.constant 0.00000e+00 : f32 + + %filter2D_nhwc = call @alloc_4d_filled_f32(%c3, %c3, %c3, %c1, %val) :(index, index, index, index, f32) -> (tensor) + %in2D_tmp = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (tensor) + %in2D_nhwc = tensor.insert %f10 into %in2D_tmp[%c0, %c0, %c3, %c0] : tensor + %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (tensor) + + %in2D_nhwc_CCCC = sparse_tensor.convert %in2D_nhwc + : tensor to tensor + %filter2D_nhwc_CCCC = sparse_tensor.convert %filter2D_nhwc + : tensor to tensor + + %in2D_nhwc_CDCD = sparse_tensor.convert %in2D_nhwc + : tensor to tensor + %filter2D_nhwc_CDCD = sparse_tensor.convert %filter2D_nhwc + : tensor to tensor + + %dense_ret = call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (tensor, tensor, tensor) -> (tensor) + %CCCC_ret = call @conv_2d_nhwc_hwcf_CCCC(%in2D_nhwc_CCCC, %filter2D_nhwc_CCCC) : (tensor, tensor) -> (tensor) + %CDCD_ret = call @conv_2d_nhwc_hwcf_CDCD(%in2D_nhwc_CDCD, %filter2D_nhwc_CDCD) : (tensor, tensor) -> (tensor) + + // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) + %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0], %zero + : tensor, vector<3x6x6x1xf32> + vector.print %dense_v : vector<3x6x6x1xf32> + + // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) + %1 = sparse_tensor.convert %CCCC_ret + : tensor to tensor + %v1 = vector.transfer_read %1[%c0, %c0, %c0, %c0], %zero + : tensor, vector<3x6x6x1xf32> + vector.print %v1 : vector<3x6x6x1xf32> + + // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) + %2 = sparse_tensor.convert %CDCD_ret + : tensor to tensor + %v2 = vector.transfer_read %2[%c0, %c0, %c0, %c0], %zero + : tensor, vector<3x6x6x1xf32> + vector.print %v2 : vector<3x6x6x1xf32> + + // Free the resources + bufferization.dealloc_tensor %in2D_nhwc : tensor + bufferization.dealloc_tensor %filter2D_nhwc : tensor + bufferization.dealloc_tensor %out2D_nhwc : tensor + + bufferization.dealloc_tensor %in2D_nhwc_CDCD : tensor + bufferization.dealloc_tensor %filter2D_nhwc_CDCD : tensor + bufferization.dealloc_tensor %in2D_nhwc_CCCC : tensor + bufferization.dealloc_tensor %filter2D_nhwc_CCCC : tensor + + bufferization.dealloc_tensor %CCCC_ret : tensor + bufferization.dealloc_tensor %CDCD_ret : tensor + + return +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir @@ -0,0 +1,219 @@ +// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#CCC = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed", "compressed" ] +}> + +#CDC = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "dense", "compressed" ] +}> + +// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor { + %buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor + %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor + return %ret : tensor +} + +func.func @conv_3d(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %ret = linalg.conv_3d + ins (%arg0, %arg1: tensor, tensor) + outs (%arg2: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_3d_CCC(%arg0: tensor, %arg1: tensor) -> tensor { + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c6, %c6, %c6) : tensor + %ret = linalg.conv_3d + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_3d_CDC(%arg0: tensor, %arg1: tensor) -> tensor { + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c6, %c6, %c6) : tensor + %ret = linalg.conv_3d + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @entry() { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %c8 = arith.constant 8 : index + %f10 = arith.constant 10.00000e+00 : f32 + %val = arith.constant 2.00000e+00 : f32 + %zero = arith.constant 0.00000e+00 : f32 + + %filter3D = call @alloc_3d_filled_f32(%c3, %c3, %c3, %val) : (index, index, index, f32) -> (tensor) + %in3D_tmp = call @alloc_3d_filled_f32(%c8, %c8, %c8, %val) : (index, index, index, f32) -> (tensor) + %in3D = tensor.insert %f10 into %in3D_tmp[%c0, %c3, %c0] : tensor + %out3D = call @alloc_3d_filled_f32(%c6, %c6, %c6, %zero) : (index, index, index, f32) -> (tensor) + + %in3D_CCC = sparse_tensor.convert %in3D + : tensor to tensor + %filter3D_CCC = sparse_tensor.convert %filter3D + : tensor to tensor + + %in3D_CDC = sparse_tensor.convert %in3D + : tensor to tensor + %filter3D_CDC = sparse_tensor.convert %filter3D + : tensor to tensor + + %dense_ret = call @conv_3d(%in3D, %filter3D, %out3D) : (tensor, tensor, tensor) -> (tensor) + %CCC_ret = call @conv_3d_CCC(%in3D_CCC, %filter3D_CCC) : (tensor, tensor) -> (tensor) + %CDC_ret = call @conv_3d_CDC(%in3D_CDC, %filter3D_CDC) : (tensor, tensor) -> (tensor) + + // CHECK:( ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) ) + %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0], %zero + : tensor, vector<6x6x6xf32> + vector.print %dense_v : vector<6x6x6xf32> + + // CHECK-NEXT:( ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) ) + %1 = sparse_tensor.convert %CCC_ret + : tensor to tensor + %v1 = vector.transfer_read %1[%c0, %c0, %c0], %zero + : tensor, vector<6x6x6xf32> + vector.print %v1 : vector<6x6x6xf32> + + // CHECK-NEXT:( ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ), + // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ), + // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) ) + %2 = sparse_tensor.convert %CCC_ret + : tensor to tensor + %v2 = vector.transfer_read %2[%c0, %c0, %c0], %zero + : tensor, vector<6x6x6xf32> + vector.print %v2 : vector<6x6x6xf32> + + // Free the resources + bufferization.dealloc_tensor %in3D : tensor + bufferization.dealloc_tensor %filter3D : tensor + bufferization.dealloc_tensor %out3D : tensor + + bufferization.dealloc_tensor %in3D_CDC : tensor + bufferization.dealloc_tensor %filter3D_CDC : tensor + bufferization.dealloc_tensor %in3D_CCC : tensor + bufferization.dealloc_tensor %filter3D_CCC : tensor + + bufferization.dealloc_tensor %CCC_ret : tensor + bufferization.dealloc_tensor %CDC_ret : tensor + + return +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir @@ -0,0 +1,239 @@ +// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#CCCCC = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed", "compressed", "compressed", "compressed" ] +}> + +#CDCDC = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "dense", "compressed", "dense", "compressed"] +}> + +// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f +func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> tensor { + %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4, %s5) : tensor + %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor + return %ret : tensor +} + +func.func @conv_3d_ndhwc_dhwcf(%arg0: tensor, + %arg1: tensor, + %arg2: tensor) -> tensor { + %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%arg2: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_3d_ndhwc_dhwcf_CCCCC(%arg0: tensor, + %arg1: tensor) + -> tensor { + %c1 = arith.constant 1 : index + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c1, %c6, %c6, %c6, %c1) + : tensor + %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @conv_3d_ndhwc_dhwcf_CDCDC(%arg0: tensor, + %arg1: tensor) + -> tensor { + %c1 = arith.constant 1 : index + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c1, %c6, %c6, %c6, %c1) + : tensor + %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + +func.func @entry() { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %c8 = arith.constant 8 : index + %f10 = arith.constant 10.00000e+00 : f32 + %val = arith.constant 2.00000e+00 : f32 + %zero = arith.constant 0.00000e+00 : f32 + + %in3D_tmp = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (tensor) + %in3D_ndhwc = tensor.insert %f10 into %in3D_tmp[%c0, %c0, %c0, %c3, %c0] : tensor + + %filter3D_ndhwc = call @alloc_5d_filled_f32(%c3, %c3, %c3, %c1, %c1, %val) : (index, index, index, index, index, f32) -> (tensor) + %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (tensor) + + %in3D_ndhwc_CCCCC = sparse_tensor.convert %in3D_ndhwc + : tensor to tensor + %filter3D_ndhwc_CCCCC = sparse_tensor.convert %filter3D_ndhwc + : tensor to tensor + + %in3D_ndhwc_CDCDC = sparse_tensor.convert %in3D_ndhwc + : tensor to tensor + %filter3D_ndhwc_CDCDC = sparse_tensor.convert %filter3D_ndhwc + : tensor to tensor + + // CHECK:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) ) + %dense_ret = call @conv_3d_ndhwc_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) + : (tensor, tensor, tensor) -> (tensor) + %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0, %c0], %zero + : tensor, vector<1x6x6x6x1xf32> + vector.print %dense_v : vector<1x6x6x6x1xf32> + + %CCCCC_ret = call @conv_3d_ndhwc_dhwcf_CCCCC(%in3D_ndhwc_CCCCC, %filter3D_ndhwc_CCCCC) + : (tensor, + tensor) -> (tensor) + + // CHECK-NEXT:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) ) + %1 = sparse_tensor.convert %CCCCC_ret + : tensor to tensor + %v1 = vector.transfer_read %1[%c0, %c0, %c0, %c0, %c0], %zero + : tensor, vector<1x6x6x6x1xf32> + vector.print %v1 : vector<1x6x6x6x1xf32> + + %CDCDC_ret = call @conv_3d_ndhwc_dhwcf_CDCDC(%in3D_ndhwc_CDCDC, %filter3D_ndhwc_CDCDC) + : (tensor, + tensor) -> (tensor) + + // CHECK-NEXT:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) ) + %2 = sparse_tensor.convert %CDCDC_ret + : tensor to tensor + %v2 = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0, %c0], %zero + : tensor, vector<1x6x6x6x1xf32> + vector.print %v2 : vector<1x6x6x6x1xf32> + + // Free the resources + bufferization.dealloc_tensor %in3D_ndhwc : tensor + bufferization.dealloc_tensor %filter3D_ndhwc : tensor + bufferization.dealloc_tensor %out3D_ndhwc : tensor + + bufferization.dealloc_tensor %in3D_ndhwc_CDCDC : tensor + bufferization.dealloc_tensor %filter3D_ndhwc_CDCDC : tensor + bufferization.dealloc_tensor %in3D_ndhwc_CCCCC : tensor + bufferization.dealloc_tensor %filter3D_ndhwc_CCCCC : tensor + + bufferization.dealloc_tensor %CCCCC_ret : tensor + bufferization.dealloc_tensor %CDCDC_ret : tensor + + return +}