diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
@@ -1098,13 +1098,22 @@
   if (isDenseCond(loopCondKind) && isAffineIdxCond(loopCondKind)) {
     bool unReduc = isAffineIdxUnRedCond(loopCondKind);
     assert(unReduc == !depFullyReduced(tid, lvl));
-    auto [size, stride] = sliceMeta[tid][lvl][sliceStack[tid].back().depth];
-    assert(stride == 1 && "Not yet implemented");
-    hi = size;
+    unsigned depth = sliceStack[tid].back().depth;
+    assert(depth >= 1);
+    // The *next* slice size after reducing the current index variable.
+    auto [nxSz, nxStride] = sliceMeta[tid][lvl][depth];
+    // The *current* stride used to reduce the current index variable.
+    // E.g., for 2 * i, stride = 2.
+    unsigned stride = sliceMeta[tid][lvl][depth - 1].second;
+    hi = nxSz;
     if (unReduc) {
       // Adjust for loop hi for dense slice-driven loop.
       hi = SUBI(lvlSizes[tid][lvl], hi);
       hi = ADDI(hi, C_IDX(1));
+      hi = DIVUI(hi, C_IDX(stride));
+    } else {
+      // TODO: dilated convolution.
+      assert(nxStride == 1 && "Not yet implemented.");
     }
   }
   std::tie(l, iv) = emitForLoopOverTensorAtLvl(builder, loc, tid, lvl, lo, hi,
@@ -1277,8 +1286,11 @@
       // slice is strided.
       if (unReduc) {
         assert(*info.slicedOnLvl == lvl);
+        unsigned depth = sliceStack[tid].back().depth;
+        assert(depth >= 1);
+        unsigned stride = sliceMeta[tid][lvl][depth - 1].second;
         // Update the slice information as we enter the new loop.
-        info.minCrd = info.offset = iv;
+        info.minCrd = info.offset = MULI(iv, C_IDX(stride));
         info.isNonEmpty = constantI1(builder, loc, true);
         levelReducedDep[tid][lvl]++;
       } else {
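Note on the bound computed in the first hunk above: after the SUBI/ADDI
adjustment, hi holds the number of unit-stride slice offsets,
lvlSize - sliceSize + 1, and the new DIVUI divides that by the stride; the
MULI in the second hunk then maps each induction value iv back to the actual
offset iv * stride. Below is a minimal standalone sketch of that arithmetic;
the helper name and the plain unsigned types are illustrative only, not the
LoopEmitter API (which operates on mlir::Value):

  #include <cassert>
  #include <cstdio>

  // Upper bound of the dense slice-driven loop: the count of unit-stride
  // slice positions, lvlSize - sliceSize + 1, divided by the stride.
  // Each induction value iv then addresses the slice at offset iv * stride.
  unsigned denseSliceLoopHi(unsigned lvlSize, unsigned sliceSize,
                            unsigned stride) {
    assert(sliceSize >= 1 && sliceSize <= lvlSize && stride >= 1);
    return (lvlSize - sliceSize + 1) / stride;
  }

  int main() {
    // E.g., a dense level of size 8, a 3-wide slice (filter), stride 2
    // (as in the 2 * i index of a stride-2 convolution): hi = 6 / 2 = 3,
    // i.e., the loop visits offsets 0, 2, and 4.
    printf("%u\n", denseSliceLoopHi(8, 3, 2)); // prints 3
  }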
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_strided_conv_2d_nhwc_hwcf.mlir
@@ -34,6 +34,10 @@
   lvlTypes = [ "compressed", "compressed", "compressed", "compressed" ]
 }>
 
+#CDCC = #sparse_tensor.encoding<{
+  lvlTypes = [ "compressed", "dense", "compressed", "compressed" ]
+}>
+
 // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f
 func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor<?x?x?x?xf32> {
   %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor<?x?x?x?xf32>
@@ -57,6 +61,15 @@
   return %ret : tensor<?x?x?x?xf32>
 }
 
+func.func @conv_2d_nhwc_hwcf_CDCC(%arg0: tensor<?x?x?x?xf32, #CDCC>, %arg1: tensor<?x?x?x?xf32>, %arg2: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
+  %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
+                                   strides = dense<2> : tensor<2xi64>}
+     ins (%arg0, %arg1: tensor<?x?x?x?xf32, #CDCC>, tensor<?x?x?x?xf32>)
+    outs (%arg2: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
+  return %ret : tensor<?x?x?x?xf32>
+}
+
+
 func.func @entry() {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -74,9 +87,12 @@
 
   %in2D_nhwc_CCCC = sparse_tensor.convert %in2D_nhwc
     : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>
+  %in2D_nhwc_CDCC = sparse_tensor.convert %in2D_nhwc
+    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CDCC>
 
   %dense_ret = call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
   %CCCC_ret = call @conv_2d_nhwc_hwcf_CCCC(%in2D_nhwc_CCCC, %filter2D_nhwc, %out2D_nhwc) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
+  %CDCC_ret = call @conv_2d_nhwc_hwcf_CDCC(%in2D_nhwc_CDCC, %filter2D_nhwc, %out2D_nhwc) : (tensor<?x?x?x?xf32, #CDCC>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
 
   // CHECK: ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
   // CHECK-SAME: ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
@@ -85,18 +101,26 @@
     : tensor<?x?x?x?xf32>, vector<3x3x3x1xf32>
   vector.print %dense_v : vector<3x3x3x1xf32>
 
-  // CHECK: ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
+  // CHECK-NEXT: ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
   // CHECK-SAME: ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
   // CHECK-SAME: ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ) )
   %v1 = vector.transfer_read %CCCC_ret[%c0, %c0, %c0, %c0], %zero
     : tensor<?x?x?x?xf32, #CCCC>, vector<3x3x3x1xf32>
   vector.print %v1 : vector<3x3x3x1xf32>
 
+  // CHECK-NEXT: ( ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 20 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
+  // CHECK-SAME: ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ),
+  // CHECK-SAME: ( ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ), ( ( 0 ), ( 0 ), ( 0 ) ) ) )
+  %v2 = vector.transfer_read %CDCC_ret[%c0, %c0, %c0, %c0], %zero
+    : tensor<?x?x?x?xf32, #CDCC>, vector<3x3x3x1xf32>
+  vector.print %v2 : vector<3x3x3x1xf32>
+
   // Free the resources
   bufferization.dealloc_tensor %in2D_nhwc : tensor<?x?x?x?xf32>
   bufferization.dealloc_tensor %filter2D_nhwc : tensor<?x?x?x?xf32>
   bufferization.dealloc_tensor %out2D_nhwc : tensor<?x?x?x?xf32>
   bufferization.dealloc_tensor %in2D_nhwc_CCCC : tensor<?x?x?x?xf32, #CCCC>
+  bufferization.dealloc_tensor %in2D_nhwc_CDCC : tensor<?x?x?x?xf32, #CDCC>
 
   return
 }