diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp @@ -1800,20 +1800,20 @@ pHi = genIndexLoad(builder, loc, positionsBuffers[tid][lvl], ADDI(posits[tid][lvl - 1], c1)); } - // Fills out pIdxBuffer[tid][lvl][0] with [/*memSize =*/4, 0, 0, pHi] + // Fills out pIdxBuffer[tid][lvl][0] with [/*memSize =*/4, 0, pLo, pHi] builder.create(loc, c4, sPtrBuf, c0); // memSize = 4 builder.create(loc, c0, sPtrBuf, c1); // index = 0 builder.create(loc, pLo, sPtrBuf, c2); // pLo builder.create(loc, pHi, sPtrBuf, c3); // pHi - // This is an non empty tensor if 0 < pHi. - Value isNonEmpty = CMPI(ult, c0, pHi); + // This is a non-empty tensor if pLo < pHi. + Value isNonEmpty = CMPI(ult, pLo, pHi); // The minimal coord must be at the first on ordered level. // FIXME: Technically we should load the coord only when the slice is // nonempty. though we assume that even on empty sparse tensors, a non-empty // ptr/idx buffer is allocated for each level so it would not cause OOB to // avoid generating a ifOp here. - Value minCrd = genIndexLoad(builder, loc, coordinatesBuffers[tid][0], c0); + Value minCrd = genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], pLo); // FIXME: We need the relative offset related to the base slice. 
Value absOffset = offsetFromMinCoord(builder, loc, minCrd, size, isNonEmpty); diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir @@ -38,6 +38,10 @@ lvlTypes = [ "compressed", "dense", "compressed", "dense" ] }> +#DCCD = #sparse_tensor.encoding<{ + lvlTypes = [ "dense", "compressed", "compressed", "dense" ] +}> + // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor { %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor @@ -77,6 +81,18 @@ return %ret : tensor } +func.func @conv_2d_nhwc_hwcf_DCCD(%arg0: tensor, %arg1: tensor) -> tensor { + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %c6 = arith.constant 6 : index + %s = bufferization.alloc_tensor(%c3, %c6, %c6, %c1) : tensor + %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%arg0, %arg1: tensor, tensor) + outs (%s: tensor) -> tensor + return %ret : tensor +} + func.func @entry() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -96,10 +112,13 @@ : tensor to tensor %in2D_nhwc_CDCD = sparse_tensor.convert %in2D_nhwc : tensor to tensor + %in2D_nhwc_DCCD = sparse_tensor.convert %in2D_nhwc + : tensor to tensor %dense_ret = call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (tensor, tensor, tensor) -> (tensor) %CCCC_ret = call @conv_2d_nhwc_hwcf_CCCC(%in2D_nhwc_CCCC, %filter2D_nhwc) : (tensor, tensor) -> (tensor) %CDCD_ret = call @conv_2d_nhwc_hwcf_CDCD(%in2D_nhwc_CDCD, %filter2D_nhwc) : (tensor, tensor) -> (tensor) + %DCCD_ret = call @conv_2d_nhwc_hwcf_DCCD(%in2D_nhwc_DCCD, 
%filter2D_nhwc) : (tensor, tensor) -> (tensor) // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), @@ -171,6 +190,30 @@ : tensor, vector<3x6x6x1xf32> vector.print %v2 : vector<3x6x6x1xf32> + // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ), + // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ), + // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) + %3 = sparse_tensor.convert %DCCD_ret + : tensor to tensor + %v3 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %zero + : tensor, vector<3x6x6x1xf32> + vector.print %v3 : vector<3x6x6x1xf32> + // Free the resources bufferization.dealloc_tensor %in2D_nhwc : tensor bufferization.dealloc_tensor %filter2D_nhwc : tensor @@ -178,9 +221,11 @@ 
bufferization.dealloc_tensor %in2D_nhwc_CDCD : tensor bufferization.dealloc_tensor %in2D_nhwc_CCCC : tensor + bufferization.dealloc_tensor %in2D_nhwc_DCCD : tensor bufferization.dealloc_tensor %CCCC_ret : tensor bufferization.dealloc_tensor %CDCD_ret : tensor + bufferization.dealloc_tensor %DCCD_ret : tensor return }