diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -192,7 +192,7 @@

     ```mlir
     sparse_tensor.lex_insert %tensor, %indices, %val
-      : tensor<1024x1024xf64, #CSR>, memref<?xindex>, f64
+      : tensor<1024x1024xf64, #CSR>, memref<?xindex>, memref<f64>
     ```
   }];
   let assemblyFormat = "$tensor `,` $indices `,` $value attr-dict `:`"
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
@@ -88,16 +88,8 @@
   kC32 = 10
 };

-// This x-macro only specifies the non-complex `V` types, because the ABI
-// for complex types has compiler-/architecture-dependent details we need
-// to work around. Namely, when a function takes a parameter of C/C++
-// type `complex32` (per se), then there is additional padding that causes
-// it not to match the LLVM type `!llvm.struct<(f32, f32)>`. This only
-// happens with the `complex32` type itself, not with pointers/arrays
-// of complex values. We also exclude `complex64` because it is in
-// principle susceptible to analogous ABI issues (even though we haven't
-// yet encountered them in practice).
-#define FOREVERY_SIMPLEX_V(DO) \
+// This x-macro includes all `V` types.
+#define FOREVERY_V(DO) \
   DO(F64, double) \
   DO(F32, float) \
   DO(F16, f16) \
@@ -105,12 +97,7 @@
   DO(I64, int64_t) \
   DO(I32, int32_t) \
   DO(I16, int16_t) \
-  DO(I8, int8_t)
-
-// This x-macro includes all `V` types, for when the aforementioned ABI
-// issues don't apply (e.g., because the functions take pointers/arrays).
-#define FOREVERY_V(DO) \
-  FOREVERY_SIMPLEX_V(DO) \
+  DO(I8, int8_t) \
   DO(C64, complex64) \
   DO(C32, complex32)

@@ -195,18 +182,11 @@
 /// Coordinate-scheme method for adding a new element.
 #define DECL_ADDELT(VNAME, V) \
   MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_addElt##VNAME( \
-      void *coo, V value, StridedMemRefType<index_type, 1> *iref, \
+      void *coo, \
+      StridedMemRefType<V, 0> *vref, StridedMemRefType<index_type, 1> *iref, \
       StridedMemRefType<index_type, 1> *pref);
-FOREVERY_SIMPLEX_V(DECL_ADDELT)
-DECL_ADDELT(C64, complex64)
+FOREVERY_V(DECL_ADDELT)
 #undef DECL_ADDELT
-// Explicitly unpack the `complex32` into a pair of `float` arguments,
-// to work around ABI issues.
-// TODO: cleaner way to avoid ABI padding problem?
-MLIR_CRUNNERUTILS_EXPORT void *
-_mlir_ciface_addEltC32(void *coo, float r, float i,
-                       StridedMemRefType<index_type, 1> *iref,
-                       StridedMemRefType<index_type, 1> *pref);

 /// Coordinate-scheme method for getting the next element while iterating.
 #define DECL_GETNEXT(VNAME, V) \
@@ -219,16 +199,10 @@
 /// Tensor-storage method to insert elements in lexicographical index order.
 #define DECL_LEXINSERT(VNAME, V) \
   MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_lexInsert##VNAME( \
-      void *tensor, StridedMemRefType<index_type, 1> *cref, V val);
-FOREVERY_SIMPLEX_V(DECL_LEXINSERT)
-DECL_LEXINSERT(C64, complex64)
+      void *tensor, StridedMemRefType<index_type, 1> *cref, \
+      StridedMemRefType<V, 0> *vref);
+FOREVERY_V(DECL_LEXINSERT)
 #undef DECL_LEXINSERT
-// Explicitly unpack the `complex32` into a pair of `float` arguments,
-// to work around ABI issues.
-// TODO: cleaner way to avoid ABI padding problem?
-MLIR_CRUNNERUTILS_EXPORT void
-_mlir_ciface_lexInsertC32(void *tensor, StridedMemRefType<index_type, 1> *cref,
-                          float r, float i);

 /// Tensor-storage method to insert using expansion.
 #define DECL_EXPINSERT(VNAME, V) \
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -266,11 +266,11 @@
 /// In particular, this generates code like the following:
 ///   val = a[i1,..,ik];
 ///   if val != 0
-///     t->add(val, [i1,..,ik], [p1,..,pk]);
+///     t->add(&val, [i1,..,ik], [p1,..,pk]);
 static void genAddEltCall(OpBuilder &builder, Operation *op, Type eltType,
-                          Value ptr, Value val, Value ind, Value perm) {
+                          Value ptr, Value valPtr, Value ind, Value perm) {
   SmallString<9> name{"addElt", primaryTypeFunctionSuffix(eltType)};
-  SmallVector<Value, 4> params{ptr, val, ind, perm};
+  SmallVector<Value, 4> params{ptr, valPtr, ind, perm};
   Type pTp = getOpaquePointerType(builder);
   createFuncCall(builder, op, name, pTp, params, EmitCInterface::On);
 }
@@ -674,6 +674,7 @@
       }
     }
     Type eltType = stp.getElementType();
+    Value elemPtr = genAllocaScalar(rewriter, loc, eltType);
     scf::buildLoopNest(
         rewriter, op.getLoc(), lo, hi, st, {},
         [&](OpBuilder &builder, Location loc, ValueRange ivs,
@@ -684,7 +685,8 @@
                                              ivs, rank);
           else
             val = genIndexAndValueForDense(rewriter, loc, src, ind, ivs);
-          genAddEltCall(rewriter, op, eltType, coo, val, ind, perm);
+          builder.create<memref::StoreOp>(loc, val, elemPtr);
+          genAddEltCall(rewriter, op, eltType, coo, elemPtr, ind, perm);
           return {};
         });
     // Final call to construct sparse tensor storage.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -56,8 +56,8 @@
         highs(numTensors, std::vector<Value>(numLoops)),
         pidxs(numTensors, std::vector<Value>(numLoops)),
         idxs(numTensors, std::vector<Value>(numLoops)), redVal(), sparseOut(op),
-        outerParNest(nest), lexIdx(), expValues(), expFilled(), expAdded(),
-        expCount(), curVecMask() {}
+        outerParNest(nest), lexIdx(), lexVal(), expValues(), expFilled(),
+        expAdded(), expCount(), curVecMask() {}
   /// Sparsification options.
   SparsificationOptions options;
   /// Universal dense indices and upper bounds (by index). The loops array
@@ -89,6 +89,7 @@
   OpOperand *sparseOut;
   unsigned outerParNest;
   Value lexIdx;
+  Value lexVal;
   Value expValues;
   Value expFilled;
   Value expAdded;
   Value expCount;
@@ -543,6 +544,8 @@
     auto dynShape = {ShapedType::kDynamicSize};
     auto memTp = MemRefType::get(dynShape, builder.getIndexType());
     codegen.lexIdx = builder.create<memref::AllocaOp>(loc, memTp, rank);
+    codegen.lexVal = builder.create<memref::AllocaOp>(
+        loc, MemRefType::get({}, elementType));
   } else {
     // Annotated sparse tensors.
     auto dynShape = {ShapedType::kDynamicSize};
@@ -723,7 +726,8 @@
   Location loc = op.getLoc();
   // Direct insertion in lexicographic index order.
   if (!codegen.expValues) {
-    builder.create<LexInsertOp>(loc, t->get(), codegen.lexIdx, rhs);
+    builder.create<memref::StoreOp>(loc, rhs, codegen.lexVal);
+    builder.create<LexInsertOp>(loc, t->get(), codegen.lexIdx, codegen.lexVal);
     return;
   }
   // Generates insertion code along expanded access pattern.
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -1717,10 +1717,10 @@
 #undef IMPL_GETOVERHEAD

 #define IMPL_ADDELT(VNAME, V) \
-  void *_mlir_ciface_addElt##VNAME(void *coo, V value, \
+  void *_mlir_ciface_addElt##VNAME(void *coo, StridedMemRefType<V, 0> *vref, \
                                    StridedMemRefType<index_type, 1> *iref, \
                                    StridedMemRefType<index_type, 1> *pref) { \
-    assert(coo &&iref &&pref); \
+    assert(coo &&vref &&iref &&pref); \
     assert(iref->strides[0] == 1 && pref->strides[0] == 1); \
     assert(iref->sizes[0] == pref->sizes[0]); \
     const index_type *indx = iref->data + iref->offset; \
@@ -1729,25 +1729,12 @@
     std::vector<index_type> indices(isize); \
     for (uint64_t r = 0; r < isize; r++) \
       indices[perm[r]] = indx[r]; \
-    static_cast<SparseTensorCOO<V> *>(coo)->add(indices, value); \
+    V *value = vref->data + vref->offset; \
+    static_cast<SparseTensorCOO<V> *>(coo)->add(indices, *value); \
     return coo; \
   }
-FOREVERY_SIMPLEX_V(IMPL_ADDELT)
-IMPL_ADDELT(C64, complex64)
-// Marked static because it's not part of the public API.
-// NOTE: the `static` keyword confuses clang-format here, causing
-// the strange indentation of the `_mlir_ciface_addEltC32` prototype.
-// In C++11 we can add a semicolon after the call to `IMPL_ADDELT`
-// and that will correct clang-format. Alas, this file is compiled
-// in C++98 mode where that semicolon is illegal (and there's no portable
-// macro magic to license a no-op semicolon at the top level).
-static IMPL_ADDELT(C32ABI, complex32)
+FOREVERY_V(IMPL_ADDELT)
 #undef IMPL_ADDELT
-    void *_mlir_ciface_addEltC32(void *coo, float r, float i,
-                                 StridedMemRefType<index_type, 1> *iref,
-                                 StridedMemRefType<index_type, 1> *pref) {
-  return _mlir_ciface_addEltC32ABI(coo, complex32(r, i), iref, pref);
-}

 #define IMPL_GETNEXT(VNAME, V) \
   bool _mlir_ciface_getNext##VNAME(void *coo, \
@@ -1771,25 +1758,18 @@
 #undef IMPL_GETNEXT

 #define IMPL_LEXINSERT(VNAME, V) \
-  void _mlir_ciface_lexInsert##VNAME( \
-      void *tensor, StridedMemRefType<index_type, 1> *cref, V val) { \
-    assert(tensor &&cref); \
+  void _mlir_ciface_lexInsert##VNAME(void *tensor, \
+                                     StridedMemRefType<index_type, 1> *cref, \
+                                     StridedMemRefType<V, 0> *vref) { \
+    assert(tensor &&cref &&vref); \
     assert(cref->strides[0] == 1); \
     index_type *cursor = cref->data + cref->offset; \
     assert(cursor); \
-    static_cast<SparseTensorStorageBase *>(tensor)->lexInsert(cursor, val); \
+    V *value = vref->data + vref->offset; \
+    static_cast<SparseTensorStorageBase *>(tensor)->lexInsert(cursor, *value); \
   }
-FOREVERY_SIMPLEX_V(IMPL_LEXINSERT)
-IMPL_LEXINSERT(C64, complex64)
-// Marked static because it's not part of the public API.
-// NOTE: see the note for `_mlir_ciface_addEltC32ABI`
-static IMPL_LEXINSERT(C32ABI, complex32)
+FOREVERY_V(IMPL_LEXINSERT)
 #undef IMPL_LEXINSERT
-    void _mlir_ciface_lexInsertC32(void *tensor,
-                                   StridedMemRefType<index_type, 1> *cref,
-                                   float r, float i) {
-  _mlir_ciface_lexInsertC32ABI(tensor, cref, complex32(r, i));
-}

 #define IMPL_EXPINSERT(VNAME, V) \
   void _mlir_ciface_expInsert##VNAME( \
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -190,12 +190,14 @@
 // CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]])
 // CHECK: %[[M:.*]] = memref.alloca() : memref<1xindex>
 // CHECK: %[[T:.*]] = memref.cast %[[M]] : memref<1xindex> to memref<?xindex>
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<i32>
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[U]] step %[[C1]] {
 // CHECK:   %[[E:.*]] = tensor.extract %[[A]][%[[I]]] : tensor<?xi32>
 // CHECK:   %[[N:.*]] = arith.cmpi ne, %[[E]], %[[I0]] : i32
 // CHECK:   scf.if %[[N]] {
 // CHECK:     memref.store %[[I]], %[[M]][%[[C0]]] : memref<1xindex>
-// CHECK:     call @addEltI32(%[[C]], %[[E]], %[[T]], %[[Z]])
+// CHECK:     memref.store %[[E]], %[[BUF]][] : memref<i32>
+// CHECK:     call @addEltI32(%[[C]], %[[BUF]], %[[T]], %[[Z]])
 // CHECK:   }
 // CHECK: }
 // CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]])
@@ -274,12 +276,14 @@
 // CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]])
 // CHECK: %[[M:.*]] = memref.alloca() : memref<2xindex>
 // CHECK: %[[T:.*]] = memref.cast %[[M]] : memref<2xindex> to memref<?xindex>
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<f64>
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %{{.*}} step %[[C1]] {
 // CHECK:   scf.for %[[J:.*]] = %[[C0]] to %{{.*}} step %[[C1]] {
 // CHECK:     %[[E:.*]] = tensor.extract %[[A]][%[[I]], %[[J]]] : tensor<2x4xf64>
 // CHECK:     memref.store %[[I]], %[[M]][%[[C0]]] : memref<2xindex>
 // CHECK:     memref.store %[[J]], %[[M]][%[[C1]]] : memref<2xindex>
-// CHECK:     call @addEltF64(%[[C]], %[[E]], %[[T]], %[[Z]])
+// CHECK:     memref.store %[[E]], %[[BUF]][] : memref<f64>
+// CHECK:     call @addEltF64(%[[C]], %[[BUF]], %[[T]], %[[Z]])
 // CHECK:   }
 // CHECK: }
 // CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]])
@@ -306,11 +310,13 @@
 // CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]])
 // CHECK: %[[M:.*]] = memref.alloca() : memref<2xindex>
 // CHECK: %[[N:.*]] = memref.cast %[[M]] : memref<2xindex> to memref<?xindex>
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<f32>
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C2]] step %[[C1]] {
 // CHECK:   memref.store %{{.*}}, %[[M]][%[[C0]]] : memref<2xindex>
 // CHECK:   memref.store %{{.*}}, %[[M]][%[[C1]]] : memref<2xindex>
 // CHECK:   %[[V:.*]] = tensor.extract %{{.*}}[%[[I]]] : tensor<2xf32>
-// CHECK:   call @addEltF32(%{{.*}}, %[[V]], %[[N]], %{{.*}})
+// CHECK:   memref.store %[[V]], %[[BUF]][] : memref<f32>
+// CHECK:   call @addEltF32(%{{.*}}, %[[BUF]], %[[N]], %{{.*}})
 // CHECK: }
 // CHECK: %[[T:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[FromCOO]], %[[C]])
 // CHECK: call @delSparseTensorCOOF32(%[[C]])
@@ -343,6 +349,7 @@
 // CHECK: %[[C:.*]] = call @newSparseTensor(%[[X]], %[[Y]], %[[Z]], %{{.*}}, %{{.*}}, %{{.*}}, %[[EmptyCOO]], %[[NP]])
 // CHECK: %[[M:.*]] = memref.alloca() : memref<3xindex>
 // CHECK: %[[N:.*]] = memref.cast %[[M]] : memref<3xindex> to memref<?xindex>
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<f64>
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[U1]] step %[[C1]] {
 // CHECK:   scf.for %[[J:.*]] = %[[C0]] to %[[U2]] step %[[C1]] {
 // CHECK:     scf.for %[[K:.*]] = %[[C0]] to %[[U3]] step %[[C1]] {
@@ -350,7 +357,8 @@
 // CHECK:       memref.store %[[I]], %[[M]][%[[C0]]] : memref<3xindex>
 // CHECK:       memref.store %[[J]], %[[M]][%[[C1]]] : memref<3xindex>
 // CHECK:       memref.store %[[K]], %[[M]][%[[C2]]] : memref<3xindex>
-// CHECK:       call @addEltF64(%[[C]], %[[E]], %[[N]], %[[Z]])
+// CHECK:       memref.store %[[E]], %[[BUF]][] : memref<f64>
+// CHECK:       call @addEltF64(%[[C]], %[[BUF]], %[[N]], %[[Z]])
 // CHECK:     }
 // CHECK:   }
 // CHECK: }
@@ -493,13 +501,13 @@
 // CHECK-LABEL: func @sparse_insert(
 // CHECK-SAME: %[[A:.*]]: !llvm.ptr<i8>,
 // CHECK-SAME: %[[B:.*]]: memref<?xindex>,
-// CHECK-SAME: %[[C:.*]]: f32) {
-// CHECK: call @lexInsertF32(%[[A]], %[[B]], %[[C]]) : (!llvm.ptr<i8>, memref<?xindex>, f32) -> ()
+// CHECK-SAME: %[[C:.*]]: memref<f32>) {
+// CHECK: call @lexInsertF32(%[[A]], %[[B]], %[[C]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f32>) -> ()
 // CHECK: return
 func.func @sparse_insert(%arg0: tensor<128xf32, #SparseVector>,
                          %arg1: memref<?xindex>,
-                         %arg2: f32) {
-  sparse_tensor.lex_insert %arg0, %arg1, %arg2 : tensor<128xf32, #SparseVector>, memref<?xindex>, f32
+                         %arg2: memref<f32>) {
+  sparse_tensor.lex_insert %arg0, %arg1, %arg2 : tensor<128xf32, #SparseVector>, memref<?xindex>, memref<f32>
   return
 }

diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
@@ -360,6 +360,7 @@
 // CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_1]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
 // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
 // CHECK: %[[VAL_8:.*]] = memref.alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<f64>
 // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref<?xindex>
 // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<?xindex>
 // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_2]] {
@@ -374,7 +375,8 @@
 // CHECK:   %[[VAL_19:.*]] = math.log1p %[[VAL_18]] : f64
 // CHECK:   %[[VAL_20:.*]] = math.sin %[[VAL_19]] : f64
 // CHECK:   %[[VAL_21:.*]] = math.tanh %[[VAL_20]] : f64
-// CHECK:   sparse_tensor.lex_insert %[[VAL_4]], %[[VAL_8]], %[[VAL_21]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, memref<?xindex>, f64
+// CHECK:   memref.store %[[VAL_21]], %[[BUF]][] : memref<f64>
+// CHECK:   sparse_tensor.lex_insert %[[VAL_4]], %[[VAL_8]], %[[BUF]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, memref<?xindex>, memref<f64>
 // CHECK: }
 // CHECK: %[[VAL_22:.*]] = sparse_tensor.load %[[VAL_4]] hasInserts : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
 // CHECK: return %[[VAL_22]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir
@@ -82,6 +82,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<i64>
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_1]]] : memref<?xindex>
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_2]]] : memref<?xindex>
 // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_2]] {
@@ -98,7 +99,8 @@
 // CHECK:   %[[VAL_24:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_20]]] : memref<?xi64>
 // CHECK:   %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i64
 // CHECK:   %[[VAL_26:.*]] = arith.muli %[[VAL_22]], %[[VAL_25]] : i64
-// CHECK:   sparse_tensor.lex_insert %[[VAL_6]], %[[VAL_12]], %[[VAL_26]] : tensor
+// CHECK:   memref.store %[[VAL_26]], %[[BUF]][] : memref<i64>
+// CHECK:   sparse_tensor.lex_insert %[[VAL_6]], %[[VAL_12]], %[[BUF]] : tensor>
 // CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
 // CHECK: %[[VAL_11:.*]] = memref.alloca(%[[VAL_5]]) : memref<?xindex>
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<f32>
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_2]] step %[[VAL_4]] {
 // CHECK:   memref.store %[[VAL_12]], %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref<?xindex>
 // CHECK:   %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xindex>
@@ -121,7 +122,8 @@
 // CHECK:   memref.store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref<?xindex>
 // CHECK:   %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_16]]] : memref<?xf32>
 // CHECK:   %[[VAL_19:.*]] = arith.mulf %[[VAL_18]], %[[VAL_1]] : f32
-// CHECK:   sparse_tensor.lex_insert %[[VAL_7]], %[[VAL_11]], %[[VAL_19]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
+// CHECK:   memref.store %[[VAL_19]], %[[BUF]][] : memref<f32>
+// CHECK:   sparse_tensor.lex_insert %[[VAL_7]], %[[VAL_11]], %[[BUF]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
 // CHECK:   }
 // CHECK: }
 // CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_7]] hasInserts : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
@@ -175,6 +177,7 @@
 // CHECK: %[[VAL_21:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor> to memref
 // CHECK: %[[VAL_22:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref
 // CHECK: %[[VAL_23:.*]] = memref.alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK: %[[BUF:.*]] = memref.alloca() : memref<i32>
 // CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref<?xindex>
 // CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
 // CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -255,7 +258,8 @@
 // CHECK:   %[[VAL_97:.*]] = arith.select %[[VAL_95]], %[[VAL_96]], %[[VAL_78]] : index
 // CHECK:   scf.yield %[[VAL_94]], %[[VAL_97]], %[[VAL_98:.*]] : index, index, i32
 // CHECK: }
-// CHECK: sparse_tensor.lex_insert %[[VAL_8]], %[[VAL_23]], %[[VAL_99:.*]]#2 : tensor, memref, i32
+// CHECK: memref.store %[[VAL_70]]#2, %[[BUF]][] : memref
+// CHECK: sparse_tensor.lex_insert %[[VAL_8]], %[[VAL_23]], %[[BUF]] : tensor, memref, memref
 // CHECK: } else {
 // CHECK: }
 // CHECK: %[[VAL_100:.*]] = arith.cmpi eq, %[[VAL_57]], %[[VAL_60]] : index
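For reference, a standalone sketch (not part of the patch) of how a caller would invoke the new C interface: the inserted value now travels through a rank-0 memref descriptor instead of being passed by value, which sidesteps the complex32 padding mismatch described in the removed comments. The `Rank1MemRef`/`Rank0MemRef` structs and the `insertExample` helper below are illustrative stand-ins for MLIR's `StridedMemRefType`, written to mirror the `_mlir_ciface_lexInsertF64` signature declared by this patch.

```cpp
#include <cstdint>

using index_type = uint64_t;

// Simplified stand-ins for MLIR's StridedMemRefType<T, 1> and <T, 0>
// descriptor layouts (basePtr, data, offset, then sizes/strides per rank).
template <typename T> struct Rank1MemRef {
  T *basePtr;
  T *data;
  int64_t offset;
  int64_t sizes[1];
  int64_t strides[1];
};
template <typename T> struct Rank0MemRef {
  T *basePtr;
  T *data;
  int64_t offset;
};

// Shape of the declaration in SparseTensorUtils.h after this patch; the real
// symbol is provided by the MLIR sparse runtime library at link time.
extern "C" void _mlir_ciface_lexInsertF64(void *tensor,
                                          Rank1MemRef<index_type> *cref,
                                          Rank0MemRef<double> *vref);

// Hypothetical caller: insert the value 3.0 at coordinates (2, 5) of a 2-d
// sparse tensor handle obtained elsewhere.
void insertExample(void *tensor) {
  index_type cursor[2] = {2, 5};
  Rank1MemRef<index_type> cref{cursor, cursor, 0, {2}, {1}};
  double val = 3.0;
  Rank0MemRef<double> vref{&val, &val, 0}; // value passed via 0-d buffer
  _mlir_ciface_lexInsertF64(tensor, &cref, &vref);
}
```

Routing the scalar through memory keeps the generated call independent of how a particular target passes small structs by value, which is exactly the detail that made the old by-value `complex32` path need the separate `*C32ABI` wrappers.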