diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -196,13 +196,24 @@
   return inBoundsCondition;
 }
 
+// TODO: Parallelism and threadlocal considerations.
+static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType,
+                                     Operation *op) {
+  auto &b = ScopedContext::getBuilderRef();
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPointToStart(&op->getParentOfType<FuncOp>().front());
+  Value res =
+      std_alloca(memRefMinorVectorType, ValueRange{}, b.getI64IntegerAttr(128));
+  return res;
+}
+
 template <>
 LogicalResult NDTransferOpHelper<TransferReadOp>::doReplace() {
   Value alloc, result;
   if (options.unroll)
     result = std_splat(vectorType, xferOp.padding());
   else
-    alloc = std_alloc(memRefMinorVectorType);
+    alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
 
   emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets,
                 ValueRange majorOffsets, ValueRange minorOffsets,
@@ -297,7 +308,7 @@
 LogicalResult NDTransferOpHelper<TransferWriteOp>::doReplace() {
   Value alloc;
   if (!options.unroll) {
-    alloc = std_alloc(memRefMinorVectorType);
+    alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
     std_store(xferOp.vector(),
               vector_type_cast(MemRefType::get({}, vectorType), alloc));
   }
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
--- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
@@ -232,7 +232,7 @@
   %f7 = constant 7.0: f32
 
   // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
-  // CHECK-DAG: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
+  // CHECK-DAG: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
   // CHECK-DAG: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
   // CHECK: affine.for %[[I:.*]] = 0 to 3 {
   // CHECK:   %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]]
@@ -304,7 +304,7 @@
 // FULL-UNROLL-SAME:   %[[base:[a-zA-Z0-9]+]]: index,
 // FULL-UNROLL-SAME:   %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
 func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
-  // CHECK: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
+  // CHECK: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
   // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
   // CHECK: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
   // CHECK: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
@@ -359,7 +359,7 @@
 // FULL-UNROLL-SAME:   %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
 func @transfer_write_progressive_not_masked(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
   // CHECK-NOT: scf.if
-  // CHECK-NEXT: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
+  // CHECK-NEXT: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
   // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
   // CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
   // CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 {
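
Note on the observable effect, reconstructed from the updated CHECK lines above
(the function below is illustrative only and does not appear in the patch): the
temporary buffer that the lowering previously created with alloc() at its point
of use is now emitted as an alloca() carrying a 128-byte alignment attribute,
placed in the entry block of the enclosing function by setAllocAtFunctionEntry.
A minimal before/after sketch of the lowered IR:

  // Before: heap allocation at the point of use.
  func @example() {
    %buf = alloc() : memref<3xvector<15xf32>>
    return
  }

  // After: stack allocation hoisted to the function entry block. Stack
  // storage is reclaimed automatically on return, so no dealloc is needed.
  func @example() {
    %buf = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
    return
  }

Hoisting to the entry block also means one buffer is reused across all
iterations of the loops the lowering emits, rather than allocating inside the
loop nest; the TODO on setAllocAtFunctionEntry flags that this placement has
not yet been examined for parallel or thread-local execution contexts.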