diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -232,6 +232,85 @@
   let hasCanonicalizer = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// AllocaOp
+//===----------------------------------------------------------------------===//
+
+def AllocaOp : Std_Op<"alloca"> {
+  let summary = "stack memory allocation operation";
+  let description = [{
+    The "alloca" operation allocates memory on the stack, to be automatically
+    released when the stack frame is discarded. The amount of memory allocated
+    is specified by its memref type and additional operands. For example:
+
+      %0 = alloca() : memref<8x64xf32>
+
+    The optional list of dimension operands is bound to the dynamic dimensions
+    specified in its memref type. In the example below, the SSA value '%d' is
+    bound to the second dimension of the memref (which is dynamic).
+
+      %0 = alloca(%d) : memref<8x?xf32>
+
+    The optional list of symbol operands is bound to the symbols of the
+    memref's affine map. In the example below, the SSA value '%s' is bound to
+    the symbol 's0' in the affine map specified in the alloca's memref type.
+
+      %0 = alloca()[%s] : memref<8x64xf32, (d0, d1)[s0] -> ((d0 + s0), d1)>
+
+    This operation returns a single SSA value of memref type, which can be
+    used by subsequent load and store operations. An optional alignment
+    attribute, if specified, guarantees alignment at least to that boundary.
+    If not specified, an alignment on any convenient boundary compatible with
+    the type will be chosen.
+  }];
+
+  let arguments = (ins Variadic<Index>:$operands,
+                   Confined<OptionalAttr<I64Attr>,
+                            [IntMinValue<0>]>:$alignment);
+  let results = (outs AnyMemRef);
+
+  let builders = [OpBuilder<
+    "Builder *builder, OperationState &result, MemRefType memrefType, " #
+    "Optional<int64_t> alignment = None", [{
+      result.types.push_back(memrefType);
+      if (alignment) {
+        auto alignmentAttr = builder->getI64IntegerAttr(alignment.getValue());
+        result.addAttribute(getAlignmentAttrName(), alignmentAttr);
+      }
+    }]>,
+    OpBuilder<
+    "Builder *builder, OperationState &result, MemRefType memrefType, " #
+    "ArrayRef<Value> operands, IntegerAttr alignment = IntegerAttr()", [{
+      result.addOperands(operands);
+      result.types.push_back(memrefType);
+      if (alignment)
+        result.addAttribute(getAlignmentAttrName(), alignment);
+    }]>];
+
+  let extraClassDeclaration = [{
+    static StringRef getAlignmentAttrName() { return "alignment"; }
+
+    MemRefType getType() { return getResult().getType().cast<MemRefType>(); }
+
+    /// Returns the number of symbolic operands (the ones in square brackets),
+    /// which bind to the symbols of the memref's layout map.
+    unsigned getNumSymbolicOperands() {
+      return getNumOperands() - getType().getNumDynamicDims();
+    }
+
+    /// Returns the symbolic operands (the ones in square brackets), which
+    /// bind to the symbols of the memref's layout map.
+    operand_range getSymbolicOperands() {
+      return operands().drop_front(getType().getNumDynamicDims());
+    }
+
+    /// Returns the dynamic sizes for this alloca operation, if specified.
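+    /// (Illustration only, with hypothetical IR: given
+    ///    %0 = alloca(%d)[%s] : memref<8x?xf32, #map>
+    /// this returns {%d}, while getSymbolicOperands() returns {%s}.)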
+    operand_range getDynamicSizes() {
+      return operands().take_front(getType().getNumDynamicDims());
+    }
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // AndOp
 //===----------------------------------------------------------------------===//

diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -1566,6 +1566,153 @@
   bool useAlloca;
 };
 
+// An `alloca` is converted into a definition of a memref descriptor value and
+// an llvm.alloca to allocate the underlying data buffer. The memref
+// descriptor is of the LLVM structure type where the first element is a
+// pointer to the (typed) data buffer, and the remaining elements serve to
+// store the offset, sizes, and strides of the memref using the LLVM-converted
+// `index` type.
+struct AllocaOpLowering : public LLVMLegalizationPattern<AllocaOp> {
+  using LLVMLegalizationPattern<AllocaOp>::LLVMLegalizationPattern;
+
+  LogicalResult match(Operation *op) const override {
+    MemRefType type = cast<AllocaOp>(op).getType();
+    if (isSupportedMemRefType(type))
+      return success();
+
+    int64_t offset;
+    SmallVector<int64_t, 4> strides;
+    if (failed(getStridesAndOffset(type, strides, offset)))
+      return failure();
+
+    // Dynamic strides are OK if they can be deduced from dynamic sizes (which
+    // is guaranteed when getStridesAndOffset succeeds); a dynamic offset,
+    // however, can never be handled by an alloca.
+    if (offset == MemRefType::getDynamicStrideOrOffset())
+      return failure();
+
+    return success();
+  }
+
+  void rewrite(Operation *op, ArrayRef<Value> operands,
+               ConversionPatternRewriter &rewriter) const override {
+    auto allocaOp = cast<AllocaOp>(op);
+    auto loc = op->getLoc();
+    MemRefType type = allocaOp.getType();
+
+    // Get the actual sizes of the memref as values: static sizes are constant
+    // values and dynamic sizes are passed to 'alloca' as operands. In the
+    // case of a zero-dimensional memref, assume a scalar (size 1).
+    SmallVector<Value, 4> sizes;
+    sizes.reserve(type.getRank());
+    unsigned i = 0;
+    for (int64_t s : type.getShape())
+      sizes.push_back(s == -1 ? operands[i++]
+                              : createIndexConstant(rewriter, loc, s));
+    if (sizes.empty())
+      sizes.push_back(createIndexConstant(rewriter, loc, 1));
+
+    // Compute the total number of memref elements.
+    Value cumulativeSize = sizes.front();
+    for (unsigned i = 1, e = sizes.size(); i < e; ++i)
+      cumulativeSize = rewriter.create<LLVM::MulOp>(
+          loc, getIndexType(), ArrayRef<Value>{cumulativeSize, sizes[i]});
+
+    // Compute the size of an individual element. This emits the MLIR
+    // equivalent of the following sizeof(...) implementation in LLVM IR:
+    //   %0 = getelementptr %elementType* null, %indexType 1
+    //   %1 = ptrtoint %elementType* %0 to %indexType
+    // which is a common pattern of getting the size of a type in bytes.
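+    // (Illustration only: for an f32 element type this materializes, in the
+    // LLVM dialect,
+    //   %null = llvm.mlir.null : !llvm<"float*">
+    //   %one = llvm.mlir.constant(1 : index) : !llvm.i64
+    //   %gep = llvm.getelementptr %null[%one]
+    //       : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
+    //   %size = llvm.ptrtoint %gep : !llvm<"float*"> to !llvm.i64
+    // as exercised by the tests accompanying this patch.)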
+    auto elementType = type.getElementType();
+    auto convertedPtrType = typeConverter.convertType(elementType)
+                                .cast<LLVM::LLVMType>()
+                                .getPointerTo();
+    auto nullPtr = rewriter.create<LLVM::NullOp>(loc, convertedPtrType);
+    auto one = createIndexConstant(rewriter, loc, 1);
+    auto gep = rewriter.create<LLVM::GEPOp>(loc, convertedPtrType,
+                                            ArrayRef<Value>{nullPtr, one});
+    auto elementSize =
+        rewriter.create<LLVM::PtrToIntOp>(loc, getIndexType(), gep);
+    cumulativeSize = rewriter.create<LLVM::MulOp>(
+        loc, getIndexType(), ArrayRef<Value>{cumulativeSize, elementSize});
+
+    Optional<int64_t> alignment;
+    if (auto attr = allocaOp.alignment())
+      alignment = attr.getValue().getSExtValue();
+
+    // Allocate the underlying buffer and store a pointer to it in the MemRef
+    // descriptor.
+    auto structElementType = typeConverter.convertType(elementType);
+    auto elementPtrType =
+        structElementType.cast<LLVM::LLVMType>().getPointerTo(
+            type.getMemorySpace());
+    Value allocated = rewriter.create<LLVM::AllocaOp>(
+        loc, elementPtrType, cumulativeSize,
+        alignment.hasValue() ? alignment.getValue() : 0);
+
+    int64_t offset;
+    SmallVector<int64_t, 4> strides;
+    auto successStrides = getStridesAndOffset(type, strides, offset);
+    assert(succeeded(successStrides) && "unexpected non-strided memref");
+    (void)successStrides;
+    assert(offset != MemRefType::getDynamicStrideOrOffset() &&
+           "unexpected dynamic offset");
+
+    // Corner case for 0-D memrefs: they have size 1.
+    assert(((type.getRank() == 0 && strides.empty() && sizes.size() == 1) ||
+            (strides.size() == sizes.size())) &&
+           "unexpected number of strides");
+
+    // Create the MemRef descriptor.
+    auto structType = typeConverter.convertType(type);
+    auto memRefDescriptor = MemRefDescriptor::undef(rewriter, loc, structType);
+
+    // Field 1: Allocated pointer.
+    memRefDescriptor.setAllocatedPtr(rewriter, loc, allocated);
+
+    // Field 2: The aligned pointer is the same as the allocated one here,
+    // since the underlying llvm.alloca supports alignment directly.
+    memRefDescriptor.setAlignedPtr(rewriter, loc, allocated);
+
+    // Field 3: Offset in the aligned pointer.
+    memRefDescriptor.setOffset(rewriter, loc,
+                               createIndexConstant(rewriter, loc, offset));
+
+    if (type.getRank() == 0)
+      // No size/stride descriptor in the memref; return the descriptor value.
+      return rewriter.replaceOp(op, {memRefDescriptor});
+
+    // Store all sizes in the descriptor. Only dynamic sizes are passed in as
+    // operands to AllocaOp.
+    Value runningStride = nullptr;
+    // Iterate strides in reverse order, computing runningStride and
+    // strideValues.
+    auto nStrides = strides.size();
+    SmallVector<Value, 4> strideValues(nStrides, nullptr);
+    for (auto indexedStride : llvm::enumerate(llvm::reverse(strides))) {
+      int64_t index = nStrides - 1 - indexedStride.index();
+      if (strides[index] == MemRefType::getDynamicStrideOrOffset())
+        // An identity layout map is enforced in the match function, so we
+        // compute: `runningStride *= sizes[index + 1]`.
+        runningStride = runningStride
+                            ? rewriter.create<LLVM::MulOp>(
+                                  op->getLoc(), runningStride,
+                                  sizes[index + 1])
+                            : createIndexConstant(rewriter, op->getLoc(), 1);
+      else
+        runningStride =
+            createIndexConstant(rewriter, op->getLoc(), strides[index]);
+      strideValues[index] = runningStride;
+    }
+    // Fill the size and stride descriptors in the memref.
+    for (auto indexedSize : llvm::enumerate(sizes)) {
+      int64_t index = indexedSize.index();
+      memRefDescriptor.setSize(rewriter, loc, index, indexedSize.value());
+      memRefDescriptor.setStride(rewriter, loc, index, strideValues[index]);
+    }
+
+    // Return the final value of the descriptor.
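+    // (Illustration only: for a memref<?x?xf32> input, the descriptor built
+    // above has the LLVM struct type
+    //   { float*, float*, i64, [2 x i64], [2 x i64] }
+    // holding the allocated pointer, aligned pointer, offset, sizes, and
+    // strides, in that order, as the tests below exercise.)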
+    rewriter.replaceOp(op, {memRefDescriptor});
+  }
+};
+
 // A CallOp automatically promotes MemRefType to a sequence of alloca/store
 // and passes the pointer to the MemRef across function boundaries.
 template <typename CallOpType>
@@ -2791,6 +2938,7 @@
       AbsFOpLowering,
       AddFOpLowering,
       AddIOpLowering,
+      AllocaOpLowering,
       AndOpLowering,
       AtomicCmpXchgOpLowering,
       AtomicRMWOpLowering,

diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -245,8 +245,12 @@
 // AllocOp
 //===----------------------------------------------------------------------===//
 
-static void print(OpAsmPrinter &p, AllocOp op) {
-  p << "alloc";
+template <typename AllocLikeOp>
+static void printAllocLikeOp(OpAsmPrinter &p, AllocLikeOp op, StringRef name) {
+  static_assert(std::is_same<AllocLikeOp, AllocOp>::value ||
+                    std::is_same<AllocLikeOp, AllocaOp>::value,
+                "applies only to alloc or alloca");
+  p << name;
 
   // Print dynamic dimension operands.
   MemRefType type = op.getType();
@@ -256,7 +260,12 @@
   p << " : " << type;
 }
 
-static ParseResult parseAllocOp(OpAsmParser &parser, OperationState &result) {
+static void print(OpAsmPrinter &p, AllocOp op) {
+  printAllocLikeOp(p, op, "alloc");
+}
+
+static ParseResult parseAllocLikeOp(OpAsmParser &parser,
+                                    OperationState &result) {
   MemRefType type;
 
   // Parse the dimension operands and optional symbol operands, followed by a
@@ -281,8 +290,16 @@
   return success();
 }
 
-static LogicalResult verify(AllocOp op) {
-  auto memRefType = op.getResult().getType().dyn_cast<MemRefType>();
+static ParseResult parseAllocOp(OpAsmParser &parser, OperationState &result) {
+  return parseAllocLikeOp(parser, result);
+}
+
+template <typename AllocLikeOp>
+static LogicalResult verify(AllocLikeOp op) {
+  static_assert(std::is_same<AllocLikeOp, AllocOp>::value ||
+                    std::is_same<AllocLikeOp, AllocaOp>::value,
+                "applies only to alloc or alloca");
+  auto memRefType = op.getResult().getType().template dyn_cast<MemRefType>();
   if (!memRefType)
     return op.emitOpError("result must be a memref");
 
@@ -389,6 +406,21 @@
   results.insert<SimplifyAllocConst, SimplifyDeadAlloc>(context);
 }
 
+//===----------------------------------------------------------------------===//
+// AllocaOp
+//===----------------------------------------------------------------------===//
+
+static void print(OpAsmPrinter &p, AllocaOp op) {
+  printAllocLikeOp(p, op, "alloca");
+}
+
+static ParseResult parseAllocaOp(OpAsmParser &parser, OperationState &result) {
+  return parseAllocLikeOp(parser, result);
+}
+
+// TODO: register a canonicalization pattern to fold dynamic shapes into
+// constants by reusing SimplifyAllocConst along with an AllocOpLikeInterface.
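+// (A sketch of what such folding would do, assuming it mirrors
+// SimplifyAllocConst's behavior for alloc: rewrite, e.g.,
+//   %c8 = constant 8 : index
+//   %0 = alloca(%c8) : memref<?xf32>
+// into
+//   %1 = alloca() : memref<8xf32>
+//   %0 = memref_cast %1 : memref<8xf32> to memref<?xf32>
+// folding constant dimension operands into the result type.)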
+
 //===----------------------------------------------------------------------===//
 // AndOp
 //===----------------------------------------------------------------------===//

diff --git a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
--- a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
@@ -93,6 +93,42 @@
   return %0 : memref<?x?xf32>
 }
 
+// -----
+
+// CHECK-LABEL: func @dynamic_alloca
+// CHECK: %[[M:.*]]: !llvm.i64, %[[N:.*]]: !llvm.i64) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> {
+func @dynamic_alloca(%arg0: index, %arg1: index) -> memref<?x?xf32> {
+// CHECK: %[[num_elems:.*]] = llvm.mul %[[M]], %[[N]] : !llvm.i64
+// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*">
+// CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
+// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
+// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64
+// CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64
+// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[sz_bytes]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
+// CHECK-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
+// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
+// CHECK-NEXT: %[[st0:.*]] = llvm.mul %{{.*}}, %[[N]] : !llvm.i64
+// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  %0 = alloca(%arg0, %arg1) : memref<?x?xf32>
+
+// Test with an explicitly specified alignment. llvm.alloca takes care of the
+// alignment; the same pointer is thus used for allocation and aligned
+// accesses.
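+// (Aside, illustration only: this differs from the std.alloc lowering, which
+// cannot rely on malloc for more than the ABI-default alignment and so has to
+// over-allocate and round the pointer up itself when an alignment is given.)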
+// CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x !llvm.float {alignment = 32 : i64} : (!llvm.i64) -> !llvm<"float*">
+// CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  alloca(%arg0, %arg1) {alignment = 32} : memref<?x?xf32>
+  return %0 : memref<?x?xf32>
+}
+
 // CHECK-LABEL: func @dynamic_dealloc
 func @dynamic_dealloc(%arg0: memref<?x?xf32>) {
 // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">

diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
--- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
@@ -207,6 +207,32 @@
 
 // -----
 
+// CHECK-LABEL: func @static_alloca() -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> {
+func @static_alloca() -> memref<32x18xf32> {
+// CHECK-NEXT: %[[sz1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64
+// CHECK-NEXT: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64
+// CHECK-NEXT: %[[num_elems:.*]] = llvm.mul %[[sz1]], %[[sz2]] : !llvm.i64
+// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*">
+// CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
+// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
+// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64
+// CHECK-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64
+// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[bytes]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
+  %0 = alloca() : memref<32x18xf32>
+
+  // Test with an explicitly specified alignment. llvm.alloca takes care of
+  // the alignment; the same pointer is thus used for allocation and aligned
+  // accesses.
+  // CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x !llvm.float {alignment = 32 : i64} : (!llvm.i64) -> !llvm<"float*">
+  // CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  // CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  // CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  alloca() {alignment = 32} : memref<32x18xf32>
+  return %0 : memref<32x18xf32>
+}
+
+// -----
+
 // CHECK-LABEL: func @static_dealloc
 // BAREPTR-LABEL: func @static_dealloc(%{{.*}}: !llvm<"float*">) {
 func @static_dealloc(%static: memref<10x8xf32>) {

diff --git a/mlir/test/IR/memory-ops.mlir b/mlir/test/IR/memory-ops.mlir
--- a/mlir/test/IR/memory-ops.mlir
+++ b/mlir/test/IR/memory-ops.mlir
@@ -33,6 +33,35 @@
   return
 }
 
+// CHECK-LABEL: func @alloca() {
+func @alloca() {
+^bb0:
+  // Test simple alloca.
+  // CHECK: %0 = alloca() : memref<1024x64xf32, 1>
+  %0 = alloca() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+
+  %c0 = "std.constant"() {value = 0: index} : () -> index
+  %c1 = "std.constant"() {value = 1: index} : () -> index
+
+  // Test alloca with dynamic dimensions.
+  // CHECK: %1 = alloca(%c0, %c1) : memref<?x?xf32, 1>
+  %1 = alloca(%c0, %c1) : memref<?x?xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+
+  // Test alloca with no dynamic dimensions and one symbol.
+  // CHECK: %2 = alloca()[%c0] : memref<2x4xf32, #map0, 1>
+  %2 = alloca()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
+
+  // Test alloca with dynamic dimensions and one symbol.
+  // CHECK: %3 = alloca(%c1)[%c0] : memref<2x?xf32, #map0, 1>
+  %3 = alloca(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1>
+
+  // Alloca with no mappings, but with alignment.
+  // CHECK: %4 = alloca() {alignment = 64 : i64} : memref<2xi32>
+  %4 = alloca() {alignment = 64} : memref<2 x i32>
+
+  return
+}
+
 // CHECK-LABEL: func @dealloc() {
 func @dealloc() {
 ^bb0: