diff --git a/mlir/include/mlir/Conversion/LLVMCommon/LoweringOptions.h b/mlir/include/mlir/Conversion/LLVMCommon/LoweringOptions.h --- a/mlir/include/mlir/Conversion/LLVMCommon/LoweringOptions.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/LoweringOptions.h @@ -35,6 +35,10 @@ bool useBarePtrCallConv = false; bool emitCWrappers = false; + // Specifies the maximum rank for which the calling convention will realize + // stack-allocated buffers for unranked memory descriptor results. + int64_t maxUnrankedDescBufferRank = 8; + enum class AllocLowering { /// Use malloc for for heap allocations. Malloc, diff --git a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h --- a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h @@ -173,12 +173,12 @@ /// `unpack`. static unsigned getNumUnpackedValues() { return 2; } - /// Builds IR computing the sizes in bytes (suitable for opaque allocation) - /// and appends the corresponding values into `sizes`. - static void computeSizes(OpBuilder &builder, Location loc, - LLVMTypeConverter &typeConverter, - ArrayRef values, - SmallVectorImpl &sizes); + /// Builds IR computing the size in bytes (suitable for opaque allocation). + Value computeSize(OpBuilder &builder, Location loc, + LLVMTypeConverter &typeConverter); + + /// Returns the size in bytes (suitable for opaque allocation) for `rank`. + static int64_t getSize(LLVMTypeConverter &typeConverter, int64_t rank); /// TODO: The following accessors don't take alignment rules between elements /// of the descriptor struct into account.
For some architectures, it might be diff --git a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h --- a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h @@ -116,14 +116,30 @@ ArrayRef sizes, ArrayRef strides, ConversionPatternRewriter &rewriter) const; - /// Copies the memory descriptor for any operands that were unranked - /// descriptors originally to heap-allocated memory (if toDynamic is true) or - /// to stack-allocated memory (otherwise). Also frees the previously used - /// memory (that is assumed to be heap-allocated) if toDynamic is false. - LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, - TypeRange origTypes, - SmallVectorImpl &operands, - bool toDynamic) const; + /// Ensures that all unranked memory descriptors are on the stack. + /// This concerns the dynamically sized inner descriptors. If their rank is + /// sufficiently small, we know that they reside in stack-allocated buffers + /// already. Otherwise, if they are of a rank greater than the maximum rank + /// for stack-allocated descriptor buffers, they reside on the heap. In this + /// case, we have to copy them over to a newly stack-allocated buffer of the + /// right size and free the previously used buffer on the heap. + void copyUnrankedDescriptorsToStack(ConversionPatternRewriter &rewriter, + Location loc, int64_t maxRankOnStack, + TypeRange origTypes, + SmallVectorImpl &operands) const; + + /// Copies all unranked memory descriptors, using the given buffer arguments + /// or newly heap-allocated memory for the inner descriptors. This is to let + /// unranked memory descriptors escape a function. If their rank is + /// sufficiently small, we assume that their inner descriptor fits into the + /// provided buffer. 
Otherwise, if they are of a rank greater than the maximum + /// rank for stack-allocated descriptor buffers, we allocate a new buffer on + /// the heap. In both cases, we copy the inner descriptor and create a copy of + /// the unranked outer descriptor. + void copyUnrankedDescriptorsToBufferOrHeap( + ConversionPatternRewriter &rewriter, Location loc, int64_t maxRankOnStack, + TypeRange origTypes, ArrayRef descBuffers, + SmallVectorImpl &operands) const; }; /// Utility class for operation conversions targeting the LLVM dialect that diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -522,6 +522,11 @@ Option<"emitCWrappers", "emit-c-wrappers", "bool", /*default=*/"false", "Emit wrappers for C-compatible pointer-to-struct memref " "descriptors">, + Option<"maxUnrankedDescBufferRank", "max-unranked-desc-buffer-rank", + "int64_t", /*default=*/"8", + "Specifies the maximum rank for which the calling convention will " + "realize stack-allocated buffers for unranked memory descriptor " + "results.">, Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", "Bitwidth of the index type, 0 to use size of machine word">, diff --git a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp --- a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp +++ b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp @@ -333,16 +333,11 @@ results.push_back(d.memRefDescPtr(builder, loc)); } -void UnrankedMemRefDescriptor::computeSizes( - OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, - ArrayRef values, SmallVectorImpl &sizes) { - if (values.empty()) - return; +Value UnrankedMemRefDescriptor::computeSize(OpBuilder &builder, Location loc, + LLVMTypeConverter &typeConverter) { - // Cache the index type. + // Get constants.
Type indexType = typeConverter.getIndexType(); - - // Initialize shared constants. Value one = createIndexAttrConstant(builder, loc, indexType, 1); Value two = createIndexAttrConstant(builder, loc, indexType, 2); Value pointerSize = createIndexAttrConstant( @@ -351,31 +346,35 @@ createIndexAttrConstant(builder, loc, indexType, ceilDiv(typeConverter.getIndexTypeBitwidth(), 8)); - sizes.reserve(sizes.size() + values.size()); - for (UnrankedMemRefDescriptor desc : values) { - // Emit IR computing the memory necessary to store the descriptor. This - // assumes the descriptor to be - // { type*, type*, index, index[rank], index[rank] } - // and densely packed, so the total size is - // 2 * sizeof(pointer) + (1 + 2 * rank) * sizeof(index). - // TODO: consider including the actual size (including eventual padding due - // to data layout) into the unranked descriptor. - Value doublePointerSize = - builder.create(loc, indexType, two, pointerSize); - - // (1 + 2 * rank) * sizeof(index) - Value rank = desc.rank(builder, loc); - Value doubleRank = builder.create(loc, indexType, two, rank); - Value doubleRankIncremented = - builder.create(loc, indexType, doubleRank, one); - Value rankIndexSize = builder.create( - loc, indexType, doubleRankIncremented, indexSize); - - // Total allocation size. - Value allocationSize = builder.create( - loc, indexType, doublePointerSize, rankIndexSize); - sizes.push_back(allocationSize); - } + // Emit IR computing the memory necessary to store the descriptor. This + // assumes the descriptor to be + // { type*, type*, index, index[rank], index[rank] } + // and densely packed, so the total size is + // 2 * sizeof(pointer) + (1 + 2 * rank) * sizeof(index). + // TODO: consider including the actual size (including eventual padding due + // to data layout) into the unranked descriptor. 
+ + // 2 * sizeof(pointer) + Value doublePointerSize = + builder.create(loc, indexType, two, pointerSize); + + // (1 + 2 * rank) * sizeof(index) + Value rank = this->rank(builder, loc); + Value doubleRank = builder.create(loc, indexType, two, rank); + Value doubleRankIncremented = + builder.create(loc, indexType, doubleRank, one); + Value rankIndexSize = builder.create( + loc, indexType, doubleRankIncremented, indexSize); + + return builder.create(loc, indexType, doublePointerSize, + rankIndexSize); +} + +int64_t UnrankedMemRefDescriptor::getSize(LLVMTypeConverter &typeConverter, + int64_t rank) { + int64_t ptrSize = ceilDiv(typeConverter.getPointerBitwidth(), 8); + int64_t indexSize = ceilDiv(typeConverter.getIndexTypeBitwidth(), 8); + return 2 * ptrSize + (1 + 2 * rank) * indexSize; } Value UnrankedMemRefDescriptor::allocatedPtr(OpBuilder &builder, Location loc, diff --git a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp --- a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp +++ b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp @@ -225,81 +225,176 @@ return memRefDescriptor; } -LogicalResult ConvertToLLVMPattern::copyUnrankedDescriptors( - OpBuilder &builder, Location loc, TypeRange origTypes, - SmallVectorImpl &operands, bool toDynamic) const { - assert(origTypes.size() == operands.size() && - "expected as may original types as operands"); - - // Find operands of unranked memref type and store them. - SmallVector unrankedMemrefs; - for (unsigned i = 0, e = operands.size(); i < e; ++i) - if (origTypes[i].isa()) - unrankedMemrefs.emplace_back(operands[i]); - - if (unrankedMemrefs.empty()) - return success(); - - // Compute allocation sizes. - SmallVector sizes; - UnrankedMemRefDescriptor::computeSizes(builder, loc, *getTypeConverter(), - unrankedMemrefs, sizes); - - // Get frequently used types. 
- MLIRContext *context = builder.getContext(); - Type voidPtrType = LLVM::LLVMPointerType::get(IntegerType::get(context, 8)); - auto i1Type = IntegerType::get(context, 1); - Type indexType = getTypeConverter()->getIndexType(); - - // Find the malloc and free, or declare them if necessary. - auto module = builder.getInsertionPoint()->getParentOfType(); - LLVM::LLVMFuncOp freeFunc, mallocFunc; - if (toDynamic) - mallocFunc = LLVM::lookupOrCreateMallocFn(module, indexType); - if (!toDynamic) - freeFunc = LLVM::lookupOrCreateFreeFn(module); - - // Initialize shared constants. - Value zero = - builder.create(loc, i1Type, builder.getBoolAttr(false)); - - unsigned unrankedMemrefPos = 0; - for (unsigned i = 0, e = operands.size(); i < e; ++i) { - Type type = origTypes[i]; - if (!type.isa()) +void ConvertToLLVMPattern::copyUnrankedDescriptorsToStack( + ConversionPatternRewriter &rewriter, Location loc, int64_t maxRankOnStack, + TypeRange origTypes, SmallVectorImpl &operands) const { + + // Check if there is any unranked operand to avoid shared constants. + if (llvm::none_of(origTypes, + [](Type ty) { return ty.isa(); })) { + return; + } + + OpBuilder::InsertionGuard guard(rewriter); + + // Find the free function. + auto module = rewriter.getInsertionPoint()->getParentOfType(); + LLVM::LLVMFuncOp freeFunc = LLVM::lookupOrCreateFreeFn(module); + + // Get common types and constants. + Type voidPtrTy = this->getVoidPtrType(); + Type i1Ty = rewriter.getI1Type(); + // Use the index type for the rank bound to match the descriptor's rank. + Value maxRankOnStackCst = createIndexConstant(rewriter, loc, maxRankOnStack); + + for (unsigned i = 0; i < operands.size(); i++) { + + // Only copy unranked descriptors. + if (!origTypes[i].isa()) continue; - Value allocationSize = sizes[unrankedMemrefPos++]; - UnrankedMemRefDescriptor desc(operands[i]); - // Allocate memory, copy, and free the source if necessary. - Value memory = - toDynamic - ? 
builder.create(loc, mallocFunc, allocationSize) - .getResult(0) - : builder.create(loc, voidPtrType, allocationSize, - /*alignment=*/0); - Value source = desc.memRefDescPtr(builder, loc); - builder.create(loc, memory, source, allocationSize, zero); - if (!toDynamic) - builder.create(loc, freeFunc, source); + // Split the block to insert descriptor copying logic. + Block *origBlock = rewriter.getBlock(); + Block *continuationBlock = + rewriter.splitBlock(origBlock, rewriter.getInsertionPoint()); + Type descTy = getTypeConverter()->convertType(origTypes[i]); + continuationBlock->addArgument(descTy); + + // Generate the block for large ranks. + // This is the case in which we expect the inner descriptor in dynamic + // memory. We copy it to stack-allocated memory and free the original + // inner descriptor before creating the outer descriptor copy. + Block *largeRankBlock = rewriter.createBlock(origBlock->getParent()); + + // Copy inner descriptor to stack. + UnrankedMemRefDescriptor desc(operands[i]); + Value allocationSize = desc.computeSize(rewriter, loc, *getTypeConverter()); + Value innerDescCpy = rewriter.create( + loc, voidPtrTy, allocationSize, /*alignment=*/0); + Value innerDesc = desc.memRefDescPtr(rewriter, loc); + Value zero = rewriter.create(loc, i1Ty, + rewriter.getBoolAttr(false)); + rewriter.create(loc, innerDescCpy, innerDesc, + allocationSize, zero); + rewriter.create(loc, freeFunc, innerDesc); // Create a new descriptor. The same descriptor can be returned multiple // times, attempting to modify its pointer can lead to memory leaks // (allocated twice and overwritten) or double frees (the caller does not // know if the descriptor points to the same memory). 
- Type descriptorType = getTypeConverter()->convertType(type); - if (!descriptorType) - return failure(); - auto updatedDesc = - UnrankedMemRefDescriptor::undef(builder, loc, descriptorType); - Value rank = desc.rank(builder, loc); - updatedDesc.setRank(builder, loc, rank); - updatedDesc.setMemRefDescPtr(builder, loc, memory); + auto descCpy = UnrankedMemRefDescriptor::undef(rewriter, loc, descTy); + descCpy.setRank(rewriter, loc, desc.rank(rewriter, loc)); + descCpy.setMemRefDescPtr(rewriter, loc, innerDescCpy); + + // Propagate the new descriptor. + rewriter.create(loc, Value(descCpy), continuationBlock); + + // Generate the condition to decide if the inner descriptor is already on + // the stack (for small ranks) or if we have to copy it over (for large + // ranks). + rewriter.setInsertionPointToEnd(origBlock); + Value rank = desc.rank(rewriter, loc); + Value pred = rewriter.create(loc, LLVM::ICmpPredicate::ule, + rank, maxRankOnStackCst); + rewriter.create(loc, pred, continuationBlock, operands[i], + largeRankBlock, ValueRange{}); + + // Continue with the original descriptor or its on-stack copy, which are + // passed as a block argument. + rewriter.setInsertionPointToStart(continuationBlock); + operands[i] = continuationBlock->getArgument(0); + } +} + +void ConvertToLLVMPattern::copyUnrankedDescriptorsToBufferOrHeap( + ConversionPatternRewriter &rewriter, Location loc, int64_t maxRankOnStack, + TypeRange origTypes, ArrayRef bufferDesc, + SmallVectorImpl &operands) const { - operands[i] = updatedDesc; + // Check if there is any unranked operand to avoid shared constants. + if (llvm::none_of(origTypes, + [](Type ty) { return ty.isa(); })) { + return; } - return success(); + OpBuilder::InsertionGuard guard(rewriter); + + // Get common types and constants. 
+ Type indexTy = getTypeConverter()->getIndexType(); + Type voidPtrTy = LLVM::LLVMPointerType::get(rewriter.getI8Type()); + Type i1Ty = rewriter.getI1Type(); + // Use the index type for the rank bound to match the descriptor's rank. + Value maxRankOnStackCst = createIndexConstant(rewriter, loc, maxRankOnStack); + + // Find the malloc function. + auto module = rewriter.getInsertionPoint()->getParentOfType(); + LLVM::LLVMFuncOp mallocFunc = LLVM::lookupOrCreateMallocFn(module, indexTy); + + unsigned nextBuffer = 0; + for (unsigned i = 0; i < operands.size(); i++) { + + // Only copy unranked descriptors. + if (!origTypes[i].isa()) + continue; + + // Compute the size of the inner descriptor for allocation and copying. + UnrankedMemRefDescriptor desc(operands[i]); + Value allocationSize = desc.computeSize(rewriter, loc, *getTypeConverter()); + + // Split the block to insert descriptor copying logic. + Block *origBlock = rewriter.getBlock(); + Block *continuationBlock = + rewriter.splitBlock(origBlock, rewriter.getInsertionPoint()); + continuationBlock->addArgument(voidPtrTy); + + // Generate the block for small ranks. + // This is the case in which we can copy the inner descriptor to the + // available buffer. + Block *smallRankBlock = rewriter.createBlock(origBlock->getParent()); + Value buffer = bufferDesc[nextBuffer++]; + rewriter.create(loc, buffer, continuationBlock); + + // Generate the block for large ranks. + // This is the case in which we copy the inner descriptor to heap-allocated + // memory as the available buffer is too small. + Block *largeRankBlock = rewriter.createBlock(origBlock->getParent()); + Value newBuffer = + rewriter.create(loc, mallocFunc, allocationSize) + .getResult(0); + rewriter.create(loc, newBuffer, continuationBlock); + + // Generate the condition to decide if the inner descriptor can be copied to + // the available buffer (for small ranks) or if we need a bigger one (for + // large ranks).
+ rewriter.setInsertionPointToEnd(origBlock); + Value rank = desc.rank(rewriter, loc); + Value pred = rewriter.create(loc, LLVM::ICmpPredicate::ule, + rank, maxRankOnStackCst); + rewriter.create(loc, pred, smallRankBlock, largeRankBlock); + + // Continue with the selected buffer for the inner descriptor copy, which is + // passed as a block argument. + rewriter.setInsertionPointToStart(continuationBlock); + Value innerDescCpy = continuationBlock->getArgument(0); + + // Copy the inner descriptor to the new buffer. + Value innerDesc = desc.memRefDescPtr(rewriter, loc); + Value zero = rewriter.create(loc, i1Ty, + rewriter.getBoolAttr(false)); + rewriter.create(loc, innerDescCpy, innerDesc, + allocationSize, zero); + + // Create a new descriptor. The same descriptor can be returned multiple + // times, attempting to modify its pointer can lead to memory leaks + // (allocated twice and overwritten) or double frees (the caller does not + // know if the descriptor points to the same memory). + Type descTy = getTypeConverter()->convertType(origTypes[i]); + auto descCpy = UnrankedMemRefDescriptor::undef(rewriter, loc, descTy); + descCpy.setRank(rewriter, loc, rank); + descCpy.setMemRefDescPtr(rewriter, loc, innerDescCpy); + + operands[i] = descCpy; + } } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -128,10 +128,14 @@ return LLVM::LLVMPointerType::get(converted); } -// Function types are converted to LLVM Function types by recursively converting -// argument and result types. If MLIR Function has zero results, the LLVM -// Function has one VoidType result. If MLIR Function has more than one result, -// they are into an LLVM StructType in their order of appearance. 
+// Function types are converted to LLVM function types by elementwise converting +// argument and result types. If the MLIR function has zero results, the LLVM +// function has one VoidType result. If the MLIR function has more than one +// result, they are packed into an LLVM StructType in their order of appearance. +// For every unranked memref result of the MLIR function, the LLVM function +// expects one preceding buffer argument. These are used to avoid dynamic +// memory allocation for the inner descriptors if their rank is sufficiently +// small (see option max-unranked-desc-buffer-rank). Type LLVMTypeConverter::convertFunctionSignature( FunctionType funcTy, bool isVariadic, LLVMTypeConverter::SignatureConversion &result) { @@ -150,6 +154,16 @@ SmallVector argTypes; argTypes.reserve(llvm::size(result.getConvertedTypes())); + + // Add one void ptr per unranked result. These are used to pass buffers for + // the inner descriptors. + auto voidPtrTy = + LLVM::LLVMPointerType::get(IntegerType::get(&getContext(), 8)); + for (Type ty : funcTy.getResults()) { + if (ty.isa()) + argTypes.push_back(voidPtrTy); + } + for (Type type : result.getConvertedTypes()) argTypes.push_back(type); diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -893,11 +893,10 @@ auto targetDesc = UnrankedMemRefDescriptor::undef( rewriter, loc, typeConverter->convertType(targetType)); targetDesc.setRank(rewriter, loc, resultRank); - SmallVector sizes; - UnrankedMemRefDescriptor::computeSizes(rewriter, loc, *getTypeConverter(), - targetDesc, sizes); + Value allocationSize = + targetDesc.computeSize(rewriter, loc, *getTypeConverter()); Value underlyingDescPtr = rewriter.create( - loc, getVoidPtrType(), sizes.front(), llvm::None); + loc, getVoidPtrType(), allocationSize, llvm::None); targetDesc.setMemRefDescPtr(rewriter, 
loc, underlyingDescPtr); // Extract pointers and offset from the source memref. diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -41,7 +41,6 @@ #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" -#include using namespace mlir; @@ -90,7 +89,38 @@ OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(wrapperFuncOp.addEntryBlock()); + // If any of the results is an unranked descriptor, extract the pre-allocated + // buffers from the result pointer and pass them on as individual preceding + // arguments. SmallVector args; + if (resultIsNowArg) { + Value resultPtr = wrapperFuncOp.getArgument(0); + if (type.getNumResults() == 1 && + type.getResults().front().isa()) { + Value loaded = rewriter.create(loc, resultPtr); + UnrankedMemRefDescriptor unrankedDescr(loaded); + Value innerDescrPtr = unrankedDescr.memRefDescPtr(rewriter, loc); + args.push_back(innerDescrPtr); + } else if (type.getNumResults() > 1 && + llvm::any_of(type.getResults(), [](Type ty) { + return ty.isa(); + })) { + Value loaded = rewriter.create(loc, resultPtr); + for (auto it : llvm::enumerate(type.getResults())) { + if (it.value().isa()) { + Type resultTy = loaded.getType() + .cast() + .getBody()[it.index()]; + Value loadedResult = rewriter.create( + loc, resultTy, loaded, rewriter.getI64ArrayAttr(it.index())); + UnrankedMemRefDescriptor unrankedDescr(loadedResult); + Value innerDescrPtr = unrankedDescr.memRefDescPtr(rewriter, loc); + args.push_back(innerDescrPtr); + } + } + } + } + size_t argOffset = resultIsNowArg ? 
1 : 0; for (auto &en : llvm::enumerate(type.getInputs())) { Value arg = wrapperFuncOp.getArgument(en.index() + argOffset); @@ -119,15 +149,15 @@ } } -/// Creates an auxiliary function with pointer-to-memref-descriptor-struct -/// arguments instead of unpacked arguments. Creates a body for the (external) -/// `newFuncOp` that allocates a memref descriptor on stack, packs the -/// individual arguments into this descriptor and passes a pointer to it into -/// the auxiliary function. If the result of the function cannot be directly -/// returned, we write it to a special first argument that provides a pointer -/// to a corresponding struct. This auxiliary external function is now -/// compatible with functions defined in C using pointers to C structs -/// corresponding to a memref descriptor. +/// Creates an auxiliary function declaration with +/// pointer-to-memref-descriptor-struct arguments instead of unpacked arguments. +/// Creates a body for the (external) `newFuncOp` that allocates a memref +/// descriptor on stack, packs the individual arguments into this descriptor and +/// passes a pointer to it into the auxiliary function. If the result of the +/// function cannot be directly returned, we write it to a special first +/// argument that provides a pointer to a corresponding struct. This auxiliary +/// external function is now compatible with functions defined in C using +/// pointers to C structs corresponding to a memref descriptor. static void wrapExternalFunction(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, FuncOp funcOp, LLVM::LLVMFuncOp newFuncOp) { @@ -157,21 +187,54 @@ FunctionType type = funcOp.getType(); SmallVector args; args.reserve(type.getNumInputs()); - ValueRange wrapperArgsRange(newFuncOp.getArguments()); + + // Count the number of unranked results, which require special treatment. 
+ int numUnrankedResults = llvm::count_if( + type.getResults(), [](Type ty) { return ty.isa(); }); if (resultIsNowArg) { + // Allocate the struct on the stack and pass the pointer. - Type resultType = + auto resultPtrTy = wrapperType.cast().getParamType(0); Value one = builder.create( loc, typeConverter.convertType(builder.getIndexType()), builder.getIntegerAttr(builder.getIndexType(), 1)); - Value result = builder.create(loc, resultType, one); - args.push_back(result); + Value resultPtr = builder.create(loc, resultPtrTy, one); + args.push_back(resultPtr); + + // If any of the results is an unranked descriptor, populate the + // pre-allocated result with the descriptor buffers that were passed as + // function arguments. + if (type.getNumResults() == 1 && + type.getResults().front().isa()) { + auto desc = UnrankedMemRefDescriptor::undef( + builder, loc, newFuncOp.getType().getReturnType()); + Value buffer = newFuncOp.getArgument(0); + desc.setMemRefDescPtr(builder, loc, buffer); + builder.create(loc, desc, resultPtr); + } else if (type.getNumResults() > 1 && numUnrankedResults > 0) { + int bufferIdx = 0; + Type resultTy = newFuncOp.getType().getReturnType(); + Value result = builder.create(loc, resultTy); + for (auto it : llvm::enumerate(type.getResults())) { + if (auto unrankedMemRefTy = it.value().dyn_cast()) { + Type descTy = typeConverter.convertType(unrankedMemRefTy); + auto desc = UnrankedMemRefDescriptor::undef(builder, loc, descTy); + Value buffer = newFuncOp.getArgument(bufferIdx++); + desc.setMemRefDescPtr(builder, loc, buffer); + result = builder.create( + loc, resultTy, result, desc, builder.getI64ArrayAttr(it.index())); + } + } + builder.create(loc, result, resultPtr); + } } // Iterate over the inputs of the original function and pack values into // memref descriptors if the original type is a memref. 
+ ValueRange wrapperArgsRange( + newFuncOp.getArguments().drop_front(numUnrankedResults)); for (auto &en : llvm::enumerate(type.getInputs())) { Value arg; int numToDrop = 1; @@ -281,8 +344,21 @@ rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(), newFuncOp.end()); if (failed(rewriter.convertRegionTypes(&newFuncOp.getBody(), *typeConverter, - &result))) + &result))) { return nullptr; + } + + // For every unranked result, add a preceding void ptr argument to pass the + // descriptor buffer. + if (!newFuncOp.getBody().empty()) { + auto loc = funcOp.getLoc(); + Block &entryBlock = newFuncOp.getBody().front(); + auto voidPtrTy = getVoidPtrType(); + for (Type ty : funcOp.getType().getResults()) { + if (ty.isa()) + entryBlock.insertArgument(static_cast(0), voidPtrTy, loc); + } + } return newFuncOp; } @@ -305,12 +381,13 @@ if (getTypeConverter()->getOptions().emitCWrappers || funcOp->getAttrOfType(kEmitIfaceAttrName)) { - if (newFuncOp.isExternal()) + if (newFuncOp.isExternal()) { wrapExternalFunction(rewriter, funcOp.getLoc(), *getTypeConverter(), funcOp, newFuncOp); - else + } else { wrapForExternalCallers(rewriter, funcOp.getLoc(), *getTypeConverter(), funcOp, newFuncOp); + } } rewriter.eraseOp(funcOp); @@ -526,23 +603,45 @@ LogicalResult matchAndRewrite(CallOpType callOp, typename CallOpType::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { + auto &typeConverter = *this->getTypeConverter(); + int64_t maxUnrankedDescBufferRank = + typeConverter.getOptions().maxUnrankedDescBufferRank; + // Pack the result types into a struct.
Type packedResult = nullptr; unsigned numResults = callOp.getNumResults(); auto resultTypes = llvm::to_vector<4>(callOp.getResultTypes()); if (numResults != 0) { - if (!(packedResult = - this->getTypeConverter()->packFunctionResults(resultTypes))) + if (!(packedResult = typeConverter.packFunctionResults(resultTypes))) return failure(); } - auto promoted = this->getTypeConverter()->promoteOperands( + SmallVector args; + + // Create and pass a stack-allocated buffer for every unranked result. + int numUnrankedResults = + llvm::count_if(callOp.getResultTypes(), + [](Type ty) { return ty.isa(); }); + if (numUnrankedResults > 0) { + auto loc = callOp.getLoc(); + Value bufferSize = this->createIndexConstant( + rewriter, loc, + UnrankedMemRefDescriptor::getSize(typeConverter, + maxUnrankedDescBufferRank)); + for (int i = 0; i < numUnrankedResults; i++) { + args.push_back(rewriter.create( + callOp.getLoc(), this->getVoidPtrType(), bufferSize)); + } + } + + auto promoted = typeConverter.promoteOperands( callOp.getLoc(), /*opOperands=*/callOp->getOperands(), adaptor.getOperands(), rewriter); + args.append(promoted.begin(), promoted.end()); auto newOp = rewriter.create( callOp.getLoc(), packedResult ? TypeRange(packedResult) : TypeRange(), - promoted, callOp->getAttrs()); + args, callOp->getAttrs()); SmallVector results; if (numResults < 2) { @@ -553,25 +652,24 @@ // Extract individual results from the structure and return them as list. results.reserve(numResults); for (unsigned i = 0; i < numResults; ++i) { - auto type = - this->typeConverter->convertType(callOp.getResult(i).getType()); + auto type = typeConverter.convertType(callOp.getResult(i).getType()); results.push_back(rewriter.create( callOp.getLoc(), type, newOp->getResult(0), rewriter.getI64ArrayAttr(i))); } } - if (this->getTypeConverter()->getOptions().useBarePtrCallConv) { + if (typeConverter.getOptions().useBarePtrCallConv) { // For the bare-ptr calling convention, promote memref results to // descriptors. 
assert(results.size() == resultTypes.size() && "The number of arguments and types doesn't match"); - this->getTypeConverter()->promoteBarePtrsToDescriptors( - rewriter, callOp.getLoc(), resultTypes, results); - } else if (failed(this->copyUnrankedDescriptors(rewriter, callOp.getLoc(), - resultTypes, results, - /*toDynamic=*/false))) { - return failure(); + typeConverter.promoteBarePtrsToDescriptors(rewriter, callOp.getLoc(), + resultTypes, results); + } else { + this->copyUnrankedDescriptorsToStack(rewriter, callOp.getLoc(), + maxUnrankedDescBufferRank, + resultTypes, results); } rewriter.replaceOp(callOp, results); @@ -813,9 +911,14 @@ } } else { updatedOperands = llvm::to_vector<4>(adaptor.getOperands()); - (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(), - updatedOperands, - /*toDynamic=*/true); + + auto funcOp = op->getParentOfType(); + auto descBuffers = llvm::to_vector<8>(llvm::map_range( + funcOp.getArguments(), [](BlockArgument a) { return Value(a); })); + copyUnrankedDescriptorsToBufferOrHeap( + rewriter, loc, + getTypeConverter()->getOptions().maxUnrankedDescBufferRank, + op.getOperands().getTypes(), descBuffers, updatedOperands); } // If ReturnOp has 0 or 1 operand, create it and return immediately. 
@@ -1190,10 +1293,11 @@ struct LLVMLoweringPass : public ConvertStandardToLLVMBase { LLVMLoweringPass() = default; LLVMLoweringPass(bool useBarePtrCallConv, bool emitCWrappers, - unsigned indexBitwidth, bool useAlignedAlloc, - const llvm::DataLayout &dataLayout) { + int64_t maxUnrankedDescBufferRank, unsigned indexBitwidth, + bool useAlignedAlloc, const llvm::DataLayout &dataLayout) { this->useBarePtrCallConv = useBarePtrCallConv; this->emitCWrappers = emitCWrappers; + this->maxUnrankedDescBufferRank = maxUnrankedDescBufferRank; this->indexBitwidth = indexBitwidth; this->dataLayout = dataLayout.getStringRepresentation(); } @@ -1222,6 +1326,7 @@ dataLayoutAnalysis.getAtOrAbove(m)); options.useBarePtrCallConv = useBarePtrCallConv; options.emitCWrappers = emitCWrappers; + options.maxUnrankedDescBufferRank = maxUnrankedDescBufferRank; if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout) options.overrideIndexBitwidth(indexBitwidth); options.dataLayout = llvm::DataLayout(this->dataLayout); @@ -1257,5 +1362,6 @@ (allocLowering == LowerToLLVMOptions::AllocLowering::AlignedAlloc); return std::make_unique( options.useBarePtrCallConv, options.emitCWrappers, - options.getIndexBitwidth(), useAlignedAlloc, options.dataLayout); + options.maxUnrankedDescBufferRank, options.getIndexBitwidth(), + useAlignedAlloc, options.dataLayout); } diff --git a/mlir/test/Conversion/StandardToLLVM/calling-convention-external-c-function-callee.mlir b/mlir/test/Conversion/StandardToLLVM/calling-convention-external-c-function-callee.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Conversion/StandardToLLVM/calling-convention-external-c-function-callee.mlir @@ -0,0 +1,291 @@ +// RUN: mlir-opt %s \ +// RUN: --convert-memref-to-llvm \ +// RUN: --convert-std-to-llvm='max-unranked-desc-buffer-rank=5' | FileCheck %s + +func private @external_no_result(%arg0 : memref) + attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_no_result +// CHECK-SAME: %[[ALLOC:.*]]: !llvm.ptr, 
%[[ALIGN:.*]]: !llvm.ptr, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[SIZE1:.*]]: i64, %[[STRIDE0:.*]]: i64, %[[STRIDE1:.*]]: i64 + +// Populate the descriptor for arg0. +// CHECK: %[[DESC0:.*]] = llvm.mlir.undef : [[DESC_TY:!llvm.struct<\(ptr, ptr, i64, array<2 x i64>, array<2 x i64>\)>]] +// CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[ALLOC]], %[[DESC0]][0] +// CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[ALIGN]], %[[DESC1]][1] +// CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[OFFSET]], %[[DESC2]][2] +// CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[SIZE0]], %[[DESC3]][3, 0] +// CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[STRIDE0]], %[[DESC4]][4, 0] +// CHECK: %[[DESC6:.*]] = llvm.insertvalue %[[SIZE1]], %[[DESC5]][3, 1] +// CHECK: %[[DESC7:.*]] = llvm.insertvalue %[[STRIDE1]], %[[DESC6]][4, 1] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG_PTR:.*]] = llvm.alloca %[[C1]] x [[DESC_TY]] +// CHECK: llvm.store %[[DESC7]], %[[ARG_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_no_result(%[[ARG_PTR]]) +// CHECK: llvm.return + +// Verify that an interface function is emitted. +// CHECK-LABEL: llvm.func @_mlir_ciface_external_no_result +// CHECK-SAME: (!llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>) + + +func private @external_single_result(%arg0 : memref) -> memref + attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_single_result +// CHECK-SAME: %[[ALLOC:.*]]: !llvm.ptr, %[[ALIGN:.*]]: !llvm.ptr, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[STRIDE0:.*]]: i64 + +// Allocate result on stack. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[RESUT_PTR:.*]] = llvm.alloca %[[C1]] x [[RESULT_DESC_TY:!llvm.struct<\(ptr, ptr, i64, array<1 x i64>, array<1 x i64>\)>]] + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef : [[ARG_DESC_TY:!llvm.struct<\(ptr, ptr, i64, array<1 x i64>, array<1 x i64>\)>]] +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ALLOC]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ALIGN]], %[[ARG_DESC1]][1] +// CHECK: %[[ARG_DESC3:.*]] = llvm.insertvalue %[[OFFSET]], %[[ARG_DESC2]][2] +// CHECK: %[[ARG_DESC4:.*]] = llvm.insertvalue %[[SIZE0]], %[[ARG_DESC3]][3, 0] +// CHECK: %[[ARG_DESC5:.*]] = llvm.insertvalue %[[STRIDE0]], %[[ARG_DESC4]][4, 0] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG_DESC_TY]] +// CHECK: llvm.store %[[ARG_DESC5]], %[[ARG_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_single_result(%[[RESUT_PTR]], %[[ARG_PTR]]) + +// Load and return the result. +// CHECK: %[[RESULT:.*]] = llvm.load %[[RESUT_PTR]] +// CHECK: llvm.return %[[RESULT]] + +// Verify that an interface function is emitted. +// CHECK-LABEL: llvm.func @_mlir_ciface_external_single_result +// CHECK: (!llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>>) + + +func private @external_multiple_result(%arg0 : memref) + -> (memref, memref, i64, f32) + attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_multiple_result +// CHECK-SAME: %[[ALLOC:.*]]: !llvm.ptr, %[[ALIGN:.*]]: !llvm.ptr, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[SIZE1:.*]]: i64, %[[STRIDE0:.*]]: i64, %[[STRIDE1:.*]]: i64 + +// Allocate result on stack. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[RESULT_PTR:.*]] = llvm.alloca %[[C1]] x [[RESULT_DESC_TY:!llvm.struct<\(struct<\(ptr, ptr, i64, array<2 x i64>, array<2 x i64>\)>, struct<\(ptr, ptr, i64, array<1 x i64>, array<1 x i64>\)>, i64, f32\)>]] + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef : [[ARG_DESC_TY:!llvm.struct<\(ptr, ptr, i64, array<2 x i64>, array<2 x i64>\)>]] +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ALLOC]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ALIGN]], %[[ARG_DESC1]][1] +// CHECK: %[[ARG_DESC3:.*]] = llvm.insertvalue %[[OFFSET]], %[[ARG_DESC2]][2] +// CHECK: %[[ARG_DESC4:.*]] = llvm.insertvalue %[[SIZE0]], %[[ARG_DESC3]][3, 0] +// CHECK: %[[ARG_DESC5:.*]] = llvm.insertvalue %[[STRIDE0]], %[[ARG_DESC4]][4, 0] +// CHECK: %[[ARG_DESC6:.*]] = llvm.insertvalue %[[SIZE1]], %[[ARG_DESC5]][3, 1] +// CHECK: %[[ARG_DESC7:.*]] = llvm.insertvalue %[[STRIDE1]], %[[ARG_DESC6]][4, 1] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG_DESC_TY]] +// CHECK: llvm.store %[[ARG_DESC7]], %[[ARG_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_multiple_result(%[[RESULT_PTR]], %[[ARG_PTR]]) + +// Load and return the result. +// CHECK: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] +// CHECK: llvm.return %[[RESULT]] + +// Verify that an interface function is emitted. 
+// CHECK-LABEL: llvm.func @_mlir_ciface_external_multiple_result +// CHECK-SAME: (!llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>, i64, f32)>>, !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>) + +func private @external_multiple_args(%arg0 : i64, %arg1 : memref, + %arg2 : memref, %arg3 : f32) attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_multiple_args +// CHECK-SAME: %[[IARG:arg0]]: i64, +// CHECK-SAME: %[[ALLOC0:arg1]]: !llvm.ptr, %[[ALIGN0:arg2]]: !llvm.ptr, %[[OFFSET0:arg3]]: i64, %[[SIZE00:arg4]]: i64, %[[SIZE01:arg5]]: i64, %[[STRIDE00:arg6]]: i64, %[[STRIDE01:arg7]]: i64, +// CHECK-SAME: %[[ALLOC1:arg8]]: !llvm.ptr, %[[ALIGN1:arg9]]: !llvm.ptr, %[[OFFSET1:arg10]]: i64, %[[SIZE10:arg11]]: i64, %[[STRIDE10:arg12]]: i64, +// CHECK-SAME: %[[FARG:arg13]]: f32 + +// Populate the descriptor for arg1. +// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef : [[ARG0_DESC_TY:!llvm.struct<\(ptr, ptr, i64, array<2 x i64>, array<2 x i64>\)>]] +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ALLOC0]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ALIGN0]], %[[ARG0_DESC1]][1] +// CHECK: %[[ARG0_DESC3:.*]] = llvm.insertvalue %[[OFFSET0]], %[[ARG0_DESC2]][2] +// CHECK: %[[ARG0_DESC4:.*]] = llvm.insertvalue %[[SIZE00]], %[[ARG0_DESC3]][3, 0] +// CHECK: %[[ARG0_DESC5:.*]] = llvm.insertvalue %[[STRIDE00]], %[[ARG0_DESC4]][4, 0] +// CHECK: %[[ARG0_DESC6:.*]] = llvm.insertvalue %[[SIZE01]], %[[ARG0_DESC5]][3, 1] +// CHECK: %[[ARG0_DESC7:.*]] = llvm.insertvalue %[[STRIDE01]], %[[ARG0_DESC6]][4, 1] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG0_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG0_DESC_TY]] +// CHECK: llvm.store %[[ARG0_DESC7]], %[[ARG0_PTR]] + +// Populate the descriptor for arg2. 
+// CHECK: %[[ARG1_DESC0:.*]] = llvm.mlir.undef : [[ARG1_DESC_TY:!llvm.struct<\(ptr, ptr, i64, array<1 x i64>, array<1 x i64>\)>]] +// CHECK: %[[ARG1_DESC1:.*]] = llvm.insertvalue %[[ALLOC1]], %[[ARG1_DESC0]][0] +// CHECK: %[[ARG1_DESC2:.*]] = llvm.insertvalue %[[ALIGN1]], %[[ARG1_DESC1]][1] +// CHECK: %[[ARG1_DESC3:.*]] = llvm.insertvalue %[[OFFSET1]], %[[ARG1_DESC2]][2] +// CHECK: %[[ARG1_DESC4:.*]] = llvm.insertvalue %[[SIZE10]], %[[ARG1_DESC3]][3, 0] +// CHECK: %[[ARG1_DESC5:.*]] = llvm.insertvalue %[[STRIDE10]], %[[ARG1_DESC4]][4, 0] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG1_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG1_DESC_TY]] +// CHECK: llvm.store %[[ARG1_DESC5]], %[[ARG1_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_multiple_args(%[[IARG]], %[[ARG0_PTR]], %[[ARG1_PTR]], %[[FARG]]) +// CHECK: llvm.return + +// Verify that an interface function is emitted. +// CHECK-LABEL: llvm.func @_mlir_ciface_external_multiple_args +// CHECK-SAME: (i64, !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>, !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>>, f32) + + +func private @external_no_result_unranked(%arg0 : memref<*xf32>) + attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_no_result_unranked +// CHECK-SAME: %[[RANK:.*]]: i64, %[[INNER_DESC:.*]]: !llvm.ptr + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef : [[ARG_DESC_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[INNER_DESC]], %[[ARG_DESC1]][1] + +// Allocate on stack and store to comply with C calling convention. 
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG_DESC_TY]] +// CHECK: llvm.store %[[ARG_DESC2]], %[[ARG_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_no_result_unranked(%[[ARG_PTR]]) +// CHECK: llvm.return + +// Verify that an interface function is emitted. +// CHECK-LABEL: llvm.func @_mlir_ciface_external_no_result_unranked +// CHECK-SAME: (!llvm.ptr)>>) + + +func private @external_single_result_unranked(%arg0 : memref<*xf32>) + -> memref<*xf32> attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_single_result_unranked +// CHECK-SAME: %[[RESULT_INNER_DESC_BUFFER:.*]]: !llvm.ptr, %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Allocate result on stack and populate buffer for inner descriptor. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[RESULT_PTR:.*]] = llvm.alloca %[[C1]] x [[RESULT_DESC_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[RESULT0:.*]] = llvm.mlir.undef : [[RESULT_DESC_TY]] +// CHECK: %[[RESULT1:.*]] = llvm.insertvalue %[[RESULT_INNER_DESC_BUFFER]], %[[RESULT0]][1] +// CHECK: llvm.store %[[RESULT1]], %[[RESULT_PTR]] + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef : [[ARG_DESC_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG_DESC_TY]] +// CHECK: llvm.store %[[ARG_DESC2]], %[[ARG_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_single_result_unranked(%[[RESULT_PTR]], %[[ARG_PTR]]) + +// Load and return the result. 
+// CHECK: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] +// CHECK: llvm.return %[[RESULT]] + +// Verify that an interface function is emitted. +// CHECK-LABEL: llvm.func @_mlir_ciface_external_single_result_unranked +// CHECK-SAME: (!llvm.ptr)>>, !llvm.ptr)>>) + + +func private @external_multiple_result_unranked(%arg0 : memref<*xf32>) + -> (f32, i64, memref<*xf32>, memref<*xf32>) + attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_multiple_result_unranked +// CHECK-SAME: %[[RESULT_INNER_DESC_BUFFER0:.*]]: !llvm.ptr, %[[RESULT_INNER_DESC_BUFFER1:.*]]: !llvm.ptr, +// CHECK-SAME: %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Allocate result on stack and populate buffers for inner descriptors. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[RESULT_PTR:.*]] = llvm.alloca %[[C1]] x [[RESULT_TY:!llvm.struct<\(f32, i64, struct<\(i64, ptr\)>, struct<\(i64, ptr\)>\)>]] +// CHECK: %[[RESULT0:.*]] = llvm.mlir.undef : [[RESULT_TY]] +// CHECK: %[[RESULT_DESC00:.*]] = llvm.mlir.undef : [[RESULT_DESC0_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[RESULT_DESC01:.*]] = llvm.insertvalue %[[RESULT_INNER_DESC_BUFFER0]], %[[RESULT_DESC00]][1] +// CHECK: %[[RESULT1:.*]] = llvm.insertvalue %[[RESULT_DESC01]], %[[RESULT0]][2] +// CHECK: %[[RESULT_DESC10:.*]] = llvm.mlir.undef : [[RESULT_DESC1_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[RESULT_DESC11:.*]] = llvm.insertvalue %[[RESULT_INNER_DESC_BUFFER1]], %[[RESULT_DESC10]][1] +// CHECK: %[[RESULT2:.*]] = llvm.insertvalue %[[RESULT_DESC11]], %[[RESULT1]][3] +// CHECK: llvm.store %[[RESULT2]], %[[RESULT_PTR]] + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef : [[ARG_DESC_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// Allocate on stack and store to comply with C calling convention. 
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG_DESC_TY]] +// CHECK: llvm.store %[[ARG_DESC2]], %[[ARG_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_multiple_result_unranked(%[[RESULT_PTR]], %[[ARG_PTR]]) + +// Load and return the result. +// CHECK: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] +// CHECK: llvm.return %[[RESULT]] + +// Verify that an interface function is emitted. +// CHECK-LABEL: llvm.func @_mlir_ciface_external_multiple_result_unranked +// CHECK-SAME: (!llvm.ptr)>, struct<(i64, ptr)>)>>, !llvm.ptr)>>) + + +func private @external_multiple_args_unranked(%arg0 : memref<*xf32>, + %arg1 : f32, %arg2 : memref<*xf32>, %arg3 : i64) + attributes { llvm.emit_c_interface } + +// CHECK-LABEL: llvm.func @external_multiple_args_unranked +// CHECK-SAME: %[[ARG0_RANK:.*]]: i64, %[[ARG0_INNER_DESC:arg1]]: !llvm.ptr, +// CHECK-SAME: %[[FARG:arg2]]: f32, +// CHECK-SAME: %[[ARG2_RANK:.*]]: i64, %[[ARG2_INNER_DESC:arg4]]: !llvm.ptr, +// CHECK-SAME: %[[IARG:.*]]: i64 + +// Populate the descriptor for arg0. +// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef : [[ARG0_DESC_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ARG0_RANK]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ARG0_INNER_DESC]], %[[ARG0_DESC1]][1] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG0_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG0_DESC_TY]] +// CHECK: llvm.store %[[ARG0_DESC2]], %[[ARG0_PTR]] + +// Populate the descriptor for arg2. 
+// CHECK: %[[ARG2_DESC0:.*]] = llvm.mlir.undef : [[ARG2_DESC_TY:!llvm.struct<\(i64, ptr\)>]] +// CHECK: %[[ARG2_DESC1:.*]] = llvm.insertvalue %[[ARG2_RANK]], %[[ARG2_DESC0]][0] +// CHECK: %[[ARG2_DESC2:.*]] = llvm.insertvalue %[[ARG2_INNER_DESC]], %[[ARG2_DESC1]][1] + +// Allocate on stack and store to comply with C calling convention. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[ARG2_PTR:.*]] = llvm.alloca %[[C1]] x [[ARG2_DESC_TY]] +// CHECK: llvm.store %[[ARG2_DESC2]], %[[ARG2_PTR]] + +// Call the interface function. +// CHECK: llvm.call @_mlir_ciface_external_multiple_args_unranked(%[[ARG0_PTR]], %[[FARG]], %[[ARG2_PTR]], %[[IARG]]) +// CHECK: llvm.return + +// Verify that an interface function is emitted. +// CHECK-LABEL: llvm.func @_mlir_ciface_external_multiple_args_unranked +// CHECK-SAME: (!llvm.ptr)>>, f32, !llvm.ptr)>>, i64) diff --git a/mlir/test/Conversion/StandardToLLVM/calling-convention-external-c-function-caller.mlir b/mlir/test/Conversion/StandardToLLVM/calling-convention-external-c-function-caller.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Conversion/StandardToLLVM/calling-convention-external-c-function-caller.mlir @@ -0,0 +1,496 @@ +// RUN: mlir-opt %s \ +// RUN: --convert-memref-to-llvm \ +// RUN: --convert-std-to-llvm='max-unranked-desc-buffer-rank=5' | FileCheck %s + +func @callee_no_result(%arg0 : memref) + attributes { llvm.emit_c_interface } { + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = memref.load %arg0[%c0, %c1] : memref + return +} + +// CHECK-LABEL: llvm.func @callee_no_result +// CHECK-SAME: %[[ALLOC:.*]]: !llvm.ptr, %[[ALIGN:.*]]: !llvm.ptr, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[SIZE1:.*]]: i64, %[[STRIDE0:.*]]: i64, %[[STRIDE1:.*]]: i64 + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ALLOC]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ALIGN]], %[[ARG_DESC1]][1] +// CHECK: %[[ARG_DESC3:.*]] = llvm.insertvalue %[[OFFSET]], %[[ARG_DESC2]][2] +// CHECK: %[[ARG_DESC4:.*]] = llvm.insertvalue %[[SIZE0]], %[[ARG_DESC3]][3, 0] +// CHECK: %[[ARG_DESC5:.*]] = llvm.insertvalue %[[STRIDE0]], %[[ARG_DESC4]][4, 0] +// CHECK: %[[ARG_DESC6:.*]] = llvm.insertvalue %[[SIZE1]], %[[ARG_DESC5]][3, 1] +// CHECK: %[[ARG_DESC7:.*]] = llvm.insertvalue %[[STRIDE1]], %[[ARG_DESC6]][4, 1] + +// CHECK: %{{.*}} = llvm.load %{{.*}} +// CHECK: llvm.return + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_no_result +// CHECK-SAME: %[[ARG_PTR:.*]]: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>> + +// Unpack descriptor for arg0. +// CHECK: %[[ARG_DESC:.*]] = llvm.load %[[ARG_PTR]] +// CHECK: %[[ALLOC:.*]] = llvm.extractvalue %[[ARG_DESC]][0] +// CHECK: %[[ALIGN:.*]] = llvm.extractvalue %[[ARG_DESC]][1] +// CHECK: %[[OFFSET:.*]] = llvm.extractvalue %[[ARG_DESC]][2] +// CHECK: %[[SIZE0:.*]] = llvm.extractvalue %[[ARG_DESC]][3, 0] +// CHECK: %[[SIZE1:.*]] = llvm.extractvalue %[[ARG_DESC]][3, 1] +// CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[ARG_DESC]][4, 0] +// CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[ARG_DESC]][4, 1] + +// Call the function. +// CHECK: llvm.call @callee_no_result(%[[ALLOC]], %[[ALIGN]], %[[OFFSET]], %[[SIZE0]], %[[SIZE1]], %[[STRIDE0]], %[[STRIDE1]]) +// CHECK: llvm.return + + +func @callee_single_result(%arg0 : memref) -> memref + attributes { llvm.emit_c_interface } { + return %arg0 : memref +} + +// CHECK-LABEL: llvm.func @callee_single_result +// CHECK-SAME: %[[ALLOC:.*]]: !llvm.ptr, %[[ALIGN:.*]]: !llvm.ptr, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[STRIDE0:.*]]: i64 + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ALLOC]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ALIGN]], %[[ARG_DESC1]][1] +// CHECK: %[[ARG_DESC3:.*]] = llvm.insertvalue %[[OFFSET]], %[[ARG_DESC2]][2] +// CHECK: %[[ARG_DESC4:.*]] = llvm.insertvalue %[[SIZE0]], %[[ARG_DESC3]][3, 0] +// CHECK: %[[ARG_DESC5:.*]] = llvm.insertvalue %[[STRIDE0]], %[[ARG_DESC4]][4, 0] + +// CHECK: llvm.return %[[ARG_DESC5]] + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_single_result +// CHECK-SAME: %[[RESULT_PTR:.*]]: !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>>, +// CHECK-SAME: %[[ARG_PTR:.*]]: !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + +// Unpack descriptor for arg0. +// CHECK: %[[ARG_DESC:.*]] = llvm.load %[[ARG_PTR]] +// CHECK: %[[ALLOC:.*]] = llvm.extractvalue %[[ARG_DESC]][0] +// CHECK: %[[ALIGN:.*]] = llvm.extractvalue %[[ARG_DESC]][1] +// CHECK: %[[OFFSET:.*]] = llvm.extractvalue %[[ARG_DESC]][2] +// CHECK: %[[SIZE0:.*]] = llvm.extractvalue %[[ARG_DESC]][3, 0] +// CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[ARG_DESC]][4, 0] + +// Call the function. +// CHECK: %[[RESULT:.*]] = llvm.call @callee_single_result(%[[ALLOC]], %[[ALIGN]], %[[OFFSET]], %[[SIZE0]], %[[STRIDE0]]) + +// Store the result and return. 
+// CHECK: llvm.store %[[RESULT]], %[[RESULT_PTR]] +// CHECK: llvm.return + + +func @callee_multiple_result(%arg0 : memref, + %arg1 : memref) -> (memref, memref, i64, f32) + attributes { llvm.emit_c_interface } { + %c3 = constant 3 : i64 + %pi = constant 3.141 : f32 + return %arg0, %arg1, %c3, %pi : memref, memref, i64, f32 +} + +// CHECK-LABEL: llvm.func @callee_multiple_result +// CHECK-SAME: %[[ALLOC0:.*]]: !llvm.ptr, %[[ALIGN0:.*]]: !llvm.ptr, %[[OFFSET0:.*]]: i64, %[[SIZE00:.*]]: i64, %[[SIZE01:.*]]: i64, %[[STRIDE00:.*]]: i64, %[[STRIDE01:arg6]]: i64, +// CHECK-SAME: %[[ALLOC1:.*]]: !llvm.ptr, %[[ALIGN1:.*]]: !llvm.ptr, %[[OFFSET1:.*]]: i64, %[[SIZE10:.*]]: i64, %[[STRIDE10:arg11]]: i64 + +// Populate the descriptor for arg0. +// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ALLOC0]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ALIGN0]], %[[ARG0_DESC1]][1] +// CHECK: %[[ARG0_DESC3:.*]] = llvm.insertvalue %[[OFFSET0]], %[[ARG0_DESC2]][2] +// CHECK: %[[ARG0_DESC4:.*]] = llvm.insertvalue %[[SIZE00]], %[[ARG0_DESC3]][3, 0] +// CHECK: %[[ARG0_DESC5:.*]] = llvm.insertvalue %[[STRIDE00]], %[[ARG0_DESC4]][4, 0] +// CHECK: %[[ARG0_DESC6:.*]] = llvm.insertvalue %[[SIZE01]], %[[ARG0_DESC5]][3, 1] +// CHECK: %[[ARG0_DESC7:.*]] = llvm.insertvalue %[[STRIDE01]], %[[ARG0_DESC6]][4, 1] + +// Populate the descriptor for arg1. +// CHECK: %[[ARG1_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG1_DESC1:.*]] = llvm.insertvalue %[[ALLOC1]], %[[ARG1_DESC0]][0] +// CHECK: %[[ARG1_DESC2:.*]] = llvm.insertvalue %[[ALIGN1]], %[[ARG1_DESC1]][1] +// CHECK: %[[ARG1_DESC3:.*]] = llvm.insertvalue %[[OFFSET1]], %[[ARG1_DESC2]][2] +// CHECK: %[[ARG1_DESC4:.*]] = llvm.insertvalue %[[SIZE10]], %[[ARG1_DESC3]][3, 0] +// CHECK: %[[ARG1_DESC5:.*]] = llvm.insertvalue %[[STRIDE10]], %[[ARG1_DESC4]][4, 0] + +// Populate and return result. 
+// CHECK: %[[RESULT0:.*]] = llvm.mlir.undef +// CHECK: %[[RESULT1:.*]] = llvm.insertvalue %[[ARG0_DESC7]], %[[RESULT0]][0] +// CHECK: %[[RESULT2:.*]] = llvm.insertvalue %[[ARG1_DESC5]], %[[RESULT1]][1] +// CHECK: %[[RESULT3:.*]] = llvm.insertvalue %{{.*}}, %[[RESULT2]][2] +// CHECK: %[[RESULT4:.*]] = llvm.insertvalue %{{.*}}, %[[RESULT3]][3] +// CHECK: llvm.return %[[RESULT4]] + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_multiple_result +// CHECK-SAME: %[[RESULT_PTR:.*]]: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>, i64, f32)>>, +// CHECK-SAME: %[[ARG0_PTR:.*]]: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>, +// CHECK-SAME: %[[ARG1_PTR:.*]]: !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> + +// Unpack descriptor for arg0. +// CHECK: %[[ARG0_DESC:.*]] = llvm.load %[[ARG0_PTR]] +// CHECK: %[[ALLOC0:.*]] = llvm.extractvalue %[[ARG0_DESC]][0] +// CHECK: %[[ALIGN0:.*]] = llvm.extractvalue %[[ARG0_DESC]][1] +// CHECK: %[[OFFSET0:.*]] = llvm.extractvalue %[[ARG0_DESC]][2] +// CHECK: %[[SIZE00:.*]] = llvm.extractvalue %[[ARG0_DESC]][3, 0] +// CHECK: %[[SIZE01:.*]] = llvm.extractvalue %[[ARG0_DESC]][3, 1] +// CHECK: %[[STRIDE00:.*]] = llvm.extractvalue %[[ARG0_DESC]][4, 0] +// CHECK: %[[STRIDE01:.*]] = llvm.extractvalue %[[ARG0_DESC]][4, 1] + +// Unpack descriptor for arg1. +// CHECK: %[[ARG1_DESC:.*]] = llvm.load %[[ARG1_PTR]] +// CHECK: %[[ALLOC1:.*]] = llvm.extractvalue %[[ARG1_DESC]][0] +// CHECK: %[[ALIGN1:.*]] = llvm.extractvalue %[[ARG1_DESC]][1] +// CHECK: %[[OFFSET1:.*]] = llvm.extractvalue %[[ARG1_DESC]][2] +// CHECK: %[[SIZE10:.*]] = llvm.extractvalue %[[ARG1_DESC]][3, 0] +// CHECK: %[[STRIDE10:.*]] = llvm.extractvalue %[[ARG1_DESC]][4, 0] + +// Call the function. 
+// CHECK: %[[RESULT:.*]] = llvm.call @callee_multiple_result(%[[ALLOC0]], %[[ALIGN0]], %[[OFFSET0]], %[[SIZE00]], %[[SIZE01]], %[[STRIDE00]], %[[STRIDE01]], %[[ALLOC1]], %[[ALIGN1]], %[[OFFSET1]], %[[SIZE10]], %[[STRIDE10]]) + +// Store the result and return. +// CHECK: llvm.store %[[RESULT]], %[[RESULT_PTR]] +// CHECK: llvm.return + + +func @callee_multiple_args(%arg0 : index, %arg1 : memref, + %arg2 : memref, %arg3 : f32) attributes { llvm.emit_c_interface } { + %c0 = constant 0 : index + %0 = memref.load %arg1[%c0, %arg0] : memref + %1 = memref.load %arg2[%arg0] : memref + return +} + +// CHECK-LABEL: llvm.func @callee_multiple_args +// CHECK-SAME: %[[IARG:arg0]]: i64, +// CHECK-SAME: %[[ALLOC0:.*]]: !llvm.ptr, %[[ALIGN0:.*]]: !llvm.ptr, %[[OFFSET0:.*]]: i64, %[[SIZE00:.*]]: i64, %[[SIZE01:.*]]: i64, %[[STRIDE00:.*]]: i64, %[[STRIDE01:arg7]]: i64, +// CHECK-SAME: %[[ALLOC1:.*]]: !llvm.ptr, %[[ALIGN1:.*]]: !llvm.ptr, %[[OFFSET1:.*]]: i64, %[[SIZE10:.*]]: i64, %[[STRIDE10:arg12]]: i64, +// CHECK-SAME: %[[FARG:.*]]: f32 + +// Populate the descriptor for arg1. +// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ALLOC0]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ALIGN0]], %[[ARG0_DESC1]][1] +// CHECK: %[[ARG0_DESC3:.*]] = llvm.insertvalue %[[OFFSET0]], %[[ARG0_DESC2]][2] +// CHECK: %[[ARG0_DESC4:.*]] = llvm.insertvalue %[[SIZE00]], %[[ARG0_DESC3]][3, 0] +// CHECK: %[[ARG0_DESC5:.*]] = llvm.insertvalue %[[STRIDE00]], %[[ARG0_DESC4]][4, 0] +// CHECK: %[[ARG0_DESC6:.*]] = llvm.insertvalue %[[SIZE01]], %[[ARG0_DESC5]][3, 1] +// CHECK: %[[ARG0_DESC7:.*]] = llvm.insertvalue %[[STRIDE01]], %[[ARG0_DESC6]][4, 1] + +// Populate the descriptor for arg2. 
+// CHECK: %[[ARG1_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG1_DESC1:.*]] = llvm.insertvalue %[[ALLOC1]], %[[ARG1_DESC0]][0] +// CHECK: %[[ARG1_DESC2:.*]] = llvm.insertvalue %[[ALIGN1]], %[[ARG1_DESC1]][1] +// CHECK: %[[ARG1_DESC3:.*]] = llvm.insertvalue %[[OFFSET1]], %[[ARG1_DESC2]][2] +// CHECK: %[[ARG1_DESC4:.*]] = llvm.insertvalue %[[SIZE10]], %[[ARG1_DESC3]][3, 0] +// CHECK: %[[ARG1_DESC5:.*]] = llvm.insertvalue %[[STRIDE10]], %[[ARG1_DESC4]][4, 0] + +// CHECK: %{{.*}} = llvm.load %{{.*}} +// CHECK: %{{.*}} = llvm.load %{{.*}} +// CHECK: llvm.return + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_multiple_args +// CHECK-SAME: %[[IARG:arg0]]: i64, +// CHECK-SAME: %[[ARG1_PTR:.*]]: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>, +// CHECK-SAME: %[[ARG2_PTR:.*]]: !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>>, +// CHECK-SAME: %[[FARG:.*]]: f32 + +// Unpack descriptor for arg1. +// CHECK: %[[ARG1_DESC:.*]] = llvm.load %[[ARG1_PTR]] +// CHECK: %[[ALLOC0:.*]] = llvm.extractvalue %[[ARG1_DESC]][0] +// CHECK: %[[ALIGN0:.*]] = llvm.extractvalue %[[ARG1_DESC]][1] +// CHECK: %[[OFFSET0:.*]] = llvm.extractvalue %[[ARG1_DESC]][2] +// CHECK: %[[SIZE00:.*]] = llvm.extractvalue %[[ARG1_DESC]][3, 0] +// CHECK: %[[SIZE01:.*]] = llvm.extractvalue %[[ARG1_DESC]][3, 1] +// CHECK: %[[STRIDE00:.*]] = llvm.extractvalue %[[ARG1_DESC]][4, 0] +// CHECK: %[[STRIDE01:.*]] = llvm.extractvalue %[[ARG1_DESC]][4, 1] + +// Unpack descriptor for arg2. +// CHECK: %[[ARG2_DESC:.*]] = llvm.load %[[ARG2_PTR]] +// CHECK: %[[ALLOC1:.*]] = llvm.extractvalue %[[ARG2_DESC]][0] +// CHECK: %[[ALIGN1:.*]] = llvm.extractvalue %[[ARG2_DESC]][1] +// CHECK: %[[OFFSET1:.*]] = llvm.extractvalue %[[ARG2_DESC]][2] +// CHECK: %[[SIZE10:.*]] = llvm.extractvalue %[[ARG2_DESC]][3, 0] +// CHECK: %[[STRIDE10:.*]] = llvm.extractvalue %[[ARG2_DESC]][4, 0] + +// Call the function. 
+// CHECK: llvm.call @callee_multiple_args(%[[IARG]], %[[ALLOC0]], %[[ALIGN0]], %[[OFFSET0]], %[[SIZE00]], %[[SIZE01]], %[[STRIDE00]], %[[STRIDE01]], %[[ALLOC1]], %[[ALIGN1]], %[[OFFSET1]], %[[SIZE10]], %[[STRIDE10]], %[[FARG]]) +// CHECK: llvm.return + + +func @callee_no_result_unranked(%arg0 : memref<*xf32>) + attributes { llvm.emit_c_interface } { + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = memref.cast %arg0 : memref<*xf32> to memref + %1 = memref.load %0[%c0, %c1] : memref + return +} + +// CHECK-LABEL: llvm.func @callee_no_result_unranked +// CHECK-SAME: %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// CHECK: %{{.*}} = llvm.load %{{.*}} +// CHECK: llvm.return + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_no_result_unranked +// CHECK-SAME: %[[ARG_PTR:.*]]: !llvm.ptr)>> + +// Unpack descriptor for arg0. +// CHECK: %[[ARG_DESC:.*]] = llvm.load %[[ARG_PTR]] +// CHECK: %[[ARG_RANK:.*]] = llvm.extractvalue %[[ARG_DESC]][0] +// CHECK: %[[ARG_INNER_DESC:.*]] = llvm.extractvalue %[[ARG_DESC]][1] + +// Call the function. +// CHECK: llvm.call @callee_no_result_unranked(%[[ARG_RANK]], %[[ARG_INNER_DESC]]) +// CHECK: llvm.return + + +func @callee_single_result_unranked(%arg0 : memref<*xf32>) -> memref<*xf32> + attributes { llvm.emit_c_interface } { + return %arg0 : memref<*xf32> +} + +// CHECK-LABEL: llvm.func @callee_single_result_unranked +// CHECK-SAME: %[[RESULT_INNER_DESC_BUFFER:.*]]: !llvm.ptr, +// CHECK-SAME: %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// Common constant. +// CHECK: %[[MAX_SUPPORTED_RANK:.*]] = llvm.mlir.constant(5 : i64) + +// Compute the result's inner descriptor size. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[SIZE:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Check if the inner descriptor fits into the buffer argument. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK]] +// CHECK: llvm.cond_br %[[PRED]], ^bb2, ^bb3 + +// Copy the inner descriptor to the selected buffer and return a copy of the +// unranked outer descriptor. +// CHECK: ^bb1(%[[SELECTED_BUFFER:.*]]: !llvm.ptr): +// CHECK: %[[ARG_INNER_DESC:.*]] = llvm.extractvalue %[[ARG_DESC2]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[SELECTED_BUFFER]], %[[ARG_INNER_DESC]], %[[SIZE]], %[[C0]]) +// CHECK: %[[RESULT_DESC0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK: %[[RESULT_DESC1:.*]] = llvm.insertvalue %[[RANK]], %[[RESULT_DESC0]][0] +// CHECK: %[[RESULT_DESC2:.*]] = llvm.insertvalue %[[SELECTED_BUFFER]], %[[RESULT_DESC1]][1] +// CHECK: llvm.return %[[RESULT_DESC2]] + +// Select the buffer argument to copy the inner descriptor to. 
+// CHECK: ^bb2: +// CHECK: llvm.br ^bb1(%[[RESULT_INNER_DESC_BUFFER]] : !llvm.ptr) + +// Allocate a new buffer to copy the inner descriptor to. +// CHECK: ^bb3: +// CHECK: %[[NEW_BUFFER:.*]] = llvm.call @malloc(%[[SIZE]]) +// CHECK: llvm.br ^bb1(%[[NEW_BUFFER]] : !llvm.ptr) + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_single_result_unranked +// CHECK-SAME: %[[RESULT_PTR:.*]]: !llvm.ptr)>>, +// CHECK-SAME: %[[ARG_PTR:.*]]: !llvm.ptr)>> + +// Extract inner descriptor buffer from pre-allocated result. +// CHECK: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] +// CHECK: %[[RESULT_INNER_DESC_BUFFER:.*]] = llvm.extractvalue %[[RESULT]][1] + +// Unpack descriptor for arg0. +// CHECK: %[[ARG_DESC:.*]] = llvm.load %[[ARG_PTR]] +// CHECK: %[[ARG_RANK:.*]] = llvm.extractvalue %[[ARG_DESC]][0] +// CHECK: %[[ARG_INNER_DESC:.*]] = llvm.extractvalue %[[ARG_DESC]][1] + +// Call the function. +// CHECK: %[[RESULT:.*]] = llvm.call @callee_single_result_unranked(%[[RESULT_INNER_DESC_BUFFER]], %[[ARG_RANK]], %[[ARG_INNER_DESC]]) + +// Store the result and return. +// CHECK: llvm.store %[[RESULT]], %[[RESULT_PTR]] +// CHECK: llvm.return + + +func @callee_multiple_result_unranked(%arg0 : memref<*xf32>) -> (f32, i64, + memref<*xf32>, memref<*xf32>) attributes { llvm.emit_c_interface } { + %pi = constant 3.141 : f32 + %c3 = constant 3 : i64 + return %pi, %c3, %arg0, %arg0 : f32, i64, memref<*xf32>, memref<*xf32> +} + +// CHECK-LABEL: llvm.func @callee_multiple_result_unranked +// CHECK-SAME: %[[RESULT_INNER_DESC_BUFFER0:.*]]: !llvm.ptr, %[[RESULT_INNER_DESC_BUFFER1:.*]]: !llvm.ptr, +// CHECK-SAME: %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// Common constant. 
+// CHECK: %[[MAX_SUPPORTED_RANK:.*]] = llvm.mlir.constant(5 : i64) + +// Compute first result's inner descriptor size. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[SIZE0:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Check if the inner descriptor fits into the buffer argument. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK]] +// CHECK: llvm.cond_br %[[PRED]], ^bb3, ^bb4 + +// Copy the inner descriptor to the selected buffer and create a copy of the +// unranked outer descriptor. +// CHECK: ^bb1(%[[SELECTED_BUFFER:.*]]: !llvm.ptr): +// CHECK: %[[ARG_INNER_DESC:.*]] = llvm.extractvalue %[[ARG_DESC2]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[SELECTED_BUFFER]], %[[ARG_INNER_DESC]], %[[SIZE0]], %[[C0]]) +// CHECK: %[[RESULT0_DESC0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK: %[[RESULT0_DESC1:.*]] = llvm.insertvalue %[[RANK]], %[[RESULT0_DESC0]][0] +// CHECK: %[[RESULT0_DESC2:.*]] = llvm.insertvalue %[[SELECTED_BUFFER]], %[[RESULT0_DESC1]][1] + +// Compute second result's inner descriptor size. 
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[SIZE1:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Check if the inner descriptor fits into the buffer argument. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK]] +// CHECK: llvm.cond_br %[[PRED]], ^bb5, ^bb6 + +// Copy the inner descriptor to the selected buffer and create a copy of the +// unranked outer descriptor. +// CHECK: ^bb2(%[[SELECTED_BUFFER:.*]]: !llvm.ptr): +// CHECK: %[[ARG_INNER_DESC:.*]] = llvm.extractvalue %[[ARG_DESC2]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[SELECTED_BUFFER]], %[[ARG_INNER_DESC]], %[[SIZE1]], %[[C0]]) +// CHECK: %[[RESULT1_DESC0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK: %[[RESULT1_DESC1:.*]] = llvm.insertvalue %[[RANK]], %[[RESULT1_DESC0]][0] +// CHECK: %[[RESULT1_DESC2:.*]] = llvm.insertvalue %[[SELECTED_BUFFER]], %[[RESULT1_DESC1]][1] + +// Populate and return result. 
+// CHECK: %[[RESULT0:.*]] = llvm.mlir.undef +// CHECK: %[[RESULT1:.*]] = llvm.insertvalue %{{.*}}, %[[RESULT0]][0] +// CHECK: %[[RESULT2:.*]] = llvm.insertvalue %{{.*}}, %[[RESULT1]][1] +// CHECK: %[[RESULT3:.*]] = llvm.insertvalue %[[RESULT0_DESC2]], %[[RESULT2]][2] +// CHECK: %[[RESULT4:.*]] = llvm.insertvalue %[[RESULT1_DESC2]], %[[RESULT3]][3] +// CHECK: llvm.return %[[RESULT4]] + +// Select the buffer argument to copy the inner descriptor to (first result). +// CHECK: ^bb3: +// CHECK: llvm.br ^bb1(%[[RESULT_INNER_DESC_BUFFER0]] : !llvm.ptr) + +// Allocate a new buffer to copy the inner descriptor to (first result). +// CHECK: ^bb4: +// CHECK: %[[NEW_BUFFER:.*]] = llvm.call @malloc(%[[SIZE0]]) +// CHECK: llvm.br ^bb1(%[[NEW_BUFFER]] : !llvm.ptr) + +// Select the buffer argument to copy the inner descriptor to (second result). +// CHECK: ^bb5: +// CHECK: llvm.br ^bb2(%[[RESULT_INNER_DESC_BUFFER1]] : !llvm.ptr) + +// Allocate a new buffer to copy the inner descriptor to (second result). +// CHECK: ^bb6: +// CHECK: %[[NEW_BUFFER:.*]] = llvm.call @malloc(%[[SIZE1]]) +// CHECK: llvm.br ^bb2(%[[NEW_BUFFER]] : !llvm.ptr) + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_multiple_result_unranked +// CHECK-SAME: %[[RESULT_PTR:.*]]: !llvm.ptr)>, struct<(i64, ptr)>)>>, +// CHECK-SAME: %[[ARG_PTR:.*]]: !llvm.ptr)>> + +// Extract inner descriptor buffers from the pre-allocated result. +// CHECK: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] +// CHECK: %[[RESULT_DESC0:.*]] = llvm.extractvalue %[[RESULT]][2] +// CHECK: %[[RESULT_INNER_DESC_BUFFER0:.*]] = llvm.extractvalue %[[RESULT_DESC0]][1] +// CHECK: %[[RESULT_DESC1:.*]] = llvm.extractvalue %[[RESULT]][3] +// CHECK: %[[RESULT_INNER_DESC_BUFFER1:.*]] = llvm.extractvalue %[[RESULT_DESC1]][1] + +// Unpack descriptor for arg0. 
+// CHECK: %[[ARG_DESC:.*]] = llvm.load %[[ARG_PTR]] +// CHECK: %[[ARG_RANK:.*]] = llvm.extractvalue %[[ARG_DESC]][0] +// CHECK: %[[ARG_INNER_DESC:.*]] = llvm.extractvalue %[[ARG_DESC]][1] + +// Call the function. +// CHECK: %[[RESULT:.*]] = llvm.call @callee_multiple_result_unranked(%[[RESULT_INNER_DESC_BUFFER0]], %[[RESULT_INNER_DESC_BUFFER1]], %[[ARG_RANK]], %[[ARG_INNER_DESC]]) + +// Store the result and return. +// CHECK: llvm.store %[[RESULT]], %[[RESULT_PTR]] +// CHECK: llvm.return + + +func @callee_multiple_args_unranked(%arg0 : memref<*xf32>, %arg1 : f32, + %arg2 : memref<*xf32>, %arg3 : index) attributes { llvm.emit_c_interface } { + %c0 = constant 0 : index + %0 = memref.cast %arg0 : memref<*xf32> to memref + %1 = memref.load %0[%c0, %arg3] : memref + %2 = memref.cast %arg2 : memref<*xf32> to memref + %3 = memref.load %2[%arg3] : memref + return +} + +// CHECK-LABEL: llvm.func @callee_multiple_args_unranked +// CHECK-SAME: %[[ARG0_RANK:.*]]: i64, %[[ARG0_INNER_DESC:arg1]]: !llvm.ptr, +// CHECK-SAME: %[[FARG:arg2]]: f32, +// CHECK-SAME: %[[ARG1_RANK:.*]]: i64, %[[ARG1_INNER_DESC:arg4]]: !llvm.ptr, +// CHECK-SAME: %[[IARG:.*]]: i64 + +// Populate the descriptor for arg0. +// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ARG0_RANK]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ARG0_INNER_DESC]], %[[ARG0_DESC1]][1] + +// Populate the descriptor for arg2 (second memref). +// CHECK: %[[ARG1_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG1_DESC1:.*]] = llvm.insertvalue %[[ARG1_RANK]], %[[ARG1_DESC0]][0] +// CHECK: %[[ARG1_DESC2:.*]] = llvm.insertvalue %[[ARG1_INNER_DESC]], %[[ARG1_DESC1]][1] + +// CHECK: llvm.return + +// CHECK-LABEL: llvm.func @_mlir_ciface_callee_multiple_args_unranked +// CHECK-SAME: %[[ARG0_PTR:arg0]]: !llvm.ptr)>>, +// CHECK-SAME: %[[FARG:arg1]]: f32, +// CHECK-SAME: %[[ARG1_PTR:arg2]]: !llvm.ptr)>>, +// CHECK-SAME: %[[IARG:arg3]]: i64 + +// Unpack descriptor for arg0. 
+// CHECK: %[[ARG0_DESC:.*]] = llvm.load %[[ARG0_PTR]] +// CHECK: %[[ARG0_RANK:.*]] = llvm.extractvalue %[[ARG0_DESC]][0] +// CHECK: %[[ARG0_INNER_DESC:.*]] = llvm.extractvalue %[[ARG0_DESC]][1] + +// Unpack descriptor for arg2 (second memref). +// CHECK: %[[ARG1_DESC:.*]] = llvm.load %[[ARG1_PTR]] +// CHECK: %[[ARG1_RANK:.*]] = llvm.extractvalue %[[ARG1_DESC]][0] +// CHECK: %[[ARG1_INNER_DESC:.*]] = llvm.extractvalue %[[ARG1_DESC]][1] + +// Call the function. +// CHECK: llvm.call @callee_multiple_args_unranked(%[[ARG0_RANK]], %[[ARG0_INNER_DESC]], %[[FARG]], %[[ARG1_RANK]], %[[ARG1_INNER_DESC]], %[[IARG]]) +// CHECK: llvm.return diff --git a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir --- a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir +++ b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir @@ -1,251 +1,607 @@ -// RUN: mlir-opt -convert-memref-to-llvm -convert-std-to-llvm='emit-c-wrappers=1' -reconcile-unrealized-casts %s | FileCheck %s -// RUN: mlir-opt -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts %s | FileCheck %s --check-prefix=EMIT_C_ATTRIBUTE - -// This tests the default memref calling convention and the emission of C -// wrappers. We don't need to separate runs because the wrapper-emission -// version subsumes the calling convention and only adds new functions, that we -// can also file-check in the same run. - -// An external function is transformed into the glue around calling an interface function. -// CHECK-LABEL: @external -// CHECK: %[[ALLOC0:.*]]: !llvm.ptr, %[[ALIGN0:.*]]: !llvm.ptr, %[[OFFSET0:.*]]: i64, %[[SIZE00:.*]]: i64, %[[SIZE01:.*]]: i64, %[[STRIDE00:.*]]: i64, %[[STRIDE01:.*]]: i64, -// CHECK: %[[ALLOC1:.*]]: !llvm.ptr, %[[ALIGN1:.*]]: !llvm.ptr, %[[OFFSET1:.*]]: i64) -func private @external(%arg0: memref, %arg1: memref) - // Populate the descriptor for arg0. 
- // CHECK: %[[DESC00:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: %[[DESC01:.*]] = llvm.insertvalue %arg0, %[[DESC00]][0] - // CHECK: %[[DESC02:.*]] = llvm.insertvalue %arg1, %[[DESC01]][1] - // CHECK: %[[DESC03:.*]] = llvm.insertvalue %arg2, %[[DESC02]][2] - // CHECK: %[[DESC04:.*]] = llvm.insertvalue %arg3, %[[DESC03]][3, 0] - // CHECK: %[[DESC05:.*]] = llvm.insertvalue %arg5, %[[DESC04]][4, 0] - // CHECK: %[[DESC06:.*]] = llvm.insertvalue %arg4, %[[DESC05]][3, 1] - // CHECK: %[[DESC07:.*]] = llvm.insertvalue %arg6, %[[DESC06]][4, 1] - - // Allocate on stack and store to comply with C calling convention. - // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) - // CHECK: %[[DESC0_ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: llvm.store %[[DESC07]], %[[DESC0_ALLOCA]] - - // Populate the descriptor for arg1. - // CHECK: %[[DESC10:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> - // CHECK: %[[DESC11:.*]] = llvm.insertvalue %arg7, %[[DESC10]][0] : !llvm.struct<(ptr, ptr, i64)> - // CHECK: %[[DESC12:.*]] = llvm.insertvalue %arg8, %[[DESC11]][1] : !llvm.struct<(ptr, ptr, i64)> - // CHECK: %[[DESC13:.*]] = llvm.insertvalue %arg9, %[[DESC12]][2] : !llvm.struct<(ptr, ptr, i64)> - - // Allocate on stack and store to comply with C calling convention. - // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) - // CHECK: %[[DESC1_ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, ptr, i64)> - // CHECK: llvm.store %[[DESC13]], %[[DESC1_ALLOCA]] - - // Call the interface function. - // CHECK: llvm.call @_mlir_ciface_external - -// Verify that an interface function is emitted. -// CHECK-LABEL: llvm.func @_mlir_ciface_external -// CHECK: (!llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>, !llvm.ptr, ptr, i64)>>) - -// Verify that the return value is not affected. 
-// CHECK-LABEL: @returner -// CHECK: -> !llvm.struct<(struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, struct<(ptr, ptr, i64)>)> -func private @returner() -> (memref, memref) - -// CHECK-LABEL: @caller -func @caller() { - %0:2 = call @returner() : () -> (memref, memref) - // Extract individual values from the descriptor for the first memref. - // CHECK: %[[ALLOC0:.*]] = llvm.extractvalue %[[DESC0:.*]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - // CHECK: %[[ALIGN0:.*]] = llvm.extractvalue %[[DESC0]][1] - // CHECK: %[[OFFSET0:.*]] = llvm.extractvalue %[[DESC0]][2] - // CHECK: %[[SIZE00:.*]] = llvm.extractvalue %[[DESC0]][3, 0] - // CHECK: %[[SIZE01:.*]] = llvm.extractvalue %[[DESC0]][3, 1] - // CHECK: %[[STRIDE00:.*]] = llvm.extractvalue %[[DESC0]][4, 0] - // CHECK: %[[STRIDE01:.*]] = llvm.extractvalue %[[DESC0]][4, 1] - - // Extract individual values from the descriptor for the second memref. - // CHECK: %[[ALLOC1:.*]] = llvm.extractvalue %[[DESC1:.*]][0] : !llvm.struct<(ptr, ptr, i64)> - // CHECK: %[[ALIGN1:.*]] = llvm.extractvalue %[[DESC1]][1] - // CHECK: %[[OFFSET1:.*]] = llvm.extractvalue %[[DESC1]][2] - - // Forward the values to the call. 
- // CHECK: llvm.call @external(%[[ALLOC0]], %[[ALIGN0]], %[[OFFSET0]], %[[SIZE00]], %[[SIZE01]], %[[STRIDE00]], %[[STRIDE01]], %[[ALLOC1]], %[[ALIGN1]], %[[OFFSET1]]) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64, !llvm.ptr, !llvm.ptr, i64) -> () - call @external(%0#0, %0#1) : (memref, memref) -> () +// RUN: mlir-opt %s \ +// RUN: --convert-memref-to-llvm \ +// RUN: --convert-std-to-llvm='max-unranked-desc-buffer-rank=5' | FileCheck %s + +func @callee_no_result(%arg0 : memref) { + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = memref.load %arg0[%c0, %c1] : memref return } -// CHECK-LABEL: @callee -// EMIT_C_ATTRIBUTE-LABEL: @callee -func @callee(%arg0: memref, %arg1: index) { - %0 = memref.load %arg0[%arg1] : memref +func @caller_no_result(%arg0 : memref) { + call @callee_no_result(%arg0) : (memref) -> () return } -// Verify that an interface function is emitted. -// CHECK-LABEL: @_mlir_ciface_callee -// CHECK: %[[ARG0:.*]]: !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> - // Load the memref descriptor pointer. - // CHECK: %[[DESC:.*]] = llvm.load %[[ARG0]] : !llvm.ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>> +// CHECK-LABEL: llvm.func @caller_no_result +// CHECK-SAME: %[[ALLOC:.*]]: !llvm.ptr, %[[ALIGN:.*]]: !llvm.ptr, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[SIZE1:.*]]: i64, %[[STRIDE0:.*]]: i64, %[[STRIDE1:.*]]: i64 + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef : [[ARG_DESC_TY:!llvm.struct<\(ptr, ptr, i64, array<2 x i64>, array<2 x i64>\)>]] +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ALLOC]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ALIGN]], %[[ARG_DESC1]][1] +// CHECK: %[[ARG_DESC3:.*]] = llvm.insertvalue %[[OFFSET]], %[[ARG_DESC2]][2] +// CHECK: %[[ARG_DESC4:.*]] = llvm.insertvalue %[[SIZE0]], %[[ARG_DESC3]][3, 0] +// CHECK: %[[ARG_DESC5:.*]] = llvm.insertvalue %[[STRIDE0]], %[[ARG_DESC4]][4, 0] +// CHECK: %[[ARG_DESC6:.*]] = llvm.insertvalue %[[SIZE1]], %[[ARG_DESC5]][3, 1] +// CHECK: %[[ARG_DESC7:.*]] = llvm.insertvalue %[[STRIDE1]], %[[ARG_DESC6]][4, 1] + +// Unpack descriptor. +// CHECK: %[[ALLOC_:.*]] = llvm.extractvalue %[[ARG_DESC7]][0] +// CHECK: %[[ALIGN_:.*]] = llvm.extractvalue %[[ARG_DESC7]][1] +// CHECK: %[[OFFSET_:.*]] = llvm.extractvalue %[[ARG_DESC7]][2] +// CHECK: %[[SIZE0_:.*]] = llvm.extractvalue %[[ARG_DESC7]][3, 0] +// CHECK: %[[SIZE1_:.*]] = llvm.extractvalue %[[ARG_DESC7]][3, 1] +// CHECK: %[[STRIDE0_:.*]] = llvm.extractvalue %[[ARG_DESC7]][4, 0] +// CHECK: %[[STRIDE1_:.*]] = llvm.extractvalue %[[ARG_DESC7]][4, 1] + +// Call the function. +// CHECK: llvm.call @callee_no_result(%[[ALLOC_]], %[[ALIGN_]], %[[OFFSET_]], %[[SIZE0_]], %[[SIZE1_]], %[[STRIDE0_]], %[[STRIDE1_]]) +// CHECK: llvm.return + + +func @callee_single_result(%arg0 : memref) -> memref { + return %arg0 : memref +} + +func @caller_single_result(%arg0 : memref) -> memref { + %0 = call @callee_single_result(%arg0) : (memref) -> memref + return %0 : memref +} - // Extract individual components of the descriptor. 
- // CHECK: %[[ALLOC:.*]] = llvm.extractvalue %[[DESC]][0] - // CHECK: %[[ALIGN:.*]] = llvm.extractvalue %[[DESC]][1] - // CHECK: %[[OFFSET:.*]] = llvm.extractvalue %[[DESC]][2] - // CHECK: %[[SIZE:.*]] = llvm.extractvalue %[[DESC]][3, 0] - // CHECK: %[[STRIDE:.*]] = llvm.extractvalue %[[DESC]][4, 0] +// CHECK-LABEL: llvm.func @caller_single_result +// CHECK-SAME: %[[ALLOC:.*]]: !llvm.ptr, %[[ALIGN:.*]]: !llvm.ptr, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[STRIDE0:.*]]: i64 + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ALLOC]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ALIGN]], %[[ARG_DESC1]][1] +// CHECK: %[[ARG_DESC3:.*]] = llvm.insertvalue %[[OFFSET]], %[[ARG_DESC2]][2] +// CHECK: %[[ARG_DESC4:.*]] = llvm.insertvalue %[[SIZE0]], %[[ARG_DESC3]][3, 0] +// CHECK: %[[ARG_DESC5:.*]] = llvm.insertvalue %[[STRIDE0]], %[[ARG_DESC4]][4, 0] + +// Unpack descriptor. +// CHECK: %[[ALLOC_:.*]] = llvm.extractvalue %[[ARG_DESC5]][0] +// CHECK: %[[ALIGN_:.*]] = llvm.extractvalue %[[ARG_DESC5]][1] +// CHECK: %[[OFFSET_:.*]] = llvm.extractvalue %[[ARG_DESC5]][2] +// CHECK: %[[SIZE0_:.*]] = llvm.extractvalue %[[ARG_DESC5]][3, 0] +// CHECK: %[[STRIDE0_:.*]] = llvm.extractvalue %[[ARG_DESC5]][4, 0] + +// Call the function. 
+// CHECK: %[[RESULT:.*]] = llvm.call @callee_single_result(%[[ALLOC_]], %[[ALIGN_]], %[[OFFSET_]], %[[SIZE0_]], %[[STRIDE0_]]) +// CHECK: llvm.return %[[RESULT]] + + +func @callee_multiple_result(%arg0 : memref, + %arg1 : memref) -> (memref, memref, i64, f32) { + %c3 = constant 3 : i64 + %pi = constant 3.141 : f32 + return %arg0, %arg1, %c3, %pi : memref, memref, i64, f32 +} + +func @caller_multiple_result(%arg0 : memref, %arg1 : memref) + -> (memref, memref, i64, f32) { + %0:4 = call @callee_multiple_result(%arg0, %arg1) + : (memref, memref) + -> (memref, memref, i64, f32) + return %0#0, %0#1, %0#2, %0#3 : memref, memref, i64, f32 +} - // Forward the descriptor components to the call. - // CHECK: llvm.call @callee(%[[ALLOC]], %[[ALIGN]], %[[OFFSET]], %[[SIZE]], %[[STRIDE]], %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64) -> () +// CHECK-LABEL: llvm.func @caller_multiple_result +// CHECK-SAME: %[[ALLOC0:.*]]: !llvm.ptr, %[[ALIGN0:.*]]: !llvm.ptr, %[[OFFSET0:.*]]: i64, %[[SIZE00:.*]]: i64, %[[SIZE01:.*]]: i64, %[[STRIDE00:.*]]: i64, %[[STRIDE01:arg6]]: i64, +// CHECK-SAME: %[[ALLOC1:.*]]: !llvm.ptr, %[[ALIGN1:.*]]: !llvm.ptr, %[[OFFSET1:.*]]: i64, %[[SIZE10:.*]]: i64, %[[STRIDE10:arg11]]: i64 + +// Populate the descriptor for arg0. +// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ALLOC0]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ALIGN0]], %[[ARG0_DESC1]][1] +// CHECK: %[[ARG0_DESC3:.*]] = llvm.insertvalue %[[OFFSET0]], %[[ARG0_DESC2]][2] +// CHECK: %[[ARG0_DESC4:.*]] = llvm.insertvalue %[[SIZE00]], %[[ARG0_DESC3]][3, 0] +// CHECK: %[[ARG0_DESC5:.*]] = llvm.insertvalue %[[STRIDE00]], %[[ARG0_DESC4]][4, 0] +// CHECK: %[[ARG0_DESC6:.*]] = llvm.insertvalue %[[SIZE01]], %[[ARG0_DESC5]][3, 1] +// CHECK: %[[ARG0_DESC7:.*]] = llvm.insertvalue %[[STRIDE01]], %[[ARG0_DESC6]][4, 1] + +// Populate the descriptor for arg1. 
+// CHECK: %[[ARG1_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG1_DESC1:.*]] = llvm.insertvalue %[[ALLOC1]], %[[ARG1_DESC0]][0] +// CHECK: %[[ARG1_DESC2:.*]] = llvm.insertvalue %[[ALIGN1]], %[[ARG1_DESC1]][1] +// CHECK: %[[ARG1_DESC3:.*]] = llvm.insertvalue %[[OFFSET1]], %[[ARG1_DESC2]][2] +// CHECK: %[[ARG1_DESC4:.*]] = llvm.insertvalue %[[SIZE10]], %[[ARG1_DESC3]][3, 0] +// CHECK: %[[ARG1_DESC5:.*]] = llvm.insertvalue %[[STRIDE10]], %[[ARG1_DESC4]][4, 0] + +// Unpack descriptor. +// CHECK: %[[ALLOC0_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][0] +// CHECK: %[[ALIGN0_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][1] +// CHECK: %[[OFFSET0_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][2] +// CHECK: %[[SIZE00_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][3, 0] +// CHECK: %[[SIZE01_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][3, 1] +// CHECK: %[[STRIDE00_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][4, 0] +// CHECK: %[[STRIDE01_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][4, 1] + +// Unpack descriptor. +// CHECK: %[[ALLOC1_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][0] +// CHECK: %[[ALIGN1_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][1] +// CHECK: %[[OFFSET1_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][2] +// CHECK: %[[SIZE10_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][3, 0] +// CHECK: %[[STRIDE10_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][4, 0] + +// Call the function. +// CHECK: %[[RESULT:.*]] = llvm.call @callee_multiple_result(%[[ALLOC0_]], %[[ALIGN0_]], %[[OFFSET0_]], %[[SIZE00_]], %[[SIZE01_]], %[[STRIDE00_]], %[[STRIDE01_]], %[[ALLOC1_]], %[[ALIGN1_]], %[[OFFSET1_]], %[[SIZE10_]], %[[STRIDE10_]]) + +// Unpack results. +// CHECK: %[[RESULT0:.*]] = llvm.extractvalue %[[RESULT]][0] +// CHECK: %[[RESULT1:.*]] = llvm.extractvalue %[[RESULT]][1] +// CHECK: %[[RESULT2:.*]] = llvm.extractvalue %[[RESULT]][2] +// CHECK: %[[RESULT3:.*]] = llvm.extractvalue %[[RESULT]][3] + +// Re-pack results. 
+// CHECK: %[[REPACKED0:.*]] = llvm.mlir.undef +// CHECK: %[[REPACKED1:.*]] = llvm.insertvalue %[[RESULT0]], %[[REPACKED0]][0] +// CHECK: %[[REPACKED2:.*]] = llvm.insertvalue %[[RESULT1]], %[[REPACKED1]][1] +// CHECK: %[[REPACKED3:.*]] = llvm.insertvalue %[[RESULT2]], %[[REPACKED2]][2] +// CHECK: %[[REPACKED4:.*]] = llvm.insertvalue %[[RESULT3]], %[[REPACKED3]][3] + +// CHECK: llvm.return %[[REPACKED4]] + + +func @callee_multiple_args(%arg0 : index, %arg1 : memref, + %arg2 : memref, %arg3 : f32) { + %c0 = constant 0 : index + %0 = memref.load %arg1[%c0, %arg0] : memref + %1 = memref.load %arg2[%arg0] : memref + return +} -// EMIT_C_ATTRIBUTE-NOT: @mlir_ciface_callee +func @caller_multiple_args(%arg0 : index, %arg1 : memref, + %arg2 : memref, %arg3 : f32) { + call @callee_multiple_args(%arg0, %arg1, %arg2, %arg3) + : (index, memref, memref, f32) -> () + return +} -// CHECK-LABEL: @other_callee -// EMIT_C_ATTRIBUTE-LABEL: @other_callee -func @other_callee(%arg0: memref, %arg1: index) attributes { llvm.emit_c_interface } { - %0 = memref.load %arg0[%arg1] : memref +// CHECK-LABEL: llvm.func @caller_multiple_args +// CHECK-SAME: %[[IARG:arg0]]: i64, +// CHECK-SAME: %[[ALLOC0:.*]]: !llvm.ptr, %[[ALIGN0:.*]]: !llvm.ptr, %[[OFFSET0:.*]]: i64, %[[SIZE00:.*]]: i64, %[[SIZE01:.*]]: i64, %[[STRIDE00:.*]]: i64, %[[STRIDE01:arg7]]: i64, +// CHECK-SAME: %[[ALLOC1:.*]]: !llvm.ptr, %[[ALIGN1:.*]]: !llvm.ptr, %[[OFFSET1:.*]]: i64, %[[SIZE10:.*]]: i64, %[[STRIDE10:arg12]]: i64, +// CHECK-SAME: %[[FARG:arg13]]: f32 + +// Populate the descriptor for arg1 (first memref). 
+// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ALLOC0]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ALIGN0]], %[[ARG0_DESC1]][1] +// CHECK: %[[ARG0_DESC3:.*]] = llvm.insertvalue %[[OFFSET0]], %[[ARG0_DESC2]][2] +// CHECK: %[[ARG0_DESC4:.*]] = llvm.insertvalue %[[SIZE00]], %[[ARG0_DESC3]][3, 0] +// CHECK: %[[ARG0_DESC5:.*]] = llvm.insertvalue %[[STRIDE00]], %[[ARG0_DESC4]][4, 0] +// CHECK: %[[ARG0_DESC6:.*]] = llvm.insertvalue %[[SIZE01]], %[[ARG0_DESC5]][3, 1] +// CHECK: %[[ARG0_DESC7:.*]] = llvm.insertvalue %[[STRIDE01]], %[[ARG0_DESC6]][4, 1] + +// Populate the descriptor for arg2 (second memref). +// CHECK: %[[ARG1_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG1_DESC1:.*]] = llvm.insertvalue %[[ALLOC1]], %[[ARG1_DESC0]][0] +// CHECK: %[[ARG1_DESC2:.*]] = llvm.insertvalue %[[ALIGN1]], %[[ARG1_DESC1]][1] +// CHECK: %[[ARG1_DESC3:.*]] = llvm.insertvalue %[[OFFSET1]], %[[ARG1_DESC2]][2] +// CHECK: %[[ARG1_DESC4:.*]] = llvm.insertvalue %[[SIZE10]], %[[ARG1_DESC3]][3, 0] +// CHECK: %[[ARG1_DESC5:.*]] = llvm.insertvalue %[[STRIDE10]], %[[ARG1_DESC4]][4, 0] + +// Unpack descriptor. +// CHECK: %[[ALLOC0_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][0] +// CHECK: %[[ALIGN0_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][1] +// CHECK: %[[OFFSET0_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][2] +// CHECK: %[[SIZE00_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][3, 0] +// CHECK: %[[SIZE01_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][3, 1] +// CHECK: %[[STRIDE00_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][4, 0] +// CHECK: %[[STRIDE01_:.*]] = llvm.extractvalue %[[ARG0_DESC7]][4, 1] + +// Unpack descriptor. 
+// CHECK: %[[ALLOC1_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][0] +// CHECK: %[[ALIGN1_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][1] +// CHECK: %[[OFFSET1_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][2] +// CHECK: %[[SIZE10_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][3, 0] +// CHECK: %[[STRIDE10_:.*]] = llvm.extractvalue %[[ARG1_DESC5]][4, 0] + +// Call the function. +// CHECK: llvm.call @callee_multiple_args(%[[IARG]], %[[ALLOC0_]], %[[ALIGN0_]], %[[OFFSET0_]], %[[SIZE00_]], %[[SIZE01_]], %[[STRIDE00_]], %[[STRIDE01_]], %[[ALLOC1_]], %[[ALIGN1_]], %[[OFFSET1_]], %[[SIZE10_]], %[[STRIDE10_]], %[[FARG]]) +// CHECK: llvm.return + + +func @callee_no_result_unranked(%arg0 : memref<*xf32>) { + %c0 = constant 0 : index + %c1 = constant 1 : index + %0 = memref.cast %arg0 : memref<*xf32> to memref + %1 = memref.load %0[%c0, %c1] : memref return } -// CHECK: @_mlir_ciface_other_callee -// CHECK: llvm.call @other_callee - -// EMIT_C_ATTRIBUTE: @_mlir_ciface_other_callee -// EMIT_C_ATTRIBUTE: llvm.call @other_callee - -//===========================================================================// -// Calling convention on returning unranked memrefs. -//===========================================================================// - -// CHECK-LABEL: llvm.func @return_var_memref_caller -func @return_var_memref_caller(%arg0: memref<4x3xf32>) { - // CHECK: %[[CALL_RES:.*]] = llvm.call @return_var_memref - %0 = call @return_var_memref(%arg0) : (memref<4x3xf32>) -> memref<*xf32> - - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : index) - // CHECK: %[[TWO:.*]] = llvm.mlir.constant(2 : index) - // These sizes may depend on the data layout, not matching specific values. 
- // CHECK: %[[PTR_SIZE:.*]] = llvm.mlir.constant - // CHECK: %[[IDX_SIZE:.*]] = llvm.mlir.constant - - // CHECK: %[[DOUBLE_PTR_SIZE:.*]] = llvm.mul %[[TWO]], %[[PTR_SIZE]] - // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm.struct<(i64, ptr)> - // CHECK: %[[DOUBLE_RANK:.*]] = llvm.mul %[[TWO]], %[[RANK]] - // CHECK: %[[DOUBLE_RANK_INC:.*]] = llvm.add %[[DOUBLE_RANK]], %[[ONE]] - // CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]] - // CHECK: %[[ALLOC_SIZE:.*]] = llvm.add %[[DOUBLE_PTR_SIZE]], %[[TABLES_SIZE]] - // CHECK: %[[FALSE:.*]] = llvm.mlir.constant(false) - // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOC_SIZE]] x i8 - // CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[CALL_RES]][1] - // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]]) - // CHECK: llvm.call @free(%[[SOURCE]]) - // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> - // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm.struct<(i64, ptr)> - // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[DESC]][0] - // CHECK: llvm.insertvalue %[[ALLOCA]], %[[DESC_1]][1] +func @caller_no_result_unranked(%arg0 : memref<*xf32>) { + call @callee_no_result_unranked(%arg0) : (memref<*xf32>) -> () return } -// CHECK-LABEL: llvm.func @return_var_memref -func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> attributes { llvm.emit_c_interface } { - // Match the construction of the unranked descriptor. 
- // CHECK: %[[ALLOCA:.*]] = llvm.alloca - // CHECK: %[[MEMORY:.*]] = llvm.bitcast %[[ALLOCA]] - // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> - // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0] - // CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1] - %0 = memref.cast %arg0: memref<4x3xf32> to memref<*xf32> - - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : index) - // CHECK: %[[TWO:.*]] = llvm.mlir.constant(2 : index) - // These sizes may depend on the data layout, not matching specific values. - // CHECK: %[[PTR_SIZE:.*]] = llvm.mlir.constant - // CHECK: %[[IDX_SIZE:.*]] = llvm.mlir.constant - - // CHECK: %[[DOUBLE_PTR_SIZE:.*]] = llvm.mul %[[TWO]], %[[PTR_SIZE]] - // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm.struct<(i64, ptr)> - // CHECK: %[[DOUBLE_RANK:.*]] = llvm.mul %[[TWO]], %[[RANK]] - // CHECK: %[[DOUBLE_RANK_INC:.*]] = llvm.add %[[DOUBLE_RANK]], %[[ONE]] - // CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]] - // CHECK: %[[ALLOC_SIZE:.*]] = llvm.add %[[DOUBLE_PTR_SIZE]], %[[TABLES_SIZE]] - // CHECK: %[[FALSE:.*]] = llvm.mlir.constant(false) - // CHECK: %[[ALLOCATED:.*]] = llvm.call @malloc(%[[ALLOC_SIZE]]) - // CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[DESC_2]][1] - // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]]) - // CHECK: %[[NEW_DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> - // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm.struct<(i64, ptr)> - // CHECK: %[[NEW_DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[NEW_DESC]][0] - // CHECK: %[[NEW_DESC_2:.*]] = llvm.insertvalue %[[ALLOCATED]], %[[NEW_DESC_1]][1] - // CHECK: llvm.return %[[NEW_DESC_2]] +// CHECK-LABEL: llvm.func @caller_no_result_unranked +// CHECK-SAME: %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// Unpack descriptor. +// CHECK: %[[ARG_RANK_:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[ARG_INNER_DESC_:.*]] = llvm.extractvalue %[[ARG_DESC2]][1] + +// Call the function. +// CHECK: llvm.call @callee_no_result_unranked(%[[ARG_RANK_]], %[[ARG_INNER_DESC_]]) +// CHECK: llvm.return + + +func @callee_single_result_unranked(%arg0 : memref<*xf32>) -> memref<*xf32> { + return %arg0 : memref<*xf32> +} + +func @caller_single_result_unranked(%arg0 : memref<*xf32>) -> memref<*xf32> { + %0 = call @callee_single_result_unranked(%arg0) + : (memref<*xf32>) -> memref<*xf32> return %0 : memref<*xf32> } -// Check that the result memref is passed as parameter -// CHECK-LABEL: @_mlir_ciface_return_var_memref -// CHECK-SAME: (%{{.*}}: !llvm.ptr)>>, %{{.*}}: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>) - -// CHECK-LABEL: llvm.func @return_two_var_memref_caller -func @return_two_var_memref_caller(%arg0: memref<4x3xf32>) { - // Only check that we create two different descriptors using different - // memory, and deallocate both sources. The size computation is same as for - // the single result. 
- // CHECK: %[[CALL_RES:.*]] = llvm.call @return_two_var_memref - // CHECK: %[[RES_1:.*]] = llvm.extractvalue %[[CALL_RES]][0] - // CHECK: %[[RES_2:.*]] = llvm.extractvalue %[[CALL_RES]][1] - %0:2 = call @return_two_var_memref(%arg0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) - - // CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %{{.*}} x i8 - // CHECK: %[[SOURCE_1:.*]] = llvm.extractvalue %[[RES_1:.*]][1] : ![[DESC_TYPE:.*]] - // CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[SOURCE_1]], %{{.*}}, %[[FALSE:.*]]) - // CHECK: llvm.call @free(%[[SOURCE_1]]) - // CHECK: %[[DESC_1:.*]] = llvm.mlir.undef : ![[DESC_TYPE]] - // CHECK: %[[DESC_11:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_1]][0] - // CHECK: llvm.insertvalue %[[ALLOCA_1]], %[[DESC_11]][1] - - // CHECK: %[[ALLOCA_2:.*]] = llvm.alloca %{{.*}} x i8 - // CHECK: %[[SOURCE_2:.*]] = llvm.extractvalue %[[RES_2:.*]][1] - // CHECK: "llvm.intr.memcpy"(%[[ALLOCA_2]], %[[SOURCE_2]], %{{.*}}, %[[FALSE]]) - // CHECK: llvm.call @free(%[[SOURCE_2]]) - // CHECK: %[[DESC_2:.*]] = llvm.mlir.undef : ![[DESC_TYPE]] - // CHECK: %[[DESC_21:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_2]][0] - // CHECK: llvm.insertvalue %[[ALLOCA_2]], %[[DESC_21]][1] +// CHECK-LABEL: llvm.func @caller_single_result_unranked +// CHECK-SAME: %[[RESULT_INNER_DESC_BUFFER:.*]]: !llvm.ptr, %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// Allocate descriptor buffers on the stack. +// CHECK: %[[DEFAULT_DESC_BUFFER_SIZE:.*]] = llvm.mlir.constant(104 : index) +// CHECK: %[[CALL_INNER_DESC_BUFFER:.*]] = llvm.alloca %[[DEFAULT_DESC_BUFFER_SIZE]] x i8 + +// Unpack descriptor. 
+// CHECK: %[[ARG_RANK_:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[ARG_INNER_DESC_:.*]] = llvm.extractvalue %[[ARG_DESC2]][1] + +// Call the function. +// CHECK: %[[CALL_RESULT_DESC:.*]] = llvm.call @callee_single_result_unranked(%[[CALL_INNER_DESC_BUFFER]], %[[ARG_RANK_]], %[[ARG_INNER_DESC_]]) + +// Common constant. +// CHECK: %[[MAX_SUPPORTED_RANK:.*]] = llvm.mlir.constant(5 : i64) + +// Check if the inner descriptor fits into the buffer argument. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK]] +// CHECK: llvm.cond_br %[[PRED]], ^bb1(%[[CALL_RESULT_DESC]] : !llvm.struct<(i64, ptr)>), ^bb3 + +// At this point, we have the call result descriptor or its copy. In both cases +// the descriptor, including its inner descriptor, is on the stack. +// To return it, we still have to copy it to the descriptor buffer or to +// dynamically allocated memory. +// CHECK: ^bb1(%[[DESC_OR_CPY:.*]]: !llvm.struct<(i64, ptr)>): + +// Common constant. +// CHECK: %[[MAX_SUPPORTED_RANK_:.*]] = llvm.mlir.constant(5 : i64) + +// Compute the final result's inner descriptor size. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_OR_CPY]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[RESULT_INNER_DESC_SIZE:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Check if the inner descriptor fits into the stack-allocated buffer argument. 
+// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_OR_CPY]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK_]] +// CHECK: llvm.cond_br %[[PRED]], ^bb4, ^bb5 + +// Copy the inner descriptor to the selected buffer and return a copy of the +// unranked outer descriptor. +// CHECK: ^bb2(%[[SELECTED_BUFFER:.*]]: !llvm.ptr): +// CHECK: %[[CALL_RESULT_INNER_DESC:.*]] = llvm.extractvalue %[[DESC_OR_CPY]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[SELECTED_BUFFER]], %[[CALL_RESULT_INNER_DESC]], %[[RESULT_INNER_DESC_SIZE]], %[[C0]]) +// CHECK: %[[RESULT_DESC0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK: %[[RESULT_DESC1:.*]] = llvm.insertvalue %[[RANK]], %[[RESULT_DESC0]][0] +// CHECK: %[[RESULT_DESC2:.*]] = llvm.insertvalue %[[SELECTED_BUFFER]], %[[RESULT_DESC1]][1] +// CHECK: llvm.return %[[RESULT_DESC2]] + +// Copy the call result descriptor to stack-allocated memory. +// This is the case in which it did not fit into the pre-allocated buffer. We +// have to free the dynamically allocated inner descriptor and copy it over to +// the stack. +// CHECK: ^bb3: + +// Compute the call result's inner descriptor size. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[CALL_RESULT_INNER_DESC_SIZE:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Stack-allocate a buffer for the call result's inner descriptor and copy it +// over. 
Also, free the previously dynamically allocated inner descriptor. +// CHECK: %[[INNER_DESC:.*]] = llvm.alloca %[[CALL_RESULT_INNER_DESC_SIZE]] x i8 +// CHECK: %[[DYN_INNER_DESC:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[INNER_DESC]], %[[DYN_INNER_DESC]], %[[CALL_RESULT_INNER_DESC_SIZE]], %[[C0]]) +// CHECK: llvm.call @free(%[[DYN_INNER_DESC]]) +// CHECK: %[[CALL_RESULT_DESC_CPY0:.*]] = llvm.mlir.undef +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC]][0] +// CHECK: %[[CALL_RESULT_DESC_CPY1:.*]] = llvm.insertvalue %[[RANK]], %[[CALL_RESULT_DESC_CPY0]][0] +// CHECK: %[[CALL_RESULT_DESC_CPY2:.*]] = llvm.insertvalue %[[INNER_DESC]], %[[CALL_RESULT_DESC_CPY1]][1] +// CHECK: llvm.br ^bb1(%[[CALL_RESULT_DESC_CPY2]] : !llvm.struct<(i64, ptr)>) + +// Select the buffer argument to copy the result's inner descriptor to. +// CHECK: ^bb4: +// CHECK: llvm.br ^bb2(%[[RESULT_INNER_DESC_BUFFER]] : !llvm.ptr) + +// Dynamically allocate a new buffer to copy the result's inner descriptor to. 
+// CHECK: ^bb5: +// CHECK: %[[NEW_BUFFER:.*]] = llvm.call @malloc(%[[RESULT_INNER_DESC_SIZE]]) +// CHECK: llvm.br ^bb2(%[[NEW_BUFFER]] : !llvm.ptr) + + +func @callee_multiple_result_unranked(%arg0 : memref<*xf32>) -> (f32, i64, + memref<*xf32>, memref<*xf32>) { + %pi = constant 3.141 : f32 + %c3 = constant 3 : i64 + return %pi, %c3, %arg0, %arg0 : f32, i64, memref<*xf32>, memref<*xf32> +} + +func @caller_multiple_result_unranked(%arg0 : memref<*xf32>) + -> (f32, i64, memref<*xf32>, memref<*xf32>) { + %0:4 = call @callee_multiple_result_unranked(%arg0) : (memref<*xf32>) + -> (f32, i64, memref<*xf32>, memref<*xf32>) + return %0#0, %0#1, %0#2, %0#3 : f32, i64, memref<*xf32>, memref<*xf32> +} + +// CHECK-LABEL: llvm.func @caller_multiple_result_unranked +// CHECK-SAME: %[[RESULT_INNER_DESC_BUFFER0:arg0]]: !llvm.ptr, +// CHECK-SAME: %[[RESULT_INNER_DESC_BUFFER1:arg1]]: !llvm.ptr, +// CHECK-SAME: %[[ARG_RANK:.*]]: i64, %[[ARG_INNER_DESC:.*]]: !llvm.ptr + +// Populate the descriptor for arg0. +// CHECK: %[[ARG_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG_DESC1:.*]] = llvm.insertvalue %[[ARG_RANK]], %[[ARG_DESC0]][0] +// CHECK: %[[ARG_DESC2:.*]] = llvm.insertvalue %[[ARG_INNER_DESC]], %[[ARG_DESC1]][1] + +// Allocate descriptor buffers on the stack. +// CHECK: %[[DEFAULT_DESC_BUFFER_SIZE:.*]] = llvm.mlir.constant(104 : index) +// CHECK: %[[CALL_INNER_DESC_BUFFER0:.*]] = llvm.alloca %[[DEFAULT_DESC_BUFFER_SIZE]] x i8 +// CHECK: %[[CALL_INNER_DESC_BUFFER1:.*]] = llvm.alloca %[[DEFAULT_DESC_BUFFER_SIZE]] x i8 + +// Unpack descriptor. +// CHECK: %[[ARG_RANK_:.*]] = llvm.extractvalue %[[ARG_DESC2]][0] +// CHECK: %[[ARG_INNER_DESC_:.*]] = llvm.extractvalue %[[ARG_DESC2]][1] + +// Call the function. +// CHECK: %[[CALL_RESULT:.*]] = llvm.call @callee_multiple_result_unranked(%[[CALL_INNER_DESC_BUFFER0]], %[[CALL_INNER_DESC_BUFFER1]], %[[ARG_RANK_]], %[[ARG_INNER_DESC_]]) + +// Unpack call result. 
+// CHECK: %[[FRESULT:.*]] = llvm.extractvalue %[[CALL_RESULT]][0] +// CHECK: %[[IRESULT:.*]] = llvm.extractvalue %[[CALL_RESULT]][1] +// CHECK: %[[CALL_RESULT_DESC0:.*]] = llvm.extractvalue %[[CALL_RESULT]][2] +// CHECK: %[[CALL_RESULT_DESC1:.*]] = llvm.extractvalue %[[CALL_RESULT]][3] + +// Common constant. +// CHECK: %[[MAX_SUPPORTED_RANK:.*]] = llvm.mlir.constant(5 : i64) + +// Check if the first call result inner descriptor fits into its buffer argument +// and copy it to a new stack-allocated buffer otherwise. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC0]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK]] +// CHECK: llvm.cond_br %[[PRED]], ^bb1(%[[CALL_RESULT_DESC0]] : !llvm.struct<(i64, ptr)>), ^bb5 + +// At this point, we have the first call result descriptor or its copy. +// CHECK: ^bb1(%[[DESC_OR_CPY0:.*]]: !llvm.struct<(i64, ptr)>): + +// Check if the second call result inner descriptor fits into its buffer +// argument and copy it to a new stack-allocated buffer otherwise. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC1]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK]] +// CHECK: llvm.cond_br %[[PRED]], ^bb2(%[[CALL_RESULT_DESC1]] : !llvm.struct<(i64, ptr)>), ^bb6 + +// At this point, we have the call result descriptors or their copies. In both +// cases the descriptors, including their inner descriptors, are on the stack. +// To return them, we still have to copy them to the argument buffer or to +// dynamically allocated memory. +// CHECK: ^bb2(%[[DESC_OR_CPY1:.*]]: !llvm.struct<(i64, ptr)>): + +// Common constant. +// CHECK: %[[MAX_SUPPORTED_RANK_:.*]] = llvm.mlir.constant(5 : i64) + +// Compute the result's first inner descriptor size. 
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_OR_CPY0]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[RESULT_INNER_DESC_SIZE0:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Check if the inner descriptor fits into the buffer argument. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_OR_CPY0]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK_]] +// CHECK: llvm.cond_br %[[PRED]], ^bb7, ^bb8 + +// Copy the call result's first inner descriptor to the selected buffer and +// create a copy of the unranked outer descriptor. +// CHECK: ^bb3(%[[SELECTED_BUFFER0:.*]]: !llvm.ptr): +// CHECK: %[[CALL_RESULT_INNER_DESC0:.*]] = llvm.extractvalue %[[DESC_OR_CPY0]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[SELECTED_BUFFER0]], %[[CALL_RESULT_INNER_DESC0]], %[[RESULT_INNER_DESC_SIZE0]], %[[C0]]) +// CHECK: %[[RESULT0_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[RESULT0_DESC1:.*]] = llvm.insertvalue %[[RANK]], %[[RESULT0_DESC0]][0] +// CHECK: %[[RESULT0_DESC2:.*]] = llvm.insertvalue %[[SELECTED_BUFFER0]], %[[RESULT0_DESC1]][1] + +// Compute the result's second inner descriptor size. 
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_OR_CPY1]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[RESULT_INNER_DESC_SIZE1:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Check if the inner descriptor fits into the buffer argument. +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_OR_CPY1]][0] +// CHECK: %[[PRED:.*]] = llvm.icmp "ule" %[[RANK]], %[[MAX_SUPPORTED_RANK_]] +// CHECK: llvm.cond_br %[[PRED]], ^bb9, ^bb10 + +// Copy the call result's second inner descriptor to the selected buffer and +// create a copy of the unranked outer descriptor. +// CHECK: ^bb4(%[[SELECTED_BUFFER1:.*]]: !llvm.ptr): +// CHECK: %[[CALL_RESULT_INNER_DESC1:.*]] = llvm.extractvalue %[[DESC_OR_CPY1]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[SELECTED_BUFFER1]], %[[CALL_RESULT_INNER_DESC1]], %[[RESULT_INNER_DESC_SIZE1]], %[[C0]]) +// CHECK: %[[RESULT1_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[RESULT1_DESC1:.*]] = llvm.insertvalue %[[RANK]], %[[RESULT1_DESC0]][0] +// CHECK: %[[RESULT1_DESC2:.*]] = llvm.insertvalue %[[SELECTED_BUFFER1]], %[[RESULT1_DESC1]][1] + +// Pack the final result and return it. 
+// CHECK: %[[RESULT0:.*]] = llvm.mlir.undef +// CHECK: %[[RESULT1:.*]] = llvm.insertvalue %[[FRESULT]], %[[RESULT0]][0] +// CHECK: %[[RESULT2:.*]] = llvm.insertvalue %[[IRESULT]], %[[RESULT1]][1] +// CHECK: %[[RESULT3:.*]] = llvm.insertvalue %[[RESULT0_DESC2]], %[[RESULT2]][2] +// CHECK: %[[RESULT4:.*]] = llvm.insertvalue %[[RESULT1_DESC2]], %[[RESULT3]][3] +// CHECK: llvm.return %[[RESULT4]] + +// Copy the call result's first descriptor to stack-allocated memory. +// This is the case in which it did not fit into the pre-allocated buffer. +// CHECK: ^bb5: + +// Compute the descriptor size. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC0]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[CALL_RESULT_INNER_DESC_SIZE0:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Stack-allocate a buffer for the call result's first inner descriptor and copy +// it over. Also, free the previously dynamically allocated inner descriptor. 
+// CHECK: %[[INNER_DESC:.*]] = llvm.alloca %[[CALL_RESULT_INNER_DESC_SIZE0]] x i8 +// CHECK: %[[DYN_INNER_DESC:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC0]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[INNER_DESC]], %[[DYN_INNER_DESC]], %[[CALL_RESULT_INNER_DESC_SIZE0]], %[[C0]]) +// CHECK: llvm.call @free(%[[DYN_INNER_DESC]]) +// CHECK: %[[CALL_RESULT_DESC0_CPY0:.*]] = llvm.mlir.undef +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC0]][0] +// CHECK: %[[CALL_RESULT_DESC0_CPY1:.*]] = llvm.insertvalue %[[RANK]], %[[CALL_RESULT_DESC0_CPY0]][0] +// CHECK: %[[CALL_RESULT_DESC0_CPY2:.*]] = llvm.insertvalue %[[INNER_DESC]], %[[CALL_RESULT_DESC0_CPY1]][1] +// CHECK: llvm.br ^bb1(%[[CALL_RESULT_DESC0_CPY2]] : !llvm.struct<(i64, ptr)>) + +// Copy the call result's second descriptor to stack-allocated memory. +// This is the case in which it did not fit into the pre-allocated buffer. +// CHECK: ^bb6: + +// Compute the descriptor size. +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) +// CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[C8_:.*]] = llvm.mlir.constant(8 : index) +// CHECK: %[[SIZE_PTRS:.*]] = llvm.mul %[[C2]], %[[C8]] +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC1]][0] +// CHECK: %[[RANK_TWICE:.*]] = llvm.mul %[[C2]], %[[RANK]] +// CHECK: %[[NUM_I64_FIELDS:.*]] = llvm.add %[[RANK_TWICE]], %[[C1]] +// CHECK: %[[SIZE_I64_FIELDS:.*]] = llvm.mul %[[NUM_I64_FIELDS]], %[[C8_]] +// CHECK: %[[CALL_RESULT_INNER_DESC_SIZE1:.*]] = llvm.add %[[SIZE_PTRS]], %[[SIZE_I64_FIELDS]] + +// Stack-allocate a buffer for the call result's second inner descriptor and +// copy it over. Also, free the previously dynamically allocated inner +// descriptor. 
+// CHECK: %[[INNER_DESC:.*]] = llvm.alloca %[[CALL_RESULT_INNER_DESC_SIZE1]] x i8 +// CHECK: %[[DYN_INNER_DESC:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC1]][1] +// CHECK: %[[C0:.*]] = llvm.mlir.constant(false) +// CHECK: "llvm.intr.memcpy"(%[[INNER_DESC]], %[[DYN_INNER_DESC]], %[[CALL_RESULT_INNER_DESC_SIZE1]], %[[C0]]) +// CHECK: llvm.call @free(%[[DYN_INNER_DESC]]) +// CHECK: %[[CALL_RESULT_DESC1_CPY0:.*]] = llvm.mlir.undef +// CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RESULT_DESC1]][0] +// CHECK: %[[CALL_RESULT_DESC1_CPY1:.*]] = llvm.insertvalue %[[RANK]], %[[CALL_RESULT_DESC1_CPY0]][0] +// CHECK: %[[CALL_RESULT_DESC1_CPY2:.*]] = llvm.insertvalue %[[INNER_DESC]], %[[CALL_RESULT_DESC1_CPY1]][1] +// CHECK: llvm.br ^bb2(%[[CALL_RESULT_DESC1_CPY2]] : !llvm.struct<(i64, ptr)>) + +// Select the buffer argument to copy the result's first inner descriptor to. +// CHECK: ^bb7: +// CHECK: llvm.br ^bb3(%[[RESULT_INNER_DESC_BUFFER0]] : !llvm.ptr) + +// Dynamically allocate a new buffer to copy the result's first inner descriptor +// to. +// CHECK: ^bb8: +// CHECK: %[[NEW_BUFFER:.*]] = llvm.call @malloc(%[[RESULT_INNER_DESC_SIZE0]]) +// CHECK: llvm.br ^bb3(%[[NEW_BUFFER]] : !llvm.ptr) + +// Select the buffer argument to copy the result's second inner descriptor to. +// CHECK: ^bb9: +// CHECK: llvm.br ^bb4(%[[RESULT_INNER_DESC_BUFFER1]] : !llvm.ptr) + +// Dynamically allocate a new buffer to copy the result's second inner +// descriptor to. 
+// CHECK: ^bb10: +// CHECK: %[[NEW_BUFFER:.*]] = llvm.call @malloc(%[[RESULT_INNER_DESC_SIZE1]]) +// CHECK: llvm.br ^bb4(%[[NEW_BUFFER]] : !llvm.ptr) + + +func @callee_multiple_args_unranked(%arg0 : memref<*xf32>, %arg1 : f32, + %arg2 : memref<*xf32>, %arg3 : index) { + %c0 = constant 0 : index + %0 = memref.cast %arg0 : memref<*xf32> to memref + %1 = memref.load %0[%c0, %arg3] : memref + %2 = memref.cast %arg2 : memref<*xf32> to memref + %3 = memref.load %2[%arg3] : memref return } -// CHECK-LABEL: llvm.func @return_two_var_memref -func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) attributes { llvm.emit_c_interface } { - // Match the construction of the unranked descriptor. - // CHECK: %[[ALLOCA:.*]] = llvm.alloca - // CHECK: %[[MEMORY:.*]] = llvm.bitcast %[[ALLOCA]] - // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> - // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0] - // CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1] - %0 = memref.cast %arg0 : memref<4x3xf32> to memref<*xf32> - - // Only check that we allocate the memory for each operand of the "return" - // separately, even if both operands are the same value. The calling - // convention requires the caller to free them and the caller cannot know - // whether they are the same value or not. 
- // CHECK: %[[ALLOCATED_1:.*]] = llvm.call @malloc(%{{.*}}) - // CHECK: %[[SOURCE_1:.*]] = llvm.extractvalue %[[DESC_2]][1] - // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_1]], %[[SOURCE_1]], %{{.*}}, %[[FALSE:.*]]) - // CHECK: %[[RES_1:.*]] = llvm.mlir.undef - // CHECK: %[[RES_11:.*]] = llvm.insertvalue %{{.*}}, %[[RES_1]][0] - // CHECK: %[[RES_12:.*]] = llvm.insertvalue %[[ALLOCATED_1]], %[[RES_11]][1] - - // CHECK: %[[ALLOCATED_2:.*]] = llvm.call @malloc(%{{.*}}) - // CHECK: %[[SOURCE_2:.*]] = llvm.extractvalue %[[DESC_2]][1] - // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_2]], %[[SOURCE_2]], %{{.*}}, %[[FALSE]]) - // CHECK: %[[RES_2:.*]] = llvm.mlir.undef - // CHECK: %[[RES_21:.*]] = llvm.insertvalue %{{.*}}, %[[RES_2]][0] - // CHECK: %[[RES_22:.*]] = llvm.insertvalue %[[ALLOCATED_2]], %[[RES_21]][1] - - // CHECK: %[[RESULTS:.*]] = llvm.mlir.undef : !llvm.struct<(struct<(i64, ptr)>, struct<(i64, ptr)>)> - // CHECK: %[[RESULTS_1:.*]] = llvm.insertvalue %[[RES_12]], %[[RESULTS]] - // CHECK: %[[RESULTS_2:.*]] = llvm.insertvalue %[[RES_22]], %[[RESULTS_1]] - // CHECK: llvm.return %[[RESULTS_2]] - return %0, %0 : memref<*xf32>, memref<*xf32> -} - -// Check that the result memrefs are passed as parameter -// CHECK-LABEL: @_mlir_ciface_return_two_var_memref -// CHECK-SAME: (%{{.*}}: !llvm.ptr)>, struct<(i64, ptr)>)>>, -// CHECK-SAME: %{{.*}}: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>) +func @caller_multiple_args_unranked(%arg0 : memref<*xf32>, %arg1 : f32, + %arg2 : memref<*xf32>, %arg3 : index) { + call @callee_multiple_args_unranked(%arg0, %arg1, %arg2, %arg3) + : (memref<*xf32>, f32, memref<*xf32>, index) -> () + return +} +// CHECK-LABEL: llvm.func @caller_multiple_args_unranked +// CHECK-SAME: %[[ARG0_RANK:.*]]: i64, %[[ARG0_INNER_DESC:arg1]]: !llvm.ptr, +// CHECK-SAME: %[[FARG:arg2]]: f32, +// CHECK-SAME: %[[ARG1_RANK:.*]]: i64, %[[ARG1_INNER_DESC:arg4]]: !llvm.ptr, +// CHECK-SAME: %[[IARG:.*]]: i64 + +// Populate the descriptor for arg0. 
+// CHECK: %[[ARG0_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG0_DESC1:.*]] = llvm.insertvalue %[[ARG0_RANK]], %[[ARG0_DESC0]][0] +// CHECK: %[[ARG0_DESC2:.*]] = llvm.insertvalue %[[ARG0_INNER_DESC]], %[[ARG0_DESC1]][1] + +// Populate the descriptor for arg2. +// CHECK: %[[ARG1_DESC0:.*]] = llvm.mlir.undef +// CHECK: %[[ARG1_DESC1:.*]] = llvm.insertvalue %[[ARG1_RANK]], %[[ARG1_DESC0]][0] +// CHECK: %[[ARG1_DESC2:.*]] = llvm.insertvalue %[[ARG1_INNER_DESC]], %[[ARG1_DESC1]][1] + +// Unpack descriptor for arg0. +// CHECK: %[[ARG0_RANK:.*]] = llvm.extractvalue %[[ARG0_DESC2]][0] +// CHECK: %[[ARG0_INNER_DESC:.*]] = llvm.extractvalue %[[ARG0_DESC2]][1] + +// Unpack descriptor for arg2. +// CHECK: %[[ARG1_RANK:.*]] = llvm.extractvalue %[[ARG1_DESC2]][0] +// CHECK: %[[ARG1_INNER_DESC:.*]] = llvm.extractvalue %[[ARG1_DESC2]][1] + +// Call the function and return. +// CHECK: llvm.call @callee_multiple_args_unranked(%[[ARG0_RANK]], %[[ARG0_INNER_DESC]], %[[FARG]], %[[ARG1_RANK]], %[[ARG1_INNER_DESC]], %[[IARG]]) +// CHECK: llvm.return