diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp
@@ -37,11 +37,15 @@
   return false;
 }
 
-/// Move all alloca operations with a constant size in the former entry block of
-/// the newly inlined callee into the entry block of the caller, and insert
-/// lifetime intrinsics that limit their scope to the inlined blocks.
-static void moveConstantAllocasToEntryBlock(
-    iterator_range<Region::iterator> inlinedBlocks) {
+/// Handles alloca operations in the inlined blocks: Moves all alloca operations
+/// with a constant size in the former entry block of the newly inlined callee
+/// into the entry block of the caller so they become part of the function
+/// prologue/epilogue during code generation; inserts lifetime intrinsics that
+/// limit the scope of inlined static allocas to the inlined blocks; and inserts
+/// StackSave and StackRestore operations if dynamic allocas were inlined.
+static void
+handleInlinedAllocas(Operation *call,
+                     iterator_range<Region::iterator> inlinedBlocks) {
   Block *calleeEntryBlock = &(*inlinedBlocks.begin());
   Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin());
   if (calleeEntryBlock == callerEntryBlock)
@@ -49,21 +53,42 @@
     return;
   SmallVector<std::tuple<LLVM::AllocaOp, IntegerAttr, bool>> allocasToMove;
   bool shouldInsertLifetimes = false;
+  bool hasDynamicAlloca = false;
   // Conservatively only move alloca operations that are part of the entry block
   // and do not inspect nested regions, since they may execute conditionally or
   // have other unknown semantics.
   for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
     IntegerAttr arraySize;
-    if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize)))
+    if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
+      hasDynamicAlloca = true;
       continue;
+    }
     bool shouldInsertLifetime =
         arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
     shouldInsertLifetimes |= shouldInsertLifetime;
     allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
   }
-  if (allocasToMove.empty())
+  // Check the remaining inlined blocks for dynamic allocas as well.
+  if (!hasDynamicAlloca) {
+    for (auto &block : llvm::drop_begin(inlinedBlocks)) {
+      for (auto allocaOp : block.getOps<LLVM::AllocaOp>()) {
+        if (!matchPattern(allocaOp.getArraySize(), m_Constant())) {
+          hasDynamicAlloca = true;
+          break;
+        }
+      }
+      if (hasDynamicAlloca)
+        break;
+    }
+  }
+  if (allocasToMove.empty() && !hasDynamicAlloca)
     return;
-  OpBuilder builder(callerEntryBlock, callerEntryBlock->begin());
+  OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
+  Value stackPtr;
+  if (hasDynamicAlloca)
+    stackPtr = builder.create<LLVM::StackSaveOp>(
+        call->getLoc(), LLVM::LLVMPointerType::get(call->getContext()));
+  builder.setInsertionPoint(callerEntryBlock, callerEntryBlock->begin());
   for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
     auto newConstant = builder.create<LLVM::ConstantOp>(
         allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
@@ -78,19 +103,20 @@
     allocaOp->moveAfter(newConstant);
     allocaOp.getArraySizeMutable().assign(newConstant.getResult());
   }
-  if (!shouldInsertLifetimes)
+  if (!shouldInsertLifetimes && !hasDynamicAlloca)
     return;
   // Insert a lifetime end intrinsic before each return in the callee function.
   for (Block &block : inlinedBlocks) {
     if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
       continue;
     builder.setInsertionPoint(block.getTerminator());
+    if (hasDynamicAlloca)
+      builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr);
     for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
-      if (!shouldInsertLifetime)
-        continue;
-      builder.create<LLVM::LifetimeEndOp>(
-          allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
-          allocaOp.getResult());
+      if (shouldInsertLifetime)
+        builder.create<LLVM::LifetimeEndOp>(
+            allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
+            allocaOp.getResult());
     }
   }
 }
@@ -308,6 +334,8 @@
           LLVM::MemcpyOp,
           LLVM::MemmoveOp,
           LLVM::MemsetOp,
+          LLVM::StackRestoreOp,
+          LLVM::StackSaveOp,
           LLVM::StoreOp,
           LLVM::UnreachableOp>(op))
     return true;
@@ -369,12 +397,7 @@
   void processInlinedCallBlocks(
       Operation *call,
       iterator_range<Region::iterator> inlinedBlocks) const override {
-    // Alloca operations with a constant size that were in the entry block of
-    // the callee should be moved to the entry block of the caller, as this will
-    // fold into prologue/epilogue code during code generation.
-    // This is not implemented as a standalone pattern because we need to know
-    // which newly inlined block was previously the entry block of the callee.
-    moveConstantAllocasToEntryBlock(inlinedBlocks);
+    handleInlinedAllocas(call, inlinedBlocks);
   }
 
   // Keeping this (immutable) state on the interface allows us to look up
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -6,6 +6,7 @@
 func.func @inner_func_inlinable(%ptr : !llvm.ptr) -> i32 {
   %0 = llvm.mlir.constant(42 : i32) : i32
+  %stack = llvm.intr.stacksave : !llvm.ptr
   llvm.store %0, %ptr { alignment = 8 } : i32, !llvm.ptr
   %1 = llvm.load %ptr { alignment = 8 } : !llvm.ptr -> i32
   llvm.intr.dbg.value #variable = %0 : i32
@@ -19,12 +20,14 @@
 ^bb1:
   llvm.unreachable
 ^bb2:
+  llvm.intr.stackrestore %stack : !llvm.ptr
   return %1 : i32
 }
 
 // CHECK-LABEL: func.func @test_inline(
 // CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]
 // CHECK: %[[CST:.*]] = llvm.mlir.constant(42
+// CHECK: %[[STACK:.+]] = llvm.intr.stacksave
 // CHECK: llvm.store %[[CST]], %[[PTR]]
 // CHECK: %[[RES:.+]] = llvm.load %[[PTR]]
 // CHECK: llvm.intr.dbg.value #{{.+}} = %[[CST]]
@@ -33,6 +36,7 @@
 // CHECK: "llvm.intr.memmove"(%[[PTR]], %[[PTR]]
 // CHECK: "llvm.intr.memcpy"(%[[PTR]], %[[PTR]]
 // CHECK: llvm.unreachable
+// CHECK: llvm.intr.stackrestore %[[STACK]]
 func.func @test_inline(%ptr : !llvm.ptr) -> i32 {
   %0 = call @inner_func_inlinable(%ptr) : (!llvm.ptr) -> i32
   return %0 : i32
@@ -253,20 +257,23 @@
   // CHECK: llvm.intr.lifetime.start
   %0 = llvm.call @static_alloca() : () -> f32
   // CHECK: llvm.intr.lifetime.end
-  // CHECK: llvm.br
+  // CHECK: llvm.br ^[[BB3:[a-zA-Z0-9_]+]]
   llvm.br ^bb3(%0: f32)
   // CHECK: ^{{.+}}:
 ^bb2:
   // Check that the dynamic alloca was inlined, but that it was not moved to the
   // entry block.
+  // CHECK: %[[STACK:[a-zA-Z0-9_]+]] = llvm.intr.stacksave
   // CHECK: llvm.add
-  // CHECK-NEXT: llvm.alloca
+  // CHECK: llvm.alloca
+  // CHECK: llvm.intr.stackrestore %[[STACK]]
   // CHECK-NOT: llvm.call @dynamic_alloca
   %1 = llvm.call @dynamic_alloca(%size) : (i32) -> f32
-  // CHECK: llvm.br
+  // CHECK: llvm.br ^[[BB3]]
   llvm.br ^bb3(%1: f32)
-  // CHECK: ^{{.+}}:
+  // CHECK: ^[[BB3]]
 ^bb3(%arg : f32):
+  // CHECK-NEXT: return
   llvm.return %arg : f32
 }