diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -312,6 +312,20 @@ insert(*I); } + /// Returns the value associated with the key in the map if it exists. If it + /// does not exist, emplaces a default value for the key and returns a + /// reference to the newly created value. + ValueT &getOrInsertDefault(KeyT &&Key) { + return try_emplace(std::move(Key)).first->second; + } + + /// Returns the value associated with the key in the map if it exists. If it + /// does not exist, emplaces a default value for the key and returns a + /// reference to the newly created value. + ValueT &getOrInsertDefault(const KeyT &Key) { + return try_emplace(Key).first->second; + } + bool erase(const KeyT &Val) { BucketT *TheBucket; if (!LookupBucketFor(Val, TheBucket)) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -29,6 +29,7 @@ #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Support/ThreadLocalCache.h" +#include "mlir/Transforms/Mem2Reg.h" #include "llvm/ADT/PointerEmbeddedInt.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -6,6 +6,7 @@ include "mlir/Dialect/LLVMIR/LLVMEnums.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Interfaces/Mem2RegInterfaces.td" // Operations that correspond to LLVM intrinsics. With MLIR operation set being // extendable, there is no reason to introduce a hard boundary between "core" @@ -214,7 +215,8 @@ /// Base operation for lifetime markers. The LLVM intrinsics require the size /// operand to be an immediate. In MLIR it is encoded as an attribute. -class LLVM_LifetimeBaseOp<string opName> : LLVM_ZeroResultIntrOp<opName, [1]> { +class LLVM_LifetimeBaseOp<string opName> : LLVM_ZeroResultIntrOp<opName, [1], + [DeclareOpInterfaceMethods<PromotableOpInterface>]> { let arguments = (ins I64Attr:$size, LLVM_AnyPointer:$ptr); // Custom builder to convert the size attribute to an integer. @@ -322,7 +324,8 @@ // Debug function intrinsics.
// -class LLVM_DbgIntrOp<string name, string argName> : LLVM_IntrOp<name, [], [], [], 0> { +class LLVM_DbgIntrOp<string name, string argName, list<Trait> traits = []> + : LLVM_IntrOp<name, [], [], traits, 0> { let llvmBuilder = [{ llvm::Module *module = builder.GetInsertBlock()->getModule(); llvm::LLVMContext &ctx = module->getContext(); @@ -363,7 +366,8 @@ }]; } -def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr"> { +def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr", + [DeclareOpInterfaceMethods<PromotableOpInterface>]> { let summary = "Declare the address of a local debug info variable."; let arguments = (ins LLVM_AnyPointer:$addr, LLVM_DILocalVariableAttr:$varInfo); } diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -22,6 +22,7 @@ include "mlir/Interfaces/CallInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Interfaces/Mem2RegInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" class LLVM_Builder<string builder> { @@ -171,7 +172,9 @@ LLVM_ScalarOrVectorOf<LLVM_AnyFloat>, "fneg", "FNeg">; // Memory-related operations. -def LLVM_AllocaOp : LLVM_Op<"alloca">, LLVM_MemOpPatterns { +def LLVM_AllocaOp : LLVM_Op<"alloca", + [DeclareOpInterfaceMethods<PromotableAllocationOpInterface>]>, + LLVM_MemOpPatterns { let arguments = (ins AnyInteger:$arraySize, OptionalAttr<I64Attr>:$alignment, OptionalAttr<TypeAttr>:$elem_type, @@ -228,7 +231,8 @@ let hasVerifier = 1; } -def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure]> { +def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure, + DeclareOpInterfaceMethods<PromotableOpInterface>]> { let arguments = (ins LLVM_ScalarOrVectorOf<LLVM_AnyPointer>:$base, Variadic<LLVM_ScalarOrVectorOf<AnyInteger>>:$dynamicIndices, DenseI32ArrayAttr:$rawConstantIndices, @@ -311,7 +315,8 @@ let hasVerifier = 1; } -def LLVM_LoadOp : LLVM_MemAccessOpBase<"load"> { +def LLVM_LoadOp : LLVM_MemAccessOpBase<"load", + [DeclareOpInterfaceMethods<PromotableMemOpInterface>]> { dag args = (ins Arg<LLVM_PointerTo<LLVM_LoadableType>, "", [MemRead]>:$addr, OptionalAttr<I64Attr>:$alignment, UnitAttr:$volatile_, @@ -382,7 +387,8 @@ let hasVerifier = 1; } -def LLVM_StoreOp : LLVM_MemAccessOpBase<"store"> { +def LLVM_StoreOp : LLVM_MemAccessOpBase<"store", + [DeclareOpInterfaceMethods<PromotableMemOpInterface>]> { dag args = (ins LLVM_LoadableType:$value, Arg<LLVM_PointerTo<LLVM_LoadableType>,"",[MemWrite]>:$addr, OptionalAttr<I64Attr>:$alignment, @@ -465,14 +471,15 @@ $_location, $_resultType, $arg); }]; } -def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast", - LLVM_AnyNonAggregate, LLVM_AnyNonAggregate> { +def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast", LLVM_AnyNonAggregate, + LLVM_AnyNonAggregate, [DeclareOpInterfaceMethods<PromotableOpInterface>]> { let hasFolder = 1; let hasVerifier = 1; } def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "AddrSpaceCast", - LLVM_ScalarOrVectorOf<LLVM_AnyPointer>, - LLVM_ScalarOrVectorOf<LLVM_AnyPointer>> { + LLVM_ScalarOrVectorOf<LLVM_AnyPointer>, + LLVM_ScalarOrVectorOf<LLVM_AnyPointer>, + [DeclareOpInterfaceMethods<PromotableOpInterface>]> { let hasFolder = 1; } def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "IntToPtr", diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt --- a/mlir/include/mlir/Interfaces/CMakeLists.txt +++ b/mlir/include/mlir/Interfaces/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_interface(InferIntRangeInterface) add_mlir_interface(InferTypeOpInterface) add_mlir_interface(LoopLikeInterface) +add_mlir_interface(Mem2RegInterfaces) add_mlir_interface(ParallelCombiningOpInterface) add_mlir_interface(RuntimeVerifiableOpInterface) add_mlir_interface(ShapedOpInterfaces) diff --git a/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h new file mode 100644 --- /dev/null +++
b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h @@ -0,0 +1,39 @@ +//===-- Mem2RegInterfaces.h - Mem2Reg interfaces ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INTERFACES_MEM2REGINTERFACES_H +#define MLIR_INTERFACES_MEM2REGINTERFACES_H + +#include "mlir/IR/Dominance.h" +#include "mlir/IR/OpDefinition.h" + +namespace mlir { + +/// Represents a slot in memory. This is generated by an allocating operation +/// (for example alloca). +struct MemorySlot { + /// Pointer to the memory slot, used by operations to refer to it. + Value ptr; + /// Type of the value contained in the slot. + Type elemType; +}; + +/// Returned by operation promotion logic requesting the deletion of an +/// operation. +enum class DeletionKind { + /// Keep the operation after promotion. + Keep, + /// Delete the operation after promotion. + Delete, +}; + +} // namespace mlir + +#include "mlir/Interfaces/Mem2RegInterfaces.h.inc" + +#endif // MLIR_INTERFACES_MEM2REGINTERFACES_H diff --git a/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td @@ -0,0 +1,196 @@ +//===-- Mem2RegInterfaces.td - Mem2Reg interfaces ----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INTERFACES_MEM2REGINTERFACES +#define MLIR_INTERFACES_MEM2REGINTERFACES + +include "mlir/IR/OpBase.td" + +def PromotableAllocationOpInterface + : OpInterface<"PromotableAllocationOpInterface"> { + let description = [{ + Describes an operation allocating a memory slot that can be promoted into + SSA values. + }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Returns a list of memory slots for which promotion should be attempted. + This only considers the local semantics of the allocator, ignoring + whether the slot pointer is properly used or not. This allocator is the + "owner" of the returned slots, meaning no two allocators should return + the same slot. The content of the memory slot must only be reachable + using loads and stores to the provided slot pointer, no aliasing is + allowed. + + Promotion of the slot will lead to the slot pointer no longer being + used, leaving the content of the memory slot unreachable. + }], "::llvm::SmallVector<::mlir::MemorySlot>", "getPromotableSlots", + (ins) + >, + InterfaceMethod<[{ + Provides the default Value of this memory slot. The provided Value + will be used as the reaching definition of loads done before any store. + This Value must outlive the promotion and dominate all the uses of this + slot's pointer. The provided builder can be used to create the default + value on the fly. + + The builder is located at the beginning of the block where the slot + pointer is defined. + }], "::mlir::Value", "getDefaultValue", + (ins "const ::mlir::MemorySlot &":$slot, "::mlir::OpBuilder &":$builder) + >, + InterfaceMethod<[{ + Hook triggered for every new block argument added to a block. 
+ This will only be called for slots declared by this operation. + + The builder is located at the beginning of the block on call. + }], + "void", "handleBlockArgument", + (ins + "const ::mlir::MemorySlot &":$slot, + "::mlir::BlockArgument":$argument, + "::mlir::OpBuilder &":$builder + ) + >, + InterfaceMethod<[{ + Hook triggered once the promotion of a slot is complete. This can + also clean up the created default value if necessary. + This will only be called for slots declared by this operation. + }], + "void", "handlePromotionComplete", + (ins "const ::mlir::MemorySlot &":$slot, "::mlir::Value":$defaultValue) + >, + ]; +} + +def PromotableMemOpInterface : OpInterface<"PromotableMemOpInterface"> { + let description = [{ + Describes an operation that can load from memory slots and/or store + to memory slots. Loads and stores must be of whole values of the same + type as the slot itself. + + If the same operation does both loads and stores on the same slot, the + load must semantically happen first. + }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Gets whether this operation loads from the specified slot. + }], + "bool", "loadsFrom", + (ins "const ::mlir::MemorySlot &":$slot) + >, + InterfaceMethod<[{ + Gets the value stored to the provided memory slot, or returns a null + value if this operation does not store to this slot. An operation + storing a value to a slot must always be able to provide the value it + stores. This method is only called on operations that use the slot. + }], + "::mlir::Value", "getStored", + (ins "const ::mlir::MemorySlot &":$slot) + >, + InterfaceMethod<[{ + Checks that this operation can be promoted to no longer use the provided + blocking uses, in the context of promoting `slot`. + + If the removal procedure of the use will require that other uses get + removed, that dependency should be added to the `newBlockingUses` + argument. Dependent uses must only be uses of results of this operation. + }], "bool", "canUsesBeRemoved", + (ins "const ::mlir::MemorySlot &":$slot, + "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses, + "::llvm::SmallVectorImpl<::mlir::OpOperand *> &":$newBlockingUses) + >, + InterfaceMethod<[{ + Transforms IR to ensure that the current operation does not use the + provided memory slot anymore. `reachingDefinition` contains the value + currently stored in the provided memory slot, immediately before the + current operation. + + During the transformation, *no operation should be deleted*. + The operation can only schedule its own deletion by returning the + appropriate `DeletionKind`. The deletion must be legal assuming the + blocking uses passed through the `newBlockingUses` list in + `canUsesBeRemoved` have been removed. + + After calling this method, the blocking uses should have disappeared + or this operation should have scheduled its own deletion. + + This method will only be called after ensuring promotion is allowed via + `canUsesBeRemoved`. The requested blocking use removal may or may not + have been done at the point of calling this method, but it will be done + eventually. + + The builder is located after the promotable operation on call.
+ }], + "::mlir::DeletionKind", + "removeBlockingUses", + (ins "const ::mlir::MemorySlot &":$slot, + "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses, + "::mlir::OpBuilder &":$builder, + "::mlir::Value":$reachingDefinition) + >, + ]; +} + +def PromotableOpInterface : OpInterface<"PromotableOpInterface"> { + let description = [{ + Describes an operation that can be transformed or deleted so it no longer + uses a provided value (blocking use), in case this would allow the promotion + of a memory slot. + }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Checks that this operation can be promoted to no longer use the provided + blocking uses, in the context of promoting `slot`. + + If the removal procedure of the use will require that other uses get + removed, that dependency should be added to the `newBlockingUses` + argument. Dependent uses must only be uses of results of this operation. + }], "bool", "canUsesBeRemoved", + (ins "const ::mlir::MemorySlot &":$slot, + "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses, + "::llvm::SmallVectorImpl<::mlir::OpOperand *> &":$newBlockingUses) + >, + InterfaceMethod<[{ + Transforms IR to ensure that the current operation does not use the + provided memory slot anymore. In contrast to `PromotableMemOpInterface`, + operations implementing this interface must not require access to the + reaching definition of the content of the slot. + + During the transformation, *no operation should be deleted*. + The operation can only schedule its own deletion by returning the + appropriate `DeletionKind`. The deletion must be legal assuming the + blocking uses passed through the `newBlockingUses` list in + `canUsesBeRemoved` have been removed. + + After calling this method, the blocking uses should have disappeared + or this operation should have scheduled its own deletion. + + This method will only be called after ensuring promotion is allowed via + `canUsesBeRemoved`. The requested blocking use removal may or may not + have been done at the point of calling this method, but it will be done + eventually. + + The builder is located after the promotable operation on call. + }], + "::mlir::DeletionKind", + "removeBlockingUses", + (ins "const ::mlir::MemorySlot &":$slot, + "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses, + "::mlir::OpBuilder &":$builder) + >, + ]; +} + +#endif // MLIR_INTERFACES_MEM2REGINTERFACES diff --git a/mlir/include/mlir/Transforms/Mem2Reg.h b/mlir/include/mlir/Transforms/Mem2Reg.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Transforms/Mem2Reg.h @@ -0,0 +1,26 @@ +//===-- Mem2Reg.h - Mem2Reg definitions -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TRANSFORMS_MEM2REG_H +#define MLIR_TRANSFORMS_MEM2REG_H + +#include "mlir/IR/Dominance.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/Interfaces/Mem2RegInterfaces.h" + +namespace mlir { + +/// Attempts to promote the memory slots of the provided allocators. Succeeds if +/// at least one memory slot was promoted.
+LogicalResult +tryToPromoteMemorySlots(ArrayRef<PromotableAllocationOpInterface> allocators, + OpBuilder &builder, DominanceInfo &dominance); + +} // namespace mlir + +#endif // MLIR_TRANSFORMS_MEM2REG_H diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -33,6 +33,7 @@ #define GEN_PASS_DECL_CSEPASS #define GEN_PASS_DECL_INLINER #define GEN_PASS_DECL_LOOPINVARIANTCODEMOTION +#define GEN_PASS_DECL_MEM2REG #define GEN_PASS_DECL_PRINTIRPASS #define GEN_PASS_DECL_PRINTOPSTATS #define GEN_PASS_DECL_STRIPDEBUGINFO diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -172,6 +172,25 @@ let constructor = "mlir::createLoopInvariantCodeMotionPass()"; } +def Mem2Reg : Pass<"mem2reg"> { + let summary = "Promotes memory slots into values."; + let description = [{ + This pass removes loads out of and stores into a memory slot, and turns + them into direct uses of SSA values. This is done generically using the + `PromotableAllocationOpInterface`, `PromotableOpInterface` and + `PromotableMemOpInterface` interfaces. + + This pass will attempt to compute which definitions of the content of + the memory slot reach operations that use the memory slot pointer. It + will rewire or remove operations that use the slot pointer so they no + longer use it. If any of this is not possible, the IR will be left + unchanged. + + This pass only supports unstructured control flow. Promotion of operations + within subregions will not happen. + }]; +} + def PrintOpStats : Pass<"print-op-stats"> { let summary = "Print statistics of operations"; let constructor = "mlir::createPrintOpStatsPass()"; diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -6,6 +6,7 @@ IR/LLVMDialect.cpp IR/LLVMInlining.cpp IR/LLVMInterfaces.cpp + IR/LLVMMem2Reg.cpp IR/LLVMTypes.cpp IR/LLVMTypeSyntax.cpp @@ -33,6 +34,7 @@ MLIRDataLayoutInterfaces MLIRInferTypeOpInterface MLIRIR + MLIRMem2RegInterfaces MLIRSideEffectInterfaces MLIRSupport ) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp @@ -0,0 +1,210 @@ +//===- LLVMMem2Reg.cpp - Mem2Reg Interfaces ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Mem2Reg-related interfaces for LLVM dialect operations. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Interfaces/Mem2RegInterfaces.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// Interfaces for AllocaOp +//===----------------------------------------------------------------------===// + +llvm::SmallVector<MemorySlot> LLVM::AllocaOp::getPromotableSlots() { + if (!getOperation()->getBlock()->isEntryBlock()) + return {}; + + Type elemType = + getElemType() ?
*getElemType() : getResult().getType().cast<LLVM::LLVMPointerType>().getElementType(); + return {MemorySlot{getResult(), elemType}}; +} + +Value LLVM::AllocaOp::getDefaultValue(const MemorySlot &slot, + OpBuilder &builder) { + return builder.create<LLVM::UndefOp>(getLoc(), slot.elemType); +} + +void LLVM::AllocaOp::handleBlockArgument(const MemorySlot &slot, + BlockArgument argument, + OpBuilder &builder) { + for (Operation *user : getOperation()->getUsers()) + if (auto declareOp = llvm::dyn_cast<LLVM::DbgDeclareOp>(user)) + builder.create<LLVM::DbgValueOp>(declareOp.getLoc(), argument, + declareOp.getVarInfo()); +} + +void LLVM::AllocaOp::handlePromotionComplete(const MemorySlot &slot, + Value defaultValue) { + if (defaultValue && defaultValue.use_empty()) + defaultValue.getDefiningOp()->erase(); + erase(); +} + +//===----------------------------------------------------------------------===// +// Interfaces for LoadOp/StoreOp +//===----------------------------------------------------------------------===// + +bool LLVM::LoadOp::loadsFrom(const MemorySlot &slot) { + return getAddr() == slot.ptr; +} + +Value LLVM::LoadOp::getStored(const MemorySlot &slot) { return {}; } + +bool LLVM::StoreOp::loadsFrom(const MemorySlot &slot) { return false; } + +Value LLVM::StoreOp::getStored(const MemorySlot &slot) { + return getAddr() == slot.ptr ? getValue() : Value(); +} + +bool LLVM::LoadOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + if (blockingUses.size() != 1) + return false; + Value blockingUse = (*blockingUses.begin())->get(); + // If the blocking use is the slot ptr itself, there will be enough + // context to reconstruct the result of the load at removal time, so it can + // be removed (provided it loads the exact stored value and is not + // volatile). + return blockingUse == slot.ptr && getAddr() == slot.ptr && + getResult().getType() == slot.elemType && !getVolatile_(); +} + +DeletionKind LLVM::LoadOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder, Value reachingDefinition) { + // `canUsesBeRemoved` checked this blocking use must be the loaded slot + // pointer. + getResult().replaceAllUsesWith(reachingDefinition); + return DeletionKind::Delete; +} + +bool LLVM::StoreOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + if (blockingUses.size() != 1) + return false; + Value blockingUse = (*blockingUses.begin())->get(); + // If the blocking use is the slot ptr itself, dropping the store is + // fine, provided we are currently promoting its target value. Don't allow a + // store OF the slot pointer, only INTO the slot pointer. + return blockingUse == slot.ptr && getAddr() == slot.ptr && + getValue() != slot.ptr && getValue().getType() == slot.elemType && + !getVolatile_(); +} + +DeletionKind LLVM::StoreOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder, Value reachingDefinition) { + // `canUsesBeRemoved` checked this blocking use must be the stored slot + // pointer.
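+  // In addition, propagate the debug info attached to the slot: each + // llvm.intr.dbg.declare of the slot pointer gives rise to an + // llvm.intr.dbg.value recording the value being stored, so variable + // debug info survives the removal of the store.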
+ for (Operation *user : slot.ptr.getUsers()) + if (auto declareOp = llvm::dyn_cast<LLVM::DbgDeclareOp>(user)) + builder.create<LLVM::DbgValueOp>(declareOp->getLoc(), getValue(), + declareOp.getVarInfo()); + return DeletionKind::Delete; +} + +//===----------------------------------------------------------------------===// +// Interfaces for discardable ops +//===----------------------------------------------------------------------===// + +/// Conditions the deletion of the operation on the removal of all its uses. +static bool forwardToUsers(Operation *op, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + for (Value result : op->getResults()) + for (OpOperand &use : result.getUses()) + newBlockingUses.push_back(&use); + return true; +} + +bool LLVM::BitcastOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + return forwardToUsers(*this, newBlockingUses); +} + +DeletionKind LLVM::BitcastOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder) { + return DeletionKind::Delete; +} + +bool LLVM::AddrSpaceCastOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + return forwardToUsers(*this, newBlockingUses); +} + +DeletionKind LLVM::AddrSpaceCastOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder) { + return DeletionKind::Delete; +} + +bool LLVM::LifetimeStartOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + return true; +} + +DeletionKind LLVM::LifetimeStartOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder) { + return DeletionKind::Delete; +} + +bool LLVM::LifetimeEndOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + return true; +} + +DeletionKind LLVM::LifetimeEndOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder) { + return DeletionKind::Delete; +} + +bool LLVM::DbgDeclareOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + return true; +} + +DeletionKind LLVM::DbgDeclareOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder) { + return DeletionKind::Delete; +} + +static bool hasAllZeroIndices(LLVM::GEPOp gepOp) { + return llvm::all_of(gepOp.getIndices(), [](auto index) { + auto indexAttr = index.template dyn_cast<IntegerAttr>(); + return indexAttr && indexAttr.getValue() == 0; + }); +} + +bool LLVM::GEPOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + SmallVectorImpl<OpOperand *> &newBlockingUses) { + // GEP can be removed as long as it is a no-op and its users can be removed.
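+  // (A GEP with all-zero indices computes exactly the address of its base + // pointer, which is what makes it a no-op; its own uses are then simply + // forwarded as additional blocking uses.)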
+ if (!hasAllZeroIndices(*this)) + return false; + return forwardToUsers(*this, newBlockingUses); +} + +DeletionKind LLVM::GEPOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses, + OpBuilder &builder) { + return DeletionKind::Delete; +} diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt --- a/mlir/lib/Interfaces/CMakeLists.txt +++ b/mlir/lib/Interfaces/CMakeLists.txt @@ -9,6 +9,7 @@ InferIntRangeInterface.cpp InferTypeOpInterface.cpp LoopLikeInterface.cpp + Mem2RegInterfaces.cpp ParallelCombiningOpInterface.cpp RuntimeVerifiableOpInterface.cpp ShapedOpInterfaces.cpp @@ -45,6 +46,7 @@ add_mlir_interface_library(InferIntRangeInterface) add_mlir_interface_library(InferTypeOpInterface) add_mlir_interface_library(LoopLikeInterface) +add_mlir_interface_library(Mem2RegInterfaces) add_mlir_interface_library(ParallelCombiningOpInterface) add_mlir_interface_library(RuntimeVerifiableOpInterface) add_mlir_interface_library(ShapedOpInterfaces) diff --git a/mlir/lib/Interfaces/Mem2RegInterfaces.cpp b/mlir/lib/Interfaces/Mem2RegInterfaces.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Interfaces/Mem2RegInterfaces.cpp @@ -0,0 +1,11 @@ +//===-- Mem2RegInterfaces.cpp - Mem2Reg interfaces --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Interfaces/Mem2RegInterfaces.h" + +#include "mlir/Interfaces/Mem2RegInterfaces.cpp.inc" diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -8,6 +8,7 @@ Inliner.cpp LocationSnapshot.cpp LoopInvariantCodeMotion.cpp + Mem2Reg.cpp OpStats.cpp PrintIR.cpp SCCP.cpp @@ -27,6 +28,7 @@ MLIRAnalysis MLIRCopyOpInterface MLIRLoopLikeInterface + MLIRMem2RegInterfaces MLIRPass MLIRRuntimeVerifiableOpInterface MLIRSideEffectInterfaces diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Transforms/Mem2Reg.cpp @@ -0,0 +1,562 @@ +//===- Mem2Reg.cpp - Promotes memory slots into values ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/Mem2Reg.h" +#include "mlir/Analysis/SliceAnalysis.h" +#include "mlir/IR/Builders.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Transforms/Passes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/GenericIteratedDominanceFrontier.h" + +namespace mlir { +#define GEN_PASS_DEF_MEM2REG +#include "mlir/Transforms/Passes.h.inc" +} // namespace mlir + +using namespace mlir; + +/// mem2reg +/// +/// This pass turns unnecessary uses of automatically allocated memory slots +/// into direct Value-based operations. For example, it will simplify storing a +/// constant in a memory slot and immediately loading it back into a direct +/// use of that constant. In other words, given a memory slot addressed by a +/// non-aliased "pointer" Value, mem2reg removes all the uses of that pointer.
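+/// +/// As a sketch (mirroring the `@basic_store_load` test added in this patch), +/// the following sequence +/// %1 = llvm.alloca %0 x i64 : (i32) -> !llvm.ptr +/// llvm.store %arg0, %1 : i64, !llvm.ptr +/// %2 = llvm.load %1 : !llvm.ptr -> i64 +/// llvm.return %2 : i64 +/// is rewritten to return %arg0 directly, with the alloca, store and load all +/// deleted.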
+/// +/// Within a block, this is done by following the chain of stores and loads of +/// the slot and replacing the results of loads with the values previously +/// stored. If a load happens before any other store, the slot's default value +/// (provided by the allocator, e.g. an undef value) is used instead. +/// +/// Control flow can create situations where a load could be replaced by +/// multiple possible stores depending on the control flow path taken. As a +/// result, this pass must introduce new block arguments in some blocks to +/// accommodate the multiple possible definitions. Each predecessor will +/// populate the block argument with the definition reached at its end. With +/// this, the value stored can be well defined at block boundaries, allowing +/// the propagation of replacement through blocks. +/// +/// This pass computes this transformation in four main steps: +/// - A first step computes the list of operations that transitively use the +/// memory slot we would like to promote. The purpose of this phase is to +/// identify which uses must be removed to promote the slot, either by rewiring +/// the user or deleting it. Naturally, direct uses of the slot must be removed. +/// Sometimes additional uses must also be removed: this is notably the case +/// when a direct user of the slot cannot rewire its use and must delete itself, +/// and thus must make its users no longer use it. If any of those uses cannot +/// be removed by their users in any way, promotion cannot continue: this is +/// decided at this step. +/// - A second step computes the list of blocks where a block argument will be +/// needed ("merge points") without mutating the IR. These blocks are the blocks +/// leading to a definition clash between two predecessors. Such blocks happen +/// to be the Iterated Dominance Frontier (IDF) of the set of blocks containing +/// a store, as they represent the points where a clear defining dominator stops +/// existing. Computing this information in advance allows making sure the +/// terminators that will forward values are capable of doing so (inability to +/// do so aborts promotion at this step). +/// - A third step computes the reaching definition of the memory slot at each +/// blocking user. This is the core of the mem2reg algorithm, also known as +/// load-store forwarding. It analyzes loads and stores and propagates which +/// value must be stored in the slot at each blocking user. This is achieved by +/// doing a depth-first walk of the dominator tree of the function. This is +/// sufficient because the reaching definition at the beginning of a block is +/// either its new block argument if it is a merge block, or the definition +/// reaching the end of its immediate dominator (parent in the dominator tree). +/// We can therefore propagate this information down the dominator tree to +/// proceed with renaming within blocks. +/// - The final fourth step uses the reaching definitions to remove blocking +/// uses in topological order. +/// +/// The first two steps do not mutate the IR because promotion can still be +/// aborted at that point. Once the last two steps are reached, promotion is +/// guaranteed to succeed, at which point mutating the IR can safely start. +/// +/// For further reading, chapter three of SSA-based Compiler Design [1] +/// showcases SSA construction, of which mem2reg is an adaptation of the same +/// process. +/// +/// [1]: Rastello F. & Bouchez Tichadou F., SSA-based Compiler Design (2022), +/// Springer. + +namespace { + +/// The SlotPromoter handles the state of promoting a memory slot.
It wraps a +/// slot and its associated allocator, along with analysis results related to +/// the slot. +class SlotPromoter { +public: + SlotPromoter(MemorySlot slot, PromotableAllocationOpInterface allocator, + OpBuilder &builder, DominanceInfo &dominance); + + /// Prepares data for the promotion of the slot while checking if it can be + /// promoted. Succeeds if the slot can be promoted. This method does not + /// mutate IR. + LogicalResult prepareSlotPromotion(); + + /// Actually promotes the slot by mutating IR. This method must only be + /// called after a successful call to `SlotPromoter::prepareSlotPromotion`. + /// Promoting a slot does not invalidate the preparation of other slots. + void promoteSlot(); + +private: + /// This is the first step of the promotion algorithm. + /// Computes the transitive uses of the slot that block promotion. This finds + /// uses that would block the promotion, checks that the operation has a + /// solution to remove the blocking use, and potentially forwards the analysis + /// if the operation needs further blocking uses resolved to resolve its own + /// uses (typically, removing its users because it will delete itself to + /// resolve its own blocking uses). This fails if one of the transitive + /// users cannot remove a requested use, which prevents promotion. + LogicalResult computeBlockingUses(); + + /// Computes in which blocks the value stored in the slot is actually used, + /// meaning blocks leading to a load. This method uses `definingBlocks`, the + /// set of blocks containing a store to the slot (defining the value of the + /// slot). + SmallPtrSet<Block *, 16> + computeSlotLiveIn(SmallPtrSetImpl<Block *> &definingBlocks); + + /// This is the second step of the promotion algorithm. + /// Computes the points at which multiple re-definitions of the slot's value + /// (stores) may conflict. + void computeMergePoints(); + + /// Ensures predecessors of merge points can properly provide their current + /// definition of the value stored in the slot to the merge point. This can + /// notably be an issue if the terminator used does not have the ability to + /// forward values through block operands. + bool areMergePointsUsable(); + + /// Computes the reaching definition for all the operations that require + /// promotion. `reachingDef` is the value the slot should contain at the + /// beginning of the block. This method returns the reached definition at the + /// end of the block. + Value computeReachingDefInBlock(Block *block, Value reachingDef); + + /// This is the third step of the promotion algorithm. + /// Computes the reaching definition for all the operations that require + /// promotion. `reachingDef` corresponds to the initial value the + /// slot will contain before any write, typically a poison value. + void computeReachingDefInRegion(Region *region, Value reachingDef); + + /// This is the fourth step of the promotion algorithm. + /// Removes the blocking uses of the slot, in topological order. + void removeBlockingUses(); + + /// Lazily-constructed default value representing the content of the slot when + /// no store has been executed. This method may mutate IR. + Value getLazyDefaultValue(); + + MemorySlot slot; + PromotableAllocationOpInterface allocator; + OpBuilder &builder; + /// Potentially non-initialized default value. Use `getLazyDefaultValue` to + /// initialize it on demand. + Value defaultValue; + /// Blocks where multiple definitions of the slot value clash.
+ SmallPtrSet<Block *, 8> mergePoints; + /// Contains, for each operation, which uses must be eliminated by promotion. + /// This forms a DAG because a request that an operation eliminate some of + /// its uses always comes from an operation that must itself eliminate some + /// of its own uses. + DenseMap<Operation *, SmallPtrSet<OpOperand *, 4>> userToBlockingUses; + /// Contains the reaching definition at each operation. Reaching definitions + /// are only computed for promotable memory operations with blocking uses. + DenseMap<PromotableMemOpInterface, Value> reachingDefs; + DominanceInfo &dominance; +}; + +} // namespace + +SlotPromoter::SlotPromoter(MemorySlot slot, + PromotableAllocationOpInterface allocator, + OpBuilder &builder, DominanceInfo &dominance) + : slot(slot), allocator(allocator), builder(builder), dominance(dominance) { + bool isResultOrNewBlockArgument = slot.ptr.getDefiningOp() == allocator; + if (BlockArgument arg = slot.ptr.dyn_cast<BlockArgument>()) + isResultOrNewBlockArgument = isResultOrNewBlockArgument || + arg.getOwner()->getParentOp() == allocator; + (void)isResultOrNewBlockArgument; + assert(isResultOrNewBlockArgument && + "a slot must be a result of the allocator or an argument of the child " + "regions of the allocator"); +} + +Value SlotPromoter::getLazyDefaultValue() { + if (defaultValue) + return defaultValue; + + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(slot.ptr.getParentBlock()); + return defaultValue = allocator.getDefaultValue(slot, builder); +} + +LogicalResult SlotPromoter::computeBlockingUses() { + // The promotion of an operation may require the promotion of further + // operations (typically, removing operations that use an operation that must + // delete itself). We thus need to start from the uses of the slot pointer and + // propagate further requests through the forward slice. + + // First, record that all immediate users of the slot pointer must no longer + // use it. + for (OpOperand &use : slot.ptr.getUses()) { + SmallPtrSet<OpOperand *, 4> &blockingUses = + userToBlockingUses.getOrInsertDefault(use.getOwner()); + blockingUses.insert(&use); + } + + // Then, propagate the requirements for the removal of uses. The + // topologically-sorted forward slice allows for all blocking uses of an + // operation to have been computed before we reach it. Operations are + // traversed in topological order of their uses, starting from the slot + // pointer. + SetVector<Operation *> forwardSlice; + mlir::getForwardSlice(slot.ptr, &forwardSlice); + for (Operation *user : forwardSlice) { + // If the next operation has no blocking uses, everything is fine. + if (!userToBlockingUses.contains(user)) + continue; + + SmallPtrSet<OpOperand *, 4> &blockingUses = userToBlockingUses[user]; + + SmallVector<OpOperand *> newBlockingUses; + // If the operation decides it cannot deal with removing the blocking uses, + // promotion must fail. + if (auto promotable = dyn_cast<PromotableMemOpInterface>(user)) { + if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses)) + return failure(); + } else if (auto promotable = dyn_cast<PromotableOpInterface>(user)) { + if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses)) + return failure(); + } else { + // An operation that has blocking uses must be promoted. If it is not + // promotable, promotion must fail. + return failure(); + } + + // Then, register any new blocking uses for coming operations.
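+    // An operation may only request the removal of uses of its own results; + // the assertion below enforces this contract.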
+ for (OpOperand *blockingUse : newBlockingUses) { + assert(llvm::find(user->getResults(), blockingUse->get()) != + user->result_end()); + + SmallPtrSetImpl<OpOperand *> &newUserBlockingUseSet = + userToBlockingUses.getOrInsertDefault(blockingUse->getOwner()); + newUserBlockingUseSet.insert(blockingUse); + } + } + + // Because this pass currently only supports analyzing the parent region of + // the slot pointer, if a promotable memory op that needs promotion is + // outside of this region, promotion must fail because it will be impossible + // to provide a valid `reachingDef` for it. + for (auto &[toPromote, _] : userToBlockingUses) + if (isa<PromotableMemOpInterface>(toPromote) && + toPromote->getParentRegion() != slot.ptr.getParentRegion()) + return failure(); + + return success(); +} + +SmallPtrSet<Block *, 16> +SlotPromoter::computeSlotLiveIn(SmallPtrSetImpl<Block *> &definingBlocks) { + SmallPtrSet<Block *, 16> liveIn; + + // The worklist contains blocks in which it is known that the slot value is + // live-in. The further blocks where this value is live-in will be inferred + // from these. + SmallVector<Block *> liveInWorkList; + + // Blocks with a load before any other store to the slot are the starting + // points of the analysis. The slot value is definitely live-in in those + // blocks. + SmallPtrSet<Block *, 16> visited; + for (Operation *user : slot.ptr.getUsers()) { + if (visited.contains(user->getBlock())) + continue; + visited.insert(user->getBlock()); + + for (Operation &op : user->getBlock()->getOperations()) { + if (auto memOp = dyn_cast<PromotableMemOpInterface>(op)) { + // If this operation loads the slot, it is loading from it before + // ever writing to it, so the value is live-in in this block. + if (memOp.loadsFrom(slot)) { + liveInWorkList.push_back(user->getBlock()); + break; + } + + // If we store to the slot, further loads will see that value. + // Because we did not encounter any load before it, the value is not + // live-in. + if (memOp.getStored(slot)) + break; + } + } + } + + // The information is then propagated to the predecessors until a def site + // (store) is found. + while (!liveInWorkList.empty()) { + Block *liveInBlock = liveInWorkList.pop_back_val(); + + if (!liveIn.insert(liveInBlock).second) + continue; + + // If a predecessor is a defining block, either: + // - It has a load before its first store, in which case it is live-in but + // has already been processed in the initialization step. + // - It has a store before any load, in which case it is not live-in. + // We can thus at this stage insert into the worklist only predecessors that + // are not defining blocks.
+ for (Block *pred : liveInBlock->getPredecessors()) + if (!definingBlocks.contains(pred)) + liveInWorkList.push_back(pred); + } + + return liveIn; +} + +using IDFCalculator = llvm::IDFCalculatorBase<Block, false>; +void SlotPromoter::computeMergePoints() { + if (slot.ptr.getParentRegion()->hasOneBlock()) + return; + + IDFCalculator idfCalculator(dominance.getDomTree(slot.ptr.getParentRegion())); + + SmallPtrSet<Block *, 16> definingBlocks; + for (Operation *user : slot.ptr.getUsers()) + if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user)) + if (storeOp.getStored(slot)) + definingBlocks.insert(user->getBlock()); + + idfCalculator.setDefiningBlocks(definingBlocks); + + SmallPtrSet<Block *, 16> liveIn = computeSlotLiveIn(definingBlocks); + idfCalculator.setLiveInBlocks(liveIn); + + SmallVector<Block *> mergePointsVec; + idfCalculator.calculate(mergePointsVec); + + mergePoints.insert(mergePointsVec.begin(), mergePointsVec.end()); +} + +bool SlotPromoter::areMergePointsUsable() { + for (Block *mergePoint : mergePoints) + for (Block *pred : mergePoint->getPredecessors()) + if (!isa<BranchOpInterface>(pred->getTerminator())) + return false; + + return true; +} + +Value SlotPromoter::computeReachingDefInBlock(Block *block, Value reachingDef) { + for (Operation &op : block->getOperations()) { + if (auto memOp = dyn_cast<PromotableMemOpInterface>(op)) { + if (userToBlockingUses.contains(memOp)) + reachingDefs.insert({memOp, reachingDef}); + + if (Value stored = memOp.getStored(slot)) + reachingDef = stored; + } + } + + return reachingDef; +} + +void SlotPromoter::computeReachingDefInRegion(Region *region, + Value reachingDef) { + if (region->hasOneBlock()) { + computeReachingDefInBlock(&region->front(), reachingDef); + return; + } + + struct DfsJob { + llvm::DomTreeNodeBase<Block> *block; + Value reachingDef; + }; + + SmallVector<DfsJob> dfsStack; + + auto &domTree = dominance.getDomTree(slot.ptr.getParentRegion()); + + dfsStack.emplace_back<DfsJob>( + {domTree.getNode(&region->front()), reachingDef}); + + while (!dfsStack.empty()) { + DfsJob job = dfsStack.pop_back_val(); + Block *block = job.block->getBlock(); + + if (mergePoints.contains(block)) { + BlockArgument blockArgument = + block->addArgument(slot.elemType, slot.ptr.getLoc()); + builder.setInsertionPointToStart(block); + allocator.handleBlockArgument(slot, blockArgument, builder); + job.reachingDef = blockArgument; + } + + job.reachingDef = computeReachingDefInBlock(block, job.reachingDef); + + if (auto terminator = dyn_cast<BranchOpInterface>(block->getTerminator())) { + for (BlockOperand &blockOperand : terminator->getBlockOperands()) { + if (mergePoints.contains(blockOperand.get())) { + if (!job.reachingDef) + job.reachingDef = getLazyDefaultValue(); + terminator.getSuccessorOperands(blockOperand.getOperandNumber()) + .append(job.reachingDef); + } + } + } + + for (auto *child : job.block->children()) + dfsStack.emplace_back<DfsJob>({child, job.reachingDef}); + } +} + +void SlotPromoter::removeBlockingUses() { + llvm::SetVector<Operation *> usersToRemoveUses; + for (auto &user : llvm::make_first_range(userToBlockingUses)) + usersToRemoveUses.insert(user); + SetVector<Operation *> sortedUsersToRemoveUses = + mlir::topologicalSort(usersToRemoveUses); + + llvm::SmallVector<Operation *> toErase; + for (Operation *toPromote : llvm::reverse(sortedUsersToRemoveUses)) { + if (auto toPromoteMemOp = dyn_cast<PromotableMemOpInterface>(toPromote)) { + Value reachingDef = reachingDefs.lookup(toPromoteMemOp); + // If no reaching definition is known, this use is outside the reach of + // the slot. The default value should thus be used.
+ if (!reachingDef) + reachingDef = getLazyDefaultValue(); + + builder.setInsertionPointAfter(toPromote); + if (toPromoteMemOp.removeBlockingUses(slot, userToBlockingUses[toPromote], + builder, reachingDef) == + DeletionKind::Delete) + toErase.push_back(toPromote); + + continue; + } + + auto toPromoteBasic = cast<PromotableOpInterface>(toPromote); + builder.setInsertionPointAfter(toPromote); + if (toPromoteBasic.removeBlockingUses(slot, userToBlockingUses[toPromote], + builder) == DeletionKind::Delete) + toErase.push_back(toPromote); + } + + for (Operation *toEraseOp : toErase) + toEraseOp->erase(); + + assert(slot.ptr.use_empty() && + "after promotion, the slot pointer should not be used anymore"); +} + +void SlotPromoter::promoteSlot() { + computeReachingDefInRegion(slot.ptr.getParentRegion(), {}); + + // Now that reaching definitions are known, remove all users. + removeBlockingUses(); + + // Update terminators in dead branches to forward the default value if they + // are succeeded by a merge point. + for (Block *mergePoint : mergePoints) { + for (BlockOperand &use : mergePoint->getUses()) { + auto user = cast<BranchOpInterface>(use.getOwner()); + SuccessorOperands succOperands = + user.getSuccessorOperands(use.getOperandNumber()); + assert(succOperands.size() == mergePoint->getNumArguments() || + succOperands.size() + 1 == mergePoint->getNumArguments()); + if (succOperands.size() + 1 == mergePoint->getNumArguments()) + succOperands.append(getLazyDefaultValue()); + } + } + + allocator.handlePromotionComplete(slot, defaultValue); +} + +LogicalResult SlotPromoter::prepareSlotPromotion() { + // First, find the set of operations that will need to be changed for the + // promotion to happen. These operations need to resolve some of their uses, + // either by rewiring them or simply deleting themselves. If any of them + // cannot find a way to resolve their blocking uses, we abort the promotion. + if (failed(computeBlockingUses())) + return failure(); + + // Then, compute blocks in which two or more definitions of the allocated + // variable may conflict. These blocks will need a new block argument to + // accommodate this. + computeMergePoints(); + + // The slot can be promoted if the block arguments to be created can + // actually be populated with values, which may not be possible depending + // on their predecessors. + return success(areMergePointsUsable()); +} + +LogicalResult mlir::tryToPromoteMemorySlots( + ArrayRef<PromotableAllocationOpInterface> allocators, OpBuilder &builder, + DominanceInfo &dominance) { + // Actual promotion may invalidate the dominance analysis, so slot promotion + // is prepared in batches. + SmallVector<SlotPromoter> toPromote; + for (PromotableAllocationOpInterface allocator : allocators) { + for (MemorySlot slot : allocator.getPromotableSlots()) { + if (slot.ptr.use_empty()) + continue; + + SlotPromoter promoter(slot, allocator, builder, dominance); + if (succeeded(promoter.prepareSlotPromotion())) + toPromote.emplace_back(std::move(promoter)); + } + } + + for (SlotPromoter &promoter : toPromote) + promoter.promoteSlot(); + + return success(!toPromote.empty()); +} + +namespace { + +struct Mem2Reg : impl::Mem2RegBase<Mem2Reg> { + void runOnOperation() override { + Operation *scopeOp = getOperation(); + bool changed = false; + + for (Region &region : scopeOp->getRegions()) { + if (region.getBlocks().empty()) + continue; + + OpBuilder builder(&region.front(), region.front().begin()); + + // Promoting a slot can allow for further promotion of other slots, so + // promotion is retried until no promotion succeeds.
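+      // For example, promoting an alloca that only stores and reloads the + // address of another alloca (as in the @recursive_alloca test) leaves + // that other alloca with direct load/store uses only, making it + // promotable on the next iteration.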
+ while (true) { + DominanceInfo &dominance = getAnalysis<DominanceInfo>(); + + SmallVector<PromotableAllocationOpInterface> allocators; + // Build a list of allocators to attempt to promote the slots of. + for (Block &block : region) + for (Operation &op : block.getOperations()) + if (auto allocator = dyn_cast<PromotableAllocationOpInterface>(op)) + allocators.emplace_back(allocator); + + // Attempt promoting until no promotion succeeds. + if (failed(tryToPromoteMemorySlots(allocators, builder, dominance))) + break; + + changed = true; + getAnalysisManager().invalidate({}); + } + } + + if (!changed) + markAllAnalysesPreserved(); + } +}; + +} // namespace diff --git a/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir b/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir @@ -0,0 +1,104 @@ +// RUN: mlir-opt %s --pass-pipeline='builtin.module(llvm.func(mem2reg))' | FileCheck %s + +llvm.func @use(i64) +llvm.func @use_ptr(!llvm.ptr) + +#di_basic_type = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "ptr sized type", sizeInBits = 64> +#di_file = #llvm.di_file<"test.ll" in ""> +#di_compile_unit = #llvm.di_compile_unit<sourceLanguage = DW_LANG_C, file = #di_file, isOptimized = false, emissionKind = None> +#di_subprogram = #llvm.di_subprogram<compileUnit = #di_compile_unit, scope = #di_file, name = "blah", linkageName = "blah", file = #di_file, line = 7, subprogramFlags = Definition> +// CHECK: #[[$VAR:.*]] = #llvm.di_local_variable<{{.*}}name = "ptr sized var"{{.*}}> +#di_local_variable = #llvm.di_local_variable<scope = #di_subprogram, name = "ptr sized var", file = #di_file, line = 7, arg = 1, alignInBits = 0, type = #di_basic_type> +#di_local_variable_2 = #llvm.di_local_variable<scope = #di_subprogram, name = "ptr sized var 2", file = #di_file, line = 7, arg = 2, alignInBits = 0, type = #di_basic_type> + +// CHECK-LABEL: llvm.func @basic_store_load +llvm.func @basic_store_load(%arg0: i64) -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: = llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: llvm.store + llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr + // CHECK-NOT: llvm.intr.dbg.declare + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[LOADED:.*]] : i64 + // CHECK-NOT: llvm.intr.dbg.value + // CHECK-NOT: llvm.intr.dbg.declare + // CHECK-NOT: llvm.store + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 + // CHECK: llvm.return %[[LOADED]] : i64 + llvm.return %2 : i64 +} + +// CHECK-LABEL: llvm.func @block_argument_value +// CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}}) +llvm.func @block_argument_value(%arg0: i64, %arg1: i1) -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: = llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: llvm.intr.dbg.declare + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + llvm.cond_br %arg1, ^bb1, ^bb2 +// CHECK: ^{{.*}}: +^bb1: + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[ARG0]] + // CHECK-NOT: llvm.intr.dbg.value + llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr + llvm.br ^bb2 +// CHECK: ^{{.*}}(%[[BLOCKARG:.*]]: i64): +^bb2: + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG]] + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 + llvm.return %2 : i64 +} + +// CHECK-LABEL: llvm.func @double_block_argument_value +// CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}}) +llvm.func @double_block_argument_value(%arg0: i64, %arg1: i1) -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: = llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: llvm.intr.dbg.declare + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + llvm.cond_br %arg1, ^bb1, ^bb2 +// CHECK: ^{{.*}}(%[[BLOCKARG1:.*]]: i64): +^bb1: + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG1]] + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 + llvm.call @use(%2) : (i64) -> () + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[ARG0]] + llvm.store
%arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr + llvm.br ^bb2 + // CHECK-NOT: llvm.intr.dbg.value +// CHECK: ^{{.*}}(%[[BLOCKARG2:.*]]: i64): +^bb2: + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG2]] + llvm.br ^bb1 +} + +// CHECK-LABEL: llvm.func @always_drop_promoted_declare +// CHECK-NOT: = llvm.alloca +// CHECK-NOT: llvm.intr.dbg. +llvm.func @always_drop_promoted_declare() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + llvm.return +} + +// CHECK-LABEL: llvm.func @keep_dbg_if_not_promoted +llvm.func @keep_dbg_if_not_promoted() { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: = llvm.alloca + // CHECK-NOT: llvm.intr.dbg.declare + // CHECK: llvm.intr.dbg.declare #[[$VAR]] = %[[ALLOCA]] + // CHECK-NOT: = llvm.alloca + // CHECK-NOT: llvm.intr.dbg.declare + // CHECK: llvm.call @use_ptr(%[[ALLOCA]]) + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + %2 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + llvm.intr.dbg.declare #di_local_variable_2 = %2 : !llvm.ptr + llvm.call @use_ptr(%1) : (!llvm.ptr) -> () + llvm.return +} diff --git a/mlir/test/Transforms/mem2reg-llvmir.mlir b/mlir/test/Transforms/mem2reg-llvmir.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Transforms/mem2reg-llvmir.mlir @@ -0,0 +1,685 @@ +// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(mem2reg))" --split-input-file | FileCheck %s + +// CHECK-LABEL: llvm.func @default_value +llvm.func @default_value() -> i32 { + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: llvm.return %[[UNDEF]] : i32 + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @store_of_ptr +llvm.func @store_of_ptr() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(4 : i32) : i32 + %2 = llvm.mlir.null : !llvm.ptr + // CHECK: %[[ALLOCA:.*]] = llvm.alloca + %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + // CHECK: llvm.store %{{.*}}, %[[ALLOCA]] + llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr + // CHECK: llvm.store %[[ALLOCA]], %{{.*}} + llvm.store %3, %2 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @unreachable +llvm.func @unreachable() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(0 : i32) : i32 + // CHECK-NOT: = llvm.alloca + %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.return + +// CHECK: ^{{.*}}: +// CHECK-NEXT: llvm.return +^bb1: // no predecessors + llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @unreachable_in_loop +// CHECK-NOT: = llvm.alloca +llvm.func @unreachable_in_loop() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(6 : i32) : i32 + %2 = llvm.mlir.constant(5 : i32) : i32 + %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr + // CHECK: llvm.br ^[[LOOP:.*]] + llvm.br ^bb1 + +// CHECK: ^[[LOOP]]: +^bb1: // 2 preds: ^bb0, ^bb3 + // CHECK-NEXT: llvm.br ^[[ENDOFLOOP:.*]] + llvm.store %2, %3 {alignment = 4 : i64} : i32, 
!llvm.ptr + llvm.br ^bb3 + +// CHECK: ^[[UNREACHABLE:.*]]: +^bb2: // no predecessors + // CHECK-NEXT: llvm.br ^[[ENDOFLOOP]] + llvm.br ^bb3 + +// CHECK: ^[[ENDOFLOOP]]: +^bb3: // 2 preds: ^bb1, ^bb2 + // CHECK-NEXT: llvm.br ^[[LOOP]] + llvm.br ^bb1 +} + +// ----- + +// CHECK-LABEL: llvm.func @branching +// CHECK-NOT: = llvm.alloca +llvm.func @branching(%arg0: i1, %arg1: i1) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + // CHECK: llvm.cond_br %{{.*}}, ^[[BB2:.*]](%{{.*}} : i32), ^{{.*}} + llvm.cond_br %arg0, ^bb2, ^bb1 +^bb1: // pred: ^bb0 + llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr + // CHECK: llvm.cond_br %{{.*}}, ^[[BB2]](%{{.*}} : i32), ^[[BB2]](%{{.*}} : i32) + llvm.cond_br %arg1, ^bb2, ^bb2 +// CHECK: ^[[BB2]](%[[V3:.*]]: i32): +^bb2: // 3 preds: ^bb0, ^bb1, ^bb1 + %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: llvm.return %[[V3]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @recursive_alloca +// CHECK-NOT: = llvm.alloca +llvm.func @recursive_alloca() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(0 : i32) : i32 + %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr + llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr + llvm.store %3, %4 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr + %5 = llvm.load %4 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr + %6 = llvm.load %5 {alignment = 4 : i64} : !llvm.ptr -> i32 + llvm.store %6, %2 {alignment = 4 : i64} : i32, !llvm.ptr + %7 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32 + llvm.return %7 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @reset_in_branch +// CHECK-NOT: = llvm.alloca +// CHECK-NOT: ^{{.*}}({{.*}}): +llvm.func @reset_in_branch(%arg0: i32, %arg1: i1) { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(true) : i1 + %2 = llvm.mlir.constant(false) : i1 + %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg0, %3 {alignment = 4 : i64} : i32, !llvm.ptr + llvm.cond_br %arg1, ^bb1, ^bb2 +^bb1: // pred: ^bb0 + llvm.store %arg0, %3 {alignment = 4 : i64} : i32, !llvm.ptr + %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32 + llvm.call @reset_in_branch(%4, %2) : (i32, i1) -> () + llvm.br ^bb3 +^bb2: // pred: ^bb0 + %5 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32 + llvm.call @reset_in_branch(%5, %1) : (i32, i1) -> () + llvm.br ^bb3 +^bb3: // 2 preds: ^bb1, ^bb2 + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @intertwined_alloca +// CHECK-NOT: = llvm.alloca +llvm.func @intertwined_alloca(%arg0: !llvm.ptr, %arg1: i32) { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(0 : i32) : i32 + %2 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr + %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %4 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %5 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %6 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg0, %2 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr + llvm.store %arg1, %3 {alignment = 4 : i64} : i32, !llvm.ptr + llvm.store %1, %4 {alignment = 4 : i64} : i32, !llvm.ptr + llvm.br ^bb1 +^bb1: // 2 preds: 
+// CHECK-LABEL: llvm.func @intertwined_alloca
+// CHECK-NOT: = llvm.alloca
+llvm.func @intertwined_alloca(%arg0: !llvm.ptr, %arg1: i32) {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  %2 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %4 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %5 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %6 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg0, %2 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
+  llvm.store %arg1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.store %1, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb1
+^bb1: // 2 preds: ^bb0, ^bb4
+  %7 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %8 = llvm.add %7, %0 : i32
+  %9 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %10 = llvm.icmp "sgt" %8, %9 : i32
+  %11 = llvm.zext %10 : i1 to i32
+  llvm.cond_br %10, ^bb2, ^bb5
+^bb2: // pred: ^bb1
+  %12 = llvm.load %6 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %12, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.store %1, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+  %13 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %14 = llvm.icmp "sgt" %13, %1 : i32
+  %15 = llvm.zext %14 : i1 to i32
+  llvm.cond_br %14, ^bb3, ^bb4
+^bb3: // pred: ^bb2
+  %16 = llvm.load %2 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+  %17 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %18 = llvm.sub %17, %0 : i32
+  %19 = llvm.getelementptr %16[%18] : (!llvm.ptr, i32) -> !llvm.ptr, i8
+  %20 = llvm.load %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %21 = llvm.trunc %20 : i32 to i8
+  llvm.store %21, %19 {alignment = 1 : i64} : i8, !llvm.ptr
+  llvm.br ^bb4
+^bb4: // 2 preds: ^bb2, ^bb3
+  %22 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %23 = llvm.add %22, %0 : i32
+  llvm.store %23, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb1
+^bb5: // pred: ^bb1
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @complex_cf
+// CHECK-NOT: = llvm.alloca
+llvm.func @complex_cf(%arg0: i32, ...) {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(false) : i1
+  %2 = llvm.mlir.constant(0 : i32) : i32
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.cond_br %1, ^bb1, ^bb2
+^bb1: // pred: ^bb0
+  llvm.br ^bb2
+^bb2: // 2 preds: ^bb0, ^bb1
+  llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb3
+^bb3: // 2 preds: ^bb2, ^bb16
+  llvm.cond_br %1, ^bb4, ^bb17
+^bb4: // pred: ^bb3
+  llvm.cond_br %1, ^bb5, ^bb14
+^bb5: // pred: ^bb4
+  llvm.cond_br %1, ^bb7, ^bb6
+^bb6: // pred: ^bb5
+  llvm.br ^bb7
+^bb7: // 2 preds: ^bb5, ^bb6
+  llvm.cond_br %1, ^bb9, ^bb8
+^bb8: // pred: ^bb7
+  llvm.br ^bb9
+^bb9: // 2 preds: ^bb7, ^bb8
+  llvm.cond_br %1, ^bb11, ^bb10
+^bb10: // pred: ^bb9
+  llvm.br ^bb11
+^bb11: // 2 preds: ^bb9, ^bb10
+  llvm.cond_br %1, ^bb12, ^bb13
+^bb12: // pred: ^bb11
+  llvm.br ^bb13
+^bb13: // 2 preds: ^bb11, ^bb12
+  llvm.br ^bb14
+^bb14: // 2 preds: ^bb4, ^bb13
+  llvm.cond_br %1, ^bb15, ^bb16
+^bb15: // pred: ^bb14
+  llvm.br ^bb16
+^bb16: // 2 preds: ^bb14, ^bb15
+  llvm.br ^bb3
+^bb17: // pred: ^bb3
+  llvm.br ^bb20
+^bb18: // no predecessors
+  %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.br ^bb24
+^bb19: // no predecessors
+  llvm.br ^bb20
+^bb20: // 2 preds: ^bb17, ^bb19
+  llvm.cond_br %1, ^bb21, ^bb22
+^bb21: // pred: ^bb20
+  llvm.br ^bb23
+^bb22: // pred: ^bb20
+  llvm.br ^bb23
+^bb23: // 2 preds: ^bb21, ^bb22
+  llvm.br ^bb24
+^bb24: // 2 preds: ^bb18, ^bb23
+  llvm.br ^bb26
+^bb25: // no predecessors
+  llvm.br ^bb26
+^bb26: // 2 preds: ^bb24, ^bb25
+  llvm.return
+}
+
+// -----
+
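+// Only the volatile alloca must remain below; the two non-volatile allocas
+// are promoted even in the presence of the `returns_twice` call to @_setjmp.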
+// CHECK-LABEL: llvm.func @llvm_crash
+llvm.func @llvm_crash() -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  %2 = llvm.mlir.addressof @j : !llvm.ptr
+  %3 = llvm.mlir.constant(0 : i8) : i8
+  // CHECK-NOT: = llvm.alloca
+  // CHECK: %[[VOLATILE_ALLOCA:.*]] = llvm.alloca
+  // CHECK-NOT: = llvm.alloca
+  %4 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %5 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %6 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %7 = llvm.bitcast %1 : i32 to i32
+  // CHECK: llvm.store volatile %{{.*}}, %[[VOLATILE_ALLOCA]]
+  llvm.store volatile %1, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+  %8 = llvm.call @_setjmp(%2) : (!llvm.ptr) -> i32
+  %9 = llvm.icmp "ne" %8, %1 : i32
+  %10 = llvm.zext %9 : i1 to i8
+  %11 = llvm.icmp "ne" %10, %3 : i8
+  llvm.cond_br %11, ^bb1, ^bb2
+^bb1: // pred: ^bb0
+  // CHECK: = llvm.load volatile %[[VOLATILE_ALLOCA]]
+  %12 = llvm.load volatile %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %12, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb3
+^bb2: // pred: ^bb0
+  // CHECK: llvm.store volatile %{{.*}}, %[[VOLATILE_ALLOCA]]
+  llvm.store volatile %0, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.call @g() : () -> ()
+  llvm.store %1, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb3
+^bb3: // 2 preds: ^bb1, ^bb2
+  %13 = llvm.load %6 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %13, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb4
+^bb4: // pred: ^bb3
+  %14 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %14 : i32
+}
+llvm.mlir.global external @j() {addr_space = 0 : i32} : !llvm.array<1 x struct<"struct.__jmp_buf_tag", (array<6 x i32>, i32, struct<"struct.__sigset_t", (array<32 x i32>)>)>>
+llvm.func @_setjmp(!llvm.ptr) -> i32 attributes {passthrough = ["returns_twice"]}
+llvm.func @g()
+
+// -----
+
+// CHECK-LABEL: llvm.func amdgpu_kernelcc @addrspace_discard
+// CHECK-NOT: = llvm.alloca
+llvm.func amdgpu_kernelcc @addrspace_discard() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+  %3 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_atomic
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @ignore_atomic(%arg0: i32) -> i32 {
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg0, %1 atomic seq_cst {alignment = 4 : i64} : i32, !llvm.ptr
+  %2 = llvm.load %1 atomic seq_cst {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[ARG0]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @landing_pad
+// CHECK-NOT: = llvm.alloca
+llvm.func @landing_pad() -> i32 attributes {personality = @__gxx_personality_v0} {
+  // CHECK-NOT: = llvm.alloca
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: %[[V2:.*]] = llvm.invoke
+  %2 = llvm.invoke @landing_padf() to ^bb1 unwind ^bb3 : () -> i32
+// CHECK: ^{{.*}}:
+^bb1: // pred: ^bb0
+  llvm.store %2, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+  // CHECK: llvm.br ^[[BB2:.*]](%[[V2]] : i32)
+  llvm.br ^bb2
+// CHECK: ^[[BB2]](%[[V3:.*]]: i32):
+^bb2: // 2 preds: ^bb1, ^bb3
+  %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[V3]] : i32
+  llvm.return %3 : i32
+// CHECK: ^{{.*}}:
+^bb3: // pred: ^bb0
+  %4 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)>
+  // CHECK: llvm.br ^[[BB2:.*]](%[[UNDEF]] : i32)
+  llvm.br ^bb2
+}
+llvm.func @landing_padf() -> i32
+llvm.func @__gxx_personality_v0(...) -> i32
+
+// -----
+
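+// A load whose only defining stores sit in unreachable code is replaced by
+// llvm.mlir.undef rather than blocking promotion.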
+// CHECK-LABEL: llvm.func @unreachable_defines
+llvm.func @unreachable_defines() -> i32 {
+  // CHECK-NOT: = llvm.alloca
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.br ^bb1
+^bb1: // 2 preds: ^bb0, ^bb2
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[UNDEF]] : i32
+  llvm.return %2 : i32
+^bb2: // no predecessors
+  %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %3, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb1
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable_jumps_to_merge_point
+// CHECK-NOT: = llvm.alloca
+llvm.func @unreachable_jumps_to_merge_point(%arg0: i1) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(6 : i32) : i32
+  %2 = llvm.mlir.constant(5 : i32) : i32
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.cond_br %arg0, ^bb1, ^bb2
+^bb1: // 2 preds: ^bb0, ^bb4
+  llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb4
+^bb2: // pred: ^bb0
+  llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb4
+^bb3: // no predecessors
+  llvm.br ^bb4
+^bb4: // 3 preds: ^bb1, ^bb2, ^bb3
+  %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %4 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_lifetime
+// CHECK-NOT: = llvm.alloca
+llvm.func @ignore_lifetime() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.intr.lifetime.start 2, %1 : !llvm.ptr
+  llvm.store %0, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.intr.lifetime.end 2, %1 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_discardable_tree
+// CHECK-NOT: = llvm.alloca
+llvm.func @ignore_discardable_tree() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i16) : i16
+  %2 = llvm.mlir.constant(0 : i8) : i8
+  %3 = llvm.mlir.undef : !llvm.struct<(i8, i16)>
+  %4 = llvm.insertvalue %2, %3[0] : !llvm.struct<(i8, i16)>
+  %5 = llvm.insertvalue %1, %4[1] : !llvm.struct<(i8, i16)>
+  %6 = llvm.alloca %0 x !llvm.struct<(i8, i16)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %7 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i8, i16)>
+  llvm.intr.lifetime.start 2, %7 : !llvm.ptr
+  llvm.store %5, %6 {alignment = 2 : i64} : !llvm.struct<(i8, i16)>, !llvm.ptr
+  llvm.intr.lifetime.end 2, %7 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @store_load_forward
+llvm.func @store_load_forward() -> i32 {
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[RES:.*]] = llvm.mlir.constant(0 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[RES]] : i32
+  llvm.return %3 : i32
+}
+
+// -----
+
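+// Loading a type different from the one that was stored is not understood by
+// the promotion analysis, so the alloca is kept.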
+// CHECK-LABEL: llvm.func @store_load_wrong_type
+llvm.func @store_load_wrong_type() -> i16 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  %3 = llvm.load %2 {alignment = 2 : i64} : !llvm.ptr -> i16
+  llvm.return %3 : i16
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @merge_point_cycle
+llvm.func @merge_point_cycle() {
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(7 : i32) : i32
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.br ^[[BB1:.*]](%[[UNDEF]] : i32)
+  llvm.br ^bb1
+// CHECK: ^[[BB1]](%[[BARG:.*]]: i32):
+^bb1: // 2 preds: ^bb0, ^bb1
+  %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: = llvm.call @use(%[[BARG]])
+  %4 = llvm.call @use(%3) : (i32) -> i1
+  // CHECK: %[[DEF:.*]] = llvm.call @def
+  %5 = llvm.call @def(%1) : (i32) -> i32
+  llvm.store %5, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB1]](%[[DEF]] : i32), ^{{.*}}
+  llvm.cond_br %4, ^bb1, ^bb2
+^bb2: // pred: ^bb1
+  llvm.return
+}
+
+llvm.func @def(i32) -> i32
+llvm.func @use(i32) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @no_unnecessary_arguments
+llvm.func @no_unnecessary_arguments() {
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.br ^[[BB1:.*]]
+  llvm.br ^bb1
+// CHECK: ^[[BB1]]:
+^bb1: // 2 preds: ^bb0, ^bb1
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: = llvm.call @use(%[[UNDEF]])
+  %3 = llvm.call @use(%2) : (i32) -> i1
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB1]], ^{{.*}}
+  llvm.cond_br %3, ^bb1, ^bb2
+^bb2: // pred: ^bb1
+  llvm.return
+}
+
+llvm.func @use(i32) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @discardable_use_tree
+// CHECK-NOT: = llvm.alloca
+llvm.func @discardable_use_tree() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @non_discardable_use_tree
+llvm.func @non_discardable_use_tree() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.call @use(%4) : (!llvm.ptr) -> i1
+  llvm.return
+}
+llvm.func @use(!llvm.ptr) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @trivial_get_element_ptr
+// CHECK-NOT: = llvm.alloca
+llvm.func @trivial_get_element_ptr() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.getelementptr %3[0, 0, 0] : (!llvm.ptr) -> !llvm.ptr, i8
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
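+// Unlike the all-zero GEP above, a GEP with a non-zero index is not a pure
+// alias of the alloca, so the alloca is kept.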
+// CHECK-LABEL: llvm.func @nontrivial_get_element_ptr
+llvm.func @nontrivial_get_element_ptr() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.getelementptr %3[0, 1, 0] : (!llvm.ptr) -> !llvm.ptr, i8
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @dynamic_get_element_ptr
+llvm.func @dynamic_get_element_ptr() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.getelementptr %3[0, %0] : (!llvm.ptr, i32) -> !llvm.ptr, i8
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @live_cycle
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %{{.*}}: i1, %[[ARG2:.*]]: i64) -> i64
+llvm.func @live_cycle(%arg0: i64, %arg1: i1, %arg2: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg2, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]](%[[ARG2]] : i64), ^[[BB2:.*]](%[[ARG2]] : i64)
+  llvm.cond_br %arg1, ^bb1, ^bb2
+// CHECK: ^[[BB1]](%[[V1:.*]]: i64):
+^bb1:
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: llvm.call @use(%[[V1]])
+  llvm.call @use(%2) : (i64) -> ()
+  llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: llvm.br ^[[BB2]](%[[ARG0]] : i64)
+  llvm.br ^bb2
+// CHECK: ^[[BB2]](%[[V2:.*]]: i64):
+^bb2:
+  // CHECK: llvm.br ^[[BB1]](%[[V2]] : i64)
+  llvm.br ^bb1
+}
+
+llvm.func @use(i64)
+
+// -----
+
+// The alloca is not promoted here because one of its stores is nested inside
+// a subregion; this should change once promotion within subregions is
+// supported.
+// CHECK-LABEL: llvm.func @subregion_block_promotion
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64) -> i64
+llvm.func @subregion_block_promotion(%arg0: i64, %arg1: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.store %[[ARG1]], %[[ALLOCA]]
+  llvm.store %arg1, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: scf.execute_region {
+  scf.execute_region {
+    // CHECK: llvm.store %[[ARG0]], %[[ALLOCA]]
+    llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+    scf.yield
+  }
+  // CHECK: }
+  // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]]
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: llvm.return %[[RES]] : i64
+  llvm.return %2 : i64
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @subregion_simple_transitive_promotion
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64) -> i64
+llvm.func @subregion_simple_transitive_promotion(%arg0: i64, %arg1: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg1, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: scf.execute_region {
+  scf.execute_region {
+    // CHECK: llvm.call @use(%[[ARG1]])
+    llvm.call @use(%2) : (i64) -> ()
+    scf.yield
+  }
+  // CHECK: }
+  // CHECK: llvm.return %[[ARG1]] : i64
+  llvm.return %2 : i64
+}
+
+llvm.func @use(i64)
+
+// -----
+
+// This behavior is specific to the LLVM dialect: under LLVM semantics, every
+// execution of an alloca allocates a new stack slot, so promoting an alloca
+// that can be reached multiple times could change observable behavior. Thus,
+// only allocas in the entry block are promoted.
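+//
+// As an illustration of that concern, an alloca inside a loop body
+// conceptually creates a fresh stack slot on every iteration, so collapsing
+// it into a single SSA value could merge slots the program expects to be
+// distinct.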
+
+// CHECK-LABEL: llvm.func @no_inner_alloca_promotion
+// CHECK-SAME: (%[[ARG:.*]]: i64) -> i64
+llvm.func @no_inner_alloca_promotion(%arg: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  llvm.br ^bb1
+^bb1:
+  // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.store %[[ARG]], %[[ALLOCA]]
+  llvm.store %arg, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]]
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: llvm.return %[[RES]] : i64
+  llvm.return %2 : i64
+}