diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -219,8 +219,10 @@ LLVM_OneResultOp<"alloca">, Arguments<(ins LLVM_Type:$arraySize, OptionalAttr:$alignment)> { string llvmBuilder = [{ + llvm::Module *module = builder.GetInsertBlock()->getModule(); + auto allocaAddrSpace = module->getDataLayout().getAllocaAddrSpace(); auto *alloca = builder.CreateAlloca( - $_resultType->getPointerElementType(), $arraySize); + $_resultType->getPointerElementType(), allocaAddrSpace, $arraySize); if ($alignment.hasValue()) { auto align = $alignment.getValue().getZExtValue(); if (align != 0) diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -49,12 +49,14 @@ class ModuleTranslation { public: template - static std::unique_ptr translateModule(Operation *m) { + static std::unique_ptr + translateModule(Operation *m, StringRef triple = "", + StringRef dataLayout = "") { if (!satisfiesLLVMModule(m)) return nullptr; if (failed(checkSupportedModuleOps(m))) return nullptr; - auto llvmModule = prepareLLVMModule(m); + auto llvmModule = prepareLLVMModule(m, triple, dataLayout); if (!llvmModule) return nullptr; @@ -85,7 +87,9 @@ llvm::IRBuilder<> &builder); virtual LogicalResult convertOmpOperation(Operation &op, llvm::IRBuilder<> &builder); - static std::unique_ptr prepareLLVMModule(Operation *m); + static std::unique_ptr + prepareLLVMModule(Operation *m, StringRef triple = "", + StringRef dataLayout = ""); /// A helper to look up remapped operands in the value remapping table. 
SmallVector lookupValues(ValueRange values); diff --git a/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp --- a/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertToROCDLIR.cpp @@ -76,8 +76,13 @@ std::unique_ptr mlir::translateModuleToROCDLIR(Operation *m) { // lower MLIR (with RODL Dialect) to LLVM IR (with ROCDL intrinsics) - auto llvmModule = - LLVM::ModuleTranslation::translateModule(m); + auto amdgcnTriple = "amdgcn-amd-amdhsa"; + auto amdgcnDataLayout = + "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:" + "1024-v2048:2048-n32:64-S32-A5-ni:7"; + auto llvmModule = LLVM::ModuleTranslation::translateModule( + m, amdgcnTriple, amdgcnDataLayout); // foreach GPU kernel // 1. Insert AMDGPU_KERNEL calling convention. diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -802,7 +802,8 @@ } std::unique_ptr -ModuleTranslation::prepareLLVMModule(Operation *m) { +ModuleTranslation::prepareLLVMModule(Operation *m, StringRef triple, + StringRef dataLayout) { auto *dialect = m->getContext()->getRegisteredDialect(); assert(dialect && "LLVM dialect must be registered"); // Lock the LLVM context as we might create new types here. @@ -815,6 +816,12 @@ llvm::LLVMContext &llvmContext = llvmModule->getContext(); llvm::IRBuilder<> builder(llvmContext); + // Set target triple string. + llvmModule->setTargetTriple(triple); + + // Set data layout string. + llvmModule->setDataLayout(dataLayout); + // Inject declarations for `malloc` and `free` functions that can be used in // memref allocation/deallocation coming from standard ops lowering. 
llvmModule->getOrInsertFunction("malloc", builder.getInt8PtrTy(), diff --git a/mlir/test/Target/rocdl.mlir b/mlir/test/Target/rocdl.mlir --- a/mlir/test/Target/rocdl.mlir +++ b/mlir/test/Target/rocdl.mlir @@ -1,5 +1,8 @@ // RUN: mlir-translate -mlir-to-rocdlir %s | FileCheck %s +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +// CHECK-NEXT: target triple = "amdgcn-amd-amdhsa" + llvm.func @rocdl_special_regs() -> !llvm.i32 { // CHECK-LABEL: rocdl_special_regs // CHECK: call i32 @llvm.amdgcn.workitem.id.x() @@ -33,3 +36,14 @@ // CHECK-LABEL: amdgpu_kernel void @kernel_func llvm.return } + +// CHECK-LABEL: @alloca_non_zero_addrspace +llvm.func @alloca_non_zero_addrspace(%size : !llvm.i64) { + // Alignment automatically set by the LLVM IR builder when alignment attribute + // is 0. + // CHECK: alloca {{.*}} align 4, addrspace(5) + llvm.alloca %size x !llvm.i32 {alignment = 0} : (!llvm.i64) -> (!llvm<"i32 addrspace(5)*">) + // CHECK-NEXT: alloca {{.*}} align 8, addrspace(5) + llvm.alloca %size x !llvm.i32 {alignment = 8} : (!llvm.i64) -> (!llvm<"i32 addrspace(5)*">) + llvm.return +}