diff --git a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
--- a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
+++ b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
@@ -32,7 +32,7 @@
   // Given a value, this function will iterate over an operation's results
   // and return the index of the result the value corresponds to.
   // There may be a simpler way to do this, however.
-  unsigned getResultIndex(mlir::Value value, mlir::Operation *op) {
+  static unsigned getResultIndex(mlir::Value value, mlir::Operation *op) {
    for (unsigned i = 0; i < op->getNumResults(); ++i) {
      if (op->getResult(i) == value)
        return i;
@@ -40,9 +40,10 @@
    return 0;
  }

-  bool isDeclareTargetOp(mlir::Operation *op) {
-    if (fir::AddrOfOp addressOfOp = mlir::dyn_cast<fir::AddrOfOp>(op))
-      if (fir::GlobalOp gOp = mlir::dyn_cast<fir::GlobalOp>(
+  static bool isAddressOfGlobalDeclareTarget(mlir::Value value) {
+    if (fir::AddrOfOp addressOfOp =
+            mlir::dyn_cast_if_present<fir::AddrOfOp>(value.getDefiningOp()))
+      if (fir::GlobalOp gOp = mlir::dyn_cast_if_present<fir::GlobalOp>(
              addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
                  addressOfOp.getSymbol())))
        if (auto declareTargetGlobal =
@@ -59,14 +60,14 @@
  // NOTE: Results in duplication of some values that would otherwise be
  // a single SSA value shared between operations; this is tidied up on
  // lowering to some extent.
-  mlir::Operation *
+  static mlir::Operation *
  cloneArgAndChildren(mlir::OpBuilder &builder, mlir::Operation *op,
                      llvm::SetVector<mlir::Value> &inputs,
                      mlir::Block::BlockArgListType &newInputs) {
    mlir::IRMapping valueMap;
-    for (auto opValue : op->getOperands()) {
+    for (mlir::Value opValue : op->getOperands()) {
      if (opValue.getDefiningOp()) {
-        auto resIdx = getResultIndex(opValue, opValue.getDefiningOp());
+        unsigned resIdx = getResultIndex(opValue, opValue.getDefiningOp());
        valueMap.map(opValue,
                     cloneArgAndChildren(builder, opValue.getDefiningOp(),
                                         inputs, newInputs)
@@ -82,11 +83,12 @@
    return builder.clone(*op, valueMap);
  }

-  void cloneMapOpVariables(mlir::OpBuilder &builder, mlir::IRMapping &valueMap,
-                           mlir::IRMapping &mapInfoMap,
-                           llvm::SetVector<mlir::Value> &inputs,
-                           mlir::Block::BlockArgListType &newInputs,
-                           mlir::Value varPtr) {
+  static void cloneMapOpVariables(mlir::OpBuilder &builder,
+                                  mlir::IRMapping &valueMap,
+                                  mlir::IRMapping &mapInfoMap,
+                                  llvm::SetVector<mlir::Value> &inputs,
+                                  mlir::Block::BlockArgListType &newInputs,
+                                  mlir::Value varPtr) {
    if (fir::BoxAddrOp boxAddrOp =
            mlir::dyn_cast_if_present<fir::BoxAddrOp>(varPtr.getDefiningOp())) {
      mlir::Value newV =
@@ -97,7 +99,7 @@
      return;
    }

-    if (varPtr.getDefiningOp() && isDeclareTargetOp(varPtr.getDefiningOp())) {
+    if (isAddressOfGlobalDeclareTarget(varPtr)) {
      fir::AddrOfOp addrOp =
          mlir::dyn_cast<fir::AddrOfOp>(varPtr.getDefiningOp());
      mlir::Value newV = builder.clone(*addrOp)->getResult(0);
@@ -129,18 +131,17 @@
    // Filter out declare target and map entries, which are specially handled
    // at the moment; we do not want these to end up as function arguments,
    // which would just be more noise in the IR.
-    for (auto value : inputs)
-      if (value.getDefiningOp())
-        if (mlir::isa<mlir::omp::MapInfoOp>(value.getDefiningOp()) ||
-            isDeclareTargetOp(value.getDefiningOp()))
-          inputs.remove(value);
+    for (mlir::Value value : inputs)
+      if (mlir::isa_and_nonnull<mlir::omp::MapInfoOp>(value.getDefiningOp()) ||
+          isAddressOfGlobalDeclareTarget(value))
+        inputs.remove(value);

    // Create new function and initialize
    mlir::FunctionType funcType = builder.getFunctionType(
        mlir::TypeRange(inputs.getArrayRef()), mlir::TypeRange());
    std::string parentName(parentFunc.getName());
    std::string funcName = getOutlinedFnName(parentName, count);
-    auto loc = targetOp.getLoc();
+    mlir::Location loc = targetOp.getLoc();
    mlir::func::FuncOp newFunc =
        mlir::func::FuncOp::create(loc, funcName, funcType);
    mlir::Block *entryBlock = newFunc.addEntryBlock();
@@ -175,11 +176,11 @@
    // however, cloning across the minimum for the moment to avoid
    // optimisations breaking segments of the lowering seems prudent, as this
    // was the original intent of the pass.
-    for (auto oper : targetOp.getOperation()->getOperands()) {
+    for (mlir::Value oper : targetOp->getOperands()) {
      if (auto mapEntry =
              mlir::dyn_cast<mlir::omp::MapInfoOp>(oper.getDefiningOp())) {
        mlir::IRMapping mapInfoMap;
-        for (auto bound : mapEntry.getBounds()) {
+        for (mlir::Value bound : mapEntry.getBounds()) {
          if (auto mapEntryBound = mlir::dyn_cast<mlir::omp::DataBoundsOp>(
                  bound.getDefiningOp())) {
            mapInfoMap.map(bound, cloneArgAndChildren(builder, mapEntryBound,
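The linear scan in the getResultIndex helper above can likely be avoided: any value produced by an operation is an mlir::OpResult, which already carries its own position. A minimal sketch of that simpler alternative (not part of the patch):

    // Equivalent to getResultIndex above for values that have a defining op;
    // an OpResult knows its index, so no scan over the results is needed.
    // Falls back to 0 like the original for non-result values.
    static unsigned getResultIndex(mlir::Value value) {
      if (auto result = value.dyn_cast<mlir::OpResult>())
        return result.getResultNumber();
      return 0;
    }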
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -29,8 +29,11 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include <cstdint>
 #include <utility>

 using namespace mlir;

@@ -1507,6 +1510,104 @@
   return 0;
 }

+static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
+convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
+  switch (deviceClause) {
+  case mlir::omp::DeclareTargetDeviceType::host:
+    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
+  case mlir::omp::DeclareTargetDeviceType::nohost:
+    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
+  case mlir::omp::DeclareTargetDeviceType::any:
+    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
+  default:
+    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
+  }
+}
+
+static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
+convertToCaptureClauseKind(
+    mlir::omp::DeclareTargetCaptureClause captureClause) {
+  switch (captureClause) {
+  case mlir::omp::DeclareTargetCaptureClause::to:
+    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
+  case mlir::omp::DeclareTargetCaptureClause::link:
+    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
+  default:
+    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
+  }
+}
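These capture-clause kinds surface directly as the i32 flags field of the __tgt_offload_entry records checked in the host test later in this patch: `to` entries carry 0, `link` entries carry 1. A quick cross-check, assuming the enumerator values in llvm/Frontend/OpenMP/OMPIRBuilder.h of this era (worth re-verifying against the tree):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

    // "i32 0" / "i32 1" in the offload entries of the host test below map
    // back to these enumerators.
    static_assert(
        llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo == 0x0);
    static_assert(
        llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink == 0x1);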
+static llvm::SmallString<64>
+getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
+                             llvm::OpenMPIRBuilder &ompBuilder) {
+  llvm::SmallString<64> suffix;
+  llvm::raw_svector_ostream os(suffix);
+  if (globalOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
+    auto loc = globalOp->getLoc()->findInstanceOf<mlir::FileLineColLoc>();
+    auto fileInfoCallBack = [&loc]() {
+      return std::pair(llvm::StringRef(loc.getFilename()), loc.getLine());
+    };
+
+    os << llvm::format(
+        "_%x", ompBuilder.getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
+  }
+  os << "_decl_tgt_ref_ptr";
+
+  return suffix;
+}
+
+// Returns the reference pointer generated by the lowering of the declare
+// target operation in cases where the link clause is used, or the to clause
+// is used in USM mode.
+static llvm::Value *
+getRefPtrIfDeclareTarget(mlir::Value value,
+                         LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+  // An easier way to do this may just be to keep track of any pointer
+  // references and their mapping to their respective operation.
+  if (auto addressOfOp = llvm::dyn_cast_if_present<LLVM::AddressOfOp>(
+          value.getDefiningOp())) {
+    if (auto gOp = llvm::dyn_cast_or_null<LLVM::GlobalOp>(
+            addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
+                addressOfOp.getGlobalName()))) {
+
+      if (auto declareTargetGlobal =
+              llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
+                  gOp.getOperation())) {
+
+        // In this case, we must utilise the reference pointer generated by
+        // the declare target operation, similar to Clang.
+        if ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
+             mlir::omp::DeclareTargetCaptureClause::link) ||
+            (declareTargetGlobal.getDeclareTargetCaptureClause() ==
+                 mlir::omp::DeclareTargetCaptureClause::to &&
+             ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
+          llvm::SmallString<64> suffix =
+              getDeclareTargetRefPtrSuffix(gOp, *ompBuilder);
+
+          if (gOp.getSymName().contains(suffix))
+            return moduleTranslation.getLLVMModule()->getNamedValue(
+                gOp.getSymName());
+
+          return moduleTranslation.getLLVMModule()->getNamedValue(
+              (gOp.getSymName().str() + suffix.str()).str());
+        }
+      }
+    }
+  }
+
+  return nullptr;
+}
+
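To make the suffix logic concrete, here is the same computation in isolation; the 0x1a2b file ID is a made-up stand-in for the value getTargetEntryUniqueInfo derives from the (filename, line) pair:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    // Mirrors getDeclareTargetRefPtrSuffix: private symbols get a unique
    // file-ID component; public symbols do not.
    static llvm::SmallString<64> makeSuffix(unsigned fileID, bool isPrivate) {
      llvm::SmallString<64> suffix;
      llvm::raw_svector_ostream os(suffix);
      if (isPrivate)
        os << llvm::format("_%x", fileID);
      os << "_decl_tgt_ref_ptr";
      return suffix;
    }
    // makeSuffix(0x1a2b, true) yields "_1a2b_decl_tgt_ref_ptr".
    // makeSuffix(0, false) yields "_decl_tgt_ref_ptr", giving names such as
    // "_QMtest_0Esp_decl_tgt_ref_ptr" in the tests below.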
 // Generate all map related information and fill the combinedInfo.
 static void genMapInfos(llvm::IRBuilderBase &builder,
                         LLVM::ModuleTranslation &moduleTranslation,
@@ -1516,7 +1617,7 @@
                         const ArrayAttr &mapTypes,
                         const SmallVector<Value> &devPtrOperands = {},
                         const SmallVector<Value> &devAddrOperands = {},
-                        bool IsTargetParams = false) {
+                        bool isTargetParams = false) {
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

   auto fail = [&combinedInfo]() -> void {
@@ -1543,31 +1644,40 @@
     // Unlike dev_ptr and dev_addr operands, these map operands point
     // to a map entry operation which contains further information
     // on the variable being mapped and how it should be mapped.
-    auto MapInfoOp =
+    auto mapInfoOp =
         mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());

     // TODO: Only LLVMPointerTypes are handled.
-    if (!MapInfoOp.getType().isa<LLVM::LLVMPointerType>())
+    if (!mapInfoOp.getType().isa<LLVM::LLVMPointerType>())
       return fail();

     llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(MapInfoOp.getVarPtr());
-    combinedInfo.BasePointers.emplace_back(mapOpValue);
+        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
+
+    llvm::Value *refPtr =
+        getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(), moduleTranslation);
+
+    combinedInfo.BasePointers.emplace_back(refPtr ? refPtr : mapOpValue);
     combinedInfo.Pointers.emplace_back(mapOpValue);
     combinedInfo.DevicePointers.emplace_back(
         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
     combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
-        MapInfoOp.getVarPtr().getLoc(), *ompBuilder));
+        mapInfoOp.getVarPtr().getLoc(), *ompBuilder));

-    combinedInfo.Types.emplace_back(
-        llvm::omp::OpenMPOffloadMappingFlags(
-            mapTypes[index].dyn_cast<mlir::IntegerAttr>().getUInt()) |
-        (IsTargetParams
-             ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
-             : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE));
+    auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
+        mapTypes[index].cast<mlir::IntegerAttr>().getUInt());
+
+    // Declare target mappings are excluded from being marked as
+    // OMP_MAP_TARGET_PARAM, as they are not passed as parameters; they are
+    // marked with OMP_MAP_PTR_AND_OBJ instead.
+    if (refPtr)
+      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+    else if (isTargetParams)
+      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+
+    combinedInfo.Types.emplace_back(mapFlag);
     combinedInfo.Sizes.emplace_back(
-        builder.getInt64(getSizeInBytes(DL, MapInfoOp.getVarPtr().getType())));
+        builder.getInt64(getSizeInBytes(DL, mapInfoOp.getVarPtr().getType())));
     index++;
   }

@@ -1856,6 +1966,37 @@
   return true;
 }

+static void
+handleDeclareTargetMapVar(llvm::ArrayRef<mlir::Value> mapOperands,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (const mlir::Value &mapOp : mapOperands) {
+    auto mapInfoOp =
+        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
+    llvm::Value *mapOpValue =
+        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
+    if (auto *declareTarget = getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(),
+                                                       moduleTranslation)) {
+      // The users iterator will be invalidated if we modify an element,
+      // so we populate this vector of uses and alter each user on an
+      // individual basis, emitting its own load (rather than one load for
+      // all).
+      llvm::SmallVector<llvm::User *> userVec;
+      for (llvm::User *user : mapOpValue->users())
+        userVec.push_back(user);
+
+      for (llvm::User *user : userVec) {
+        if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
+          auto *load = builder.CreateLoad(
+              moduleTranslation.convertType(mapInfoOp.getVarPtr().getType()),
+              declareTarget);
+          load->moveBefore(insn);
+          user->replaceUsesOfWith(mapOpValue, load);
+        }
+      }
+    }
+  }
+}
+
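Both flags chosen in genMapInfos above are fixed bits of the runtime's map-type encoding, so a declare-target mapping reaches libomptarget as a pointer-and-object record rather than a kernel parameter. A quick cross-check, assuming the OMPConstants.h values of this era (worth re-verifying against the tree):

    #include <cstdint>
    #include "llvm/Frontend/OpenMP/OMPConstants.h"

    using MapFlags = llvm::omp::OpenMPOffloadMappingFlags;
    // Base pointer and pointee travel together; not a kernel argument.
    static_assert(static_cast<uint64_t>(MapFlags::OMP_MAP_PTR_AND_OBJ) == 0x10);
    // Entry is passed to the generated kernel as a parameter.
    static_assert(static_cast<uint64_t>(MapFlags::OMP_MAP_TARGET_PARAM) == 0x20);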
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -1866,13 +2007,25 @@
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();

+  // This lambda filters out kernel data that will not show up as a kernel
+  // input argument to the generated kernel function, but still needs to be
+  // explicitly mapped by supplying information to the OpenMP runtime
+  // (declare target). It also prepares some data used for generating the
+  // kernel and populating the associated OpenMP runtime data structures.
+  auto getKernelArguments =
+      [&](const llvm::SetVector<Value> &operandSet,
+          llvm::SmallVectorImpl<llvm::Value *> &llvmInputs) {
+        for (Value operand : operandSet) {
+          if (!getRefPtrIfDeclareTarget(operand, moduleTranslation))
+            llvmInputs.push_back(moduleTranslation.lookupValue(operand));
+        }
+      };
+
   llvm::SetVector<Value> operandSet;
   getUsedValuesDefinedAbove(targetRegion, operandSet);

-  // Collect the input arguments.
   llvm::SmallVector<llvm::Value *> inputs;
-  for (Value operand : operandSet)
-    inputs.push_back(moduleTranslation.lookupValue(operand));
+  getKernelArguments(operandSet, inputs);

   LogicalResult bodyGenStatus = success();

@@ -1939,18 +2092,24 @@
       ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
       defaultValThreads, inputs, genMapInfoCB, bodyCB));

+  // Remap access operations to declare target reference pointers for the
+  // device, essentially generating extra load ops as necessary.
+  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) {
+    SmallVector<Value> mapOperands(targetOp.getMapOperands().begin(),
+                                   targetOp.getMapOperands().end());
+    handleDeclareTargetMapVar(llvm::ArrayRef(mapOperands), moduleTranslation,
+                              builder);
+  }
+
   return bodyGenStatus;
 }

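The handleDeclareTargetMapVar call above rewrites, on the device only, every kernel-internal use of a mapped declare-target variable to go through a fresh load of its reference pointer; the users are snapshotted first because replaceUsesOfWith mutates the use list mid-iteration. The idiom in isolation (rewriteUsesThroughLoad and makeLoadBefore are illustrative names, not from the patch):

    // Snapshot the users, then rewrite each instruction-use of `val` through
    // its own load; `makeLoadBefore` stands in for the CreateLoad/moveBefore
    // pair in the patch.
    static void rewriteUsesThroughLoad(
        llvm::Value *val,
        llvm::function_ref<llvm::Value *(llvm::Instruction *)> makeLoadBefore) {
      llvm::SmallVector<llvm::User *> users(val->user_begin(), val->user_end());
      for (llvm::User *user : users)
        if (auto *insn = llvm::dyn_cast<llvm::Instruction>(user))
          user->replaceUsesOfWith(val, makeLoadBefore(insn));
    }

In the device test below, this is what turns the store through @_QMtest_0Esp into a store through a load of @_QMtest_0Esp_decl_tgt_ref_ptr.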
 static LogicalResult
-convertDeclareTargetAttr(Operation *op,
-                         omp::DeclareTargetAttr declareTargetAttr,
+convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
                          LLVM::ModuleTranslation &moduleTranslation) {
   // Amend omp.declare_target by deleting the IR of the outlined functions
   // created for target regions. They cannot be filtered out from MLIR earlier
-  // because the omp.target operation inside must be translated to LLVM, but the
-  // wrapper functions themselves must not remain at the end of the process.
-  // We know that functions where omp.declare_target does not match
+  // because the omp.target operation inside must be translated to LLVM, but
+  // the wrapper functions themselves must not remain at the end of the
+  // process. We know that functions where omp.declare_target does not match
   // omp.is_target_device at this stage can only be wrapper functions because
   // those that aren't are removed earlier as an MLIR transformation pass.
   if (FunctionOpInterface funcOp = dyn_cast<FunctionOpInterface>(op)) {
@@ -1960,7 +2119,8 @@
       return success();

     omp::DeclareTargetDeviceType declareType =
-        declareTargetAttr.getDeviceType().getValue();
+        attribute.getDeviceType().getValue();
+
     if (declareType == omp::DeclareTargetDeviceType::host) {
       llvm::Function *llvmFunc =
           moduleTranslation.lookupFunction(funcOp.getName());
@@ -1968,7 +2128,77 @@
       if (llvmFunc)
         llvmFunc->eraseFromParent();
     }
+    return success();
+  }
+
+  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
+    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
+      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+      bool isDeclaration = gOp.isDeclaration();
+      bool isExternallyVisible =
+          gOp.getVisibility() != mlir::SymbolTable::Visibility::Private;
+      auto loc = op->getLoc()->findInstanceOf<mlir::FileLineColLoc>();
+      llvm::StringRef mangledName = gOp.getSymName();
+      auto captureClause =
+          convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
+      auto deviceClause =
+          convertToDeviceClauseKind(attribute.getDeviceType().getValue());
+      // Unused for MLIR at the moment; required in Clang for bookkeeping.
+      std::vector<llvm::GlobalVariable *> generatedRefs;
+
+      std::vector<llvm::Triple> targetTriple;
+      auto targetTripleAttr =
+          op->getParentOfType<mlir::ModuleOp>()
+              ->getAttr(LLVM::LLVMDialect::getTargetTripleAttrName())
+              .dyn_cast_or_null<mlir::StringAttr>();
+      if (targetTripleAttr)
+        targetTriple.emplace_back(targetTripleAttr.data());
+
+      auto fileInfoCallBack = [&loc]() {
+        std::string filename = "";
+        std::uint64_t lineNo = 0;
+
+        if (loc) {
+          filename = loc.getFilename().str();
+          lineNo = loc.getLine();
+        }
+
+        return std::pair<std::string, std::uint64_t>(llvm::StringRef(filename),
+                                                     lineNo);
+      };
+
+      ompBuilder->registerTargetGlobalVariable(
+          captureClause, deviceClause, isDeclaration, isExternallyVisible,
+          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
+          generatedRefs, /*OpenMPSimd*/ false, targetTriple,
+          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
+          gVal->getType(), gVal);
+
+      if (ompBuilder->Config.isTargetDevice() &&
+          (attribute.getCaptureClause().getValue() !=
+               mlir::omp::DeclareTargetCaptureClause::to ||
+           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
+        ompBuilder->getAddrOfDeclareTargetVar(
+            captureClause, deviceClause, isDeclaration, isExternallyVisible,
+            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
+            generatedRefs, /*OpenMPSimd*/ false, targetTriple, gVal->getType(),
+            /*GlobalInitializer*/ nullptr,
+            /*VariableLinkage*/ nullptr);
+
+        // A global has already been generated by this stage, unlike Clang, so
+        // it needs to be removed here for the device, whenever the variable is
+        // anything but a to-clause variable without unified shared memory.
+        if (llvm::GlobalValue *llvmVal =
+                llvmModule->getNamedValue(mangledName)) {
+          llvmVal->removeFromParent();
+          llvmVal->dropAllReferences();
+        }
+      }
+    }
+  }
+
   return success();
 }
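One subtlety in the device branch above: the original global definition is deleted again after registration unless the variable is a plain `to` clause without unified shared memory, since in that case the definition itself is what gets mapped. The decision restated (mlirCaptureClause names the MLIR-side enum, not the converted kind; names here are illustrative):

    // Keep the device-side definition only for `to` without USM; otherwise
    // the _decl_tgt_ref_ptr indirection takes its place.
    bool dropOriginalGlobal =
        ompBuilder->Config.isTargetDevice() &&
        (mlirCaptureClause != mlir::omp::DeclareTargetCaptureClause::to ||
         ompBuilder->Config.hasRequiresUnifiedSharedMemory());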
diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This tests the generation of additional load operations for declare target
+// link variables inside of target op regions when lowering to IR for the
+// device. As the host file is not passed as a module attribute, we miss out
+// on the metadata and entry info.
+//
+// Only so much can be tested here, as the device side depends on a *.bc
+// file created by the host and appended as an attribute to the module.
+
+module attributes {omp.is_target_device = true} {
+  // CHECK-DAG: @_QMtest_0Esp_decl_tgt_ref_ptr = weak global ptr null, align 8
+  llvm.mlir.global external @_QMtest_0Esp() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32 {
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  llvm.func @_QQmain() attributes {} {
+    %0 = llvm.mlir.addressof @_QMtest_0Esp : !llvm.ptr<i32>
+
+    // CHECK-DAG: omp.target: ; preds = %user_code.entry
+    // CHECK-DAG: %1 = load ptr, ptr @_QMtest_0Esp_decl_tgt_ref_ptr, align 8
+    // CHECK-DAG: store i32 1, ptr %1, align 4
+    // CHECK-DAG: br label %omp.region.cont
+    %map = omp.map_info var_ptr(%0 : !llvm.ptr<i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
+    omp.target map_entries(%map : !llvm.ptr<i32>) {
+      %1 = llvm.mlir.constant(1 : i32) : i32
+      llvm.store %1, %0 : !llvm.ptr<i32>
+      omp.terminator
+    }
+
+    llvm.return
+  }
+}
diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
@@ -0,0 +1,140 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-DAG: %struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 }
+// CHECK-DAG: !omp_offload.info = !{!{{.*}}}
+module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_target_device = false} {
+
+  // CHECK-DAG: @_QMtest_0Earray_1d = global [3 x i32] [i32 1, i32 2, i32 3]
+  // CHECK-DAG: @_QMtest_0Earray_1d_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Earray_1d
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_1d_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Earray_1d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_1d_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Earray_1d_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Earray_1d(dense<[1, 2, 3]> : tensor<3xi32>) {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !llvm.array<3 x i32>
+
+  // CHECK-DAG: @_QMtest_0Earray_2d = global [2 x [2 x i32]] {{.*}}
+  // CHECK-DAG: @_QMtest_0Earray_2d_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Earray_2d
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_2d_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Earray_2d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_2d_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Earray_2d_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Earray_2d() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !llvm.array<2 x array<2 x i32>> {
+    %0 = llvm.mlir.undef : !llvm.array<2 x array<2 x i32>>
+    %1 = llvm.mlir.constant(1 : i32) : i32
+    %2 = llvm.insertvalue %1, %0[0, 0] : !llvm.array<2 x array<2 x i32>>
+    %3 = llvm.mlir.constant(2 : i32) : i32
+    %4 = llvm.insertvalue %3, %2[0, 1] : !llvm.array<2 x array<2 x i32>>
+    %5 = llvm.mlir.constant(3 : i32) : i32
+    %6 = llvm.insertvalue %5, %4[1, 0] : !llvm.array<2 x array<2 x i32>>
+    %7 = llvm.mlir.constant(4 : i32) : i32
+    %8 = llvm.insertvalue %7, %6[1, 1] : !llvm.array<2 x array<2 x i32>>
+    %9 = llvm.mlir.constant(2 : index) : i64
+    %10 = llvm.mlir.constant(2 : index) : i64
+    llvm.return %8 : !llvm.array<2 x array<2 x i32>>
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_1 = global float 2.000000e+00
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_extended_link_1
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] c"_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_link_1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : f32 {
+    %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_2 = global float 3.000000e+00
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_extended_link_2
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] c"_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_link_2() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : f32 {
+    %0 = llvm.mlir.constant(3.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_to_1 = global float 2.000000e+00
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_1\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_to_1 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_1, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_to_1", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_to_1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : f32 {
+    %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_to_2 = global float 3.000000e+00
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_2\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_to_2 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_2, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_to_2", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_to_2() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : f32 {
+    %0 = llvm.mlir.constant(3.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_int = global i32 1
+  // CHECK-DAG: @_QMtest_0Edata_int_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_int
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Edata_int_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_int() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32 {
+    %0 = llvm.mlir.constant(10 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_int_clauseless = global i32 1
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [30 x i8] c"_QMtest_0Edata_int_clauseless\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_clauseless = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_clauseless, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_clauseless", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_int_clauseless() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : i32 {
+    %0 = llvm.mlir.constant(1 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_int_to = global i32 5
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [22 x i8] c"_QMtest_0Edata_int_to\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_to = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_to, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_to", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_int_to() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : i32 {
+    %0 = llvm.mlir.constant(5 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  // CHECK-DAG: @_QMtest_0Ept1 = global { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20180515, i8 0, i8 9, i8 1, i8 0 }
+  // CHECK-DAG: @_QMtest_0Ept1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept1
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [31 x i8] c"_QMtest_0Ept1_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Ept1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept1_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Ept1_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Ept1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)> {
+    %0 = llvm.mlir.null : !llvm.ptr<i32>
+    %1 = llvm.mlir.constant(9 : i32) : i32
+    %2 = llvm.mlir.null : !llvm.ptr<i32>
+    %3 = llvm.getelementptr %2[1] : (!llvm.ptr<i32>) -> !llvm.ptr<i32>
+    %4 = llvm.ptrtoint %3 : !llvm.ptr<i32> to i64
+    %5 = llvm.mlir.undef : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    %6 = llvm.insertvalue %4, %5[1] : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    %7 = llvm.mlir.constant(20180515 : i32) : i32
+    %8 = llvm.insertvalue %7, %6[2] : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    %9 = llvm.mlir.constant(0 : i32) : i32
+    %10 = llvm.trunc %9 : i32 to i8
+    %11 = llvm.insertvalue %10, %8[3] : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    %12 = llvm.trunc %1 : i32 to i8
+    %13 = llvm.insertvalue %12, %11[4] : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    %14 = llvm.mlir.constant(1 : i32) : i32
+    %15 = llvm.trunc %14 : i32 to i8
+    %16 = llvm.insertvalue %15, %13[5] : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    %17 = llvm.mlir.constant(0 : i32) : i32
+    %18 = llvm.trunc %17 : i32 to i8
+    %19 = llvm.insertvalue %18, %16[6] : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    %20 = llvm.bitcast %0 : !llvm.ptr<i32> to !llvm.ptr<i32>
+    %21 = llvm.insertvalue %20, %19[0] : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+    llvm.return %21 : !llvm.struct<(ptr<i32>, i64, i32, i8, i8, i8, i8)>
+  }
+
+  // CHECK-DAG: @_QMtest_0Ept2_tar = global i32 5
+  // CHECK-DAG: @_QMtest_0Ept2_tar_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept2_tar
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [35 x i8] c"_QMtest_0Ept2_tar_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Ept2_tar_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept2_tar_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Ept2_tar_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Ept2_tar() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32 {
+    %0 = llvm.mlir.constant(5 : i32) : i32
+    llvm.return %0 : i32
+  }
+}
diff --git a/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90 b/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/declare-target-array-in-target-region.f90
@@ -0,0 +1,34 @@
+! Offloading test with a target region mapping a declare target
+! Fortran array, writing some values to it and checking that the host
+! correctly receives the updates made on the device.
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+module test_0
+    implicit none
+    INTEGER :: sp(10) = (/0,0,0,0,0,0,0,0,0,0/)
+    !$omp declare target link(sp)
+end module test_0
+
+program main
+    use test_0
+    integer :: i = 1
+    integer :: j = 11
+!$omp target map(tofrom:sp, i, j)
+    do while (i < j)
+        sp(i) = i
+        i = i + 1
+    end do
+!$omp end target
+
+PRINT *, sp(:)
+
+end program
+
+! CHECK: 1 2 3 4 5 6 7 8 9 10