diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -790,8 +790,31 @@
     TODO(currentLocation, "OMPD_target_data MapOperand BoxType");
   };
 
+  // Ref pointers are used rather than direct access when we map a declare
+  // target link variable, or a declare target to variable with USM mode.
+  auto requiresReference = [&firOpBuilder](const mlir::Value &mapOp) {
+    auto *op = mapOp.getDefiningOp();
+    if (auto addrOp = mlir::dyn_cast<fir::AddrOfOp>(op)) {
+      op = firOpBuilder.getModule().lookupSymbol(addrOp.getSymbol());
+    }
+
+    // TODO: Add the to clause + USM mode case when we have some method of
+    // enabling USM in the frontend and getting this information.
+    if (auto declareTargetGlobal =
+            llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op)) {
+      if (declareTargetGlobal.isDeclareTarget() &&
+          (declareTargetGlobal.getDeclareTargetCaptureClause() ==
+           mlir::omp::DeclareTargetCaptureClause::link)) {
+        return true;
+      }
+    }
+
+    return false;
+  };
+
   auto addMapClause = [&](const auto &mapClause,
                           mlir::Location &currentLocation) {
     auto mapType = std::get<Fortran::parser::OmpMapType::Type>(
         std::get<std::optional<Fortran::parser::OmpMapType>>(mapClause->v.t)
             ->t);
@@ -826,12 +849,6 @@
 
     // TODO: Add support MapTypeModifiers close, mapper, present, iterator
 
-    mlir::IntegerAttr mapTypeAttr = firOpBuilder.getIntegerAttr(
-        firOpBuilder.getI64Type(),
-        static_cast<
-            std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
-            mapTypeBits));
-
     llvm::SmallVector<mlir::Value> mapOperand;
     /// Check for unsupported map operand types.
     for (const Fortran::parser::OmpObject &ompObject :
@@ -847,6 +864,24 @@
     for (mlir::Value mapOp : mapOperand) {
       checkType(mapOp.getLoc(), mapOp.getType());
 
+      llvm::omp::OpenMPOffloadMappingFlags perValMapTypeBit = mapTypeBits;
+
+      // TODO: Clang special cases this for several other situations (member
+      // references being one example); see getMapTypeBits inside
+      // generateInfoForComponentList in Clang's CGOpenMPRuntime for
+      // reference. We only support the declare target link variation at the
+      // moment.
+      bool requiresRef = requiresReference(mapOp);
+      if (requiresRef)
+        perValMapTypeBit |=
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+
+      mlir::IntegerAttr mapTypeAttr = firOpBuilder.getIntegerAttr(
+          firOpBuilder.getI64Type(),
+          static_cast<
+              std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+              perValMapTypeBit));
+
       mapOperands.push_back(mapOp);
       mapTypes.push_back(mapTypeAttr);
     }
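The flang side only toggles bit flags: for a declare target link operand, OMP_MAP_PTR_AND_OBJ is OR'd into the clause's map-type bits before they are encoded as an i64 attribute. Below is a minimal standalone sketch of that composition; the constant values mirror llvm::omp::OpenMPOffloadMappingFlags in llvm/Frontend/OpenMP/OMPConstants.h at the time of writing (an assumption worth re-checking against the header), and the enum here is a stand-in, not the patch's code.

// Minimal sketch (not part of the patch): how the per-value map-type bits
// combine for map(tofrom:) on a declare target link operand.
#include <cstdint>
#include <cstdio>

enum class MapFlags : uint64_t {
  To = 0x01,        // mirrors OMP_MAP_TO
  From = 0x02,      // mirrors OMP_MAP_FROM
  PtrAndObj = 0x10, // mirrors OMP_MAP_PTR_AND_OBJ
};

int main() {
  // map(tofrom: x) gives To | From ...
  uint64_t bits = static_cast<uint64_t>(MapFlags::To) |
                  static_cast<uint64_t>(MapFlags::From);
  // ... and the requiresReference case ORs in PtrAndObj.
  bits |= static_cast<uint64_t>(MapFlags::PtrAndObj);
  std::printf("map-type bits: 0x%llx\n",
              static_cast<unsigned long long>(bits)); // prints 0x13
}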
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -27,6 +27,9 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/TargetParser/Triple.h"
+
+#include <vector>
 
 using namespace mlir;
 
@@ -1368,6 +1371,74 @@
   return 0;
 }
 
+// Returns the reference pointer generated by the lowering of the declare
+// target operation in cases where the link clause is used, or the to clause
+// is used in USM mode. An alternative approach to this function may be to
+// maintain these references in a vector, or in a map from the original MLIR
+// op, stored in ModuleTranslation or a construct similar to
+// OpenMPVarMappingStackFrame.
+static llvm::Value *
+getRefPtrIfDeclareTarget(mlir::Value const &value,
+                         LLVM::ModuleTranslation &moduleTranslation) {
+  if (!value.getDefiningOp())
+    return nullptr;
+
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  if (isa<LLVM::AddressOfOp>(value.getDefiningOp())) {
+    LLVM::AddressOfOp addressOfOp =
+        dyn_cast<LLVM::AddressOfOp>(value.getDefiningOp());
+    LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(
+        addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
+            addressOfOp.getGlobalName()));
+
+    if (auto declareTargetGlobal =
+            llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
+                gOp.getOperation())) {
+      // In this case, we must utilise the reference pointer generated by the
+      // declare target operation, similar to Clang.
+      if (declareTargetGlobal.isDeclareTarget() &&
+          ((declareTargetGlobal.getDeclareTargetCaptureClause() ==
+            mlir::omp::DeclareTargetCaptureClause::link) ||
+           (declareTargetGlobal.getDeclareTargetCaptureClause() ==
+                mlir::omp::DeclareTargetCaptureClause::to &&
+            ompBuilder->Config.hasRequiresUnifiedSharedMemory()))) {
+        llvm::SmallString<64> suffix;
+        {
+          llvm::raw_svector_ostream os(suffix);
+          if (gOp.getVisibility() == mlir::SymbolTable::Visibility::Private) {
+            auto loc = gOp->getLoc()->findInstanceOf<mlir::FileLineColLoc>();
+            auto fileInfoCallBack = [&]() {
+              llvm::StringRef filename = loc.getFilename();
+              // Handle the split file produced by mlir-translate, which
+              // prefixes the filename with components other than the
+              // filename itself.
+              if (filename.contains("within split at")) {
+                filename.consume_front("within split at ");
+                filename = filename.rsplit(":").first;
+              }
+
+              return std::pair(filename, loc.getLine());
+            };
+
+            os << llvm::format(
+                "_%x",
+                ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack).FileID);
+          }
+          os << "_decl_tgt_ref_ptr";
+        }
+
+        if (gOp.getSymName().contains(suffix))
+          return moduleTranslation.getLLVMModule()->getNamedValue(
+              gOp.getSymName());
+
+        return moduleTranslation.getLLVMModule()->getNamedValue(
+            (gOp.getSymName().str() + suffix.str()).str());
+      }
+    }
+  }
+
+  return nullptr;
+}
+
 static void genMapInfos(llvm::IRBuilderBase &builder,
                         LLVM::ModuleTranslation &moduleTranslation,
                         DataLayout &DL,
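getRefPtrIfDeclareTarget reconstructs, rather than stores, the name that the OpenMPIRBuilder gave the reference pointer. A minimal sketch of that naming scheme follows; declTgtRefPtrName and fileId are illustrative stand-ins, not the patch's API, and the scheme is inferred from the function above and the test symbols below.

// Sketch of the "_decl_tgt_ref_ptr" naming scheme (assumption: fileId is the
// FileID produced by getTargetEntryUniqueInfo for private globals).
#include <cstdio>
#include <string>

std::string declTgtRefPtrName(const std::string &symName, bool isPrivate,
                              unsigned fileId) {
  std::string suffix;
  if (isPrivate) { // private globals get a per-file disambiguator first
    char buf[16];
    std::snprintf(buf, sizeof(buf), "_%x", fileId);
    suffix += buf;
  }
  suffix += "_decl_tgt_ref_ptr";
  // A symbol that already carries the suffix is looked up as-is.
  if (symName.find(suffix) != std::string::npos)
    return symName;
  return symName + suffix;
}

int main() {
  // Prints "_QMtest_0Esp_decl_tgt_ref_ptr", the symbol the tests check for.
  std::printf("%s\n", declTgtRefPtrName("_QMtest_0Esp", false, 0).c_str());
}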
@@ -1389,7 +1460,12 @@
     }
 
     llvm::Value *mapOpValue = moduleTranslation.lookupValue(mapOp);
-    combinedInfo.BasePointers.emplace_back(mapOpValue);
+
+    if (auto *refPtr = getRefPtrIfDeclareTarget(mapOp, moduleTranslation))
+      combinedInfo.BasePointers.emplace_back(refPtr);
+    else
+      combinedInfo.BasePointers.emplace_back(mapOpValue);
+
     combinedInfo.Pointers.emplace_back(mapOpValue);
     combinedInfo.Names.emplace_back(
         mlir::LLVM::createMappingInformation(mapOp.getLoc(), *ompBuilder));
@@ -1612,6 +1688,33 @@
   return true;
 }
 
+static void
+handleDeclareTargetMapVar(llvm::SmallVector<Value> &mapOperands,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (const auto &mapOp : mapOperands) {
+    llvm::Value *mapOpValue = moduleTranslation.lookupValue(mapOp);
+    if (auto *declareTarget =
+            getRefPtrIfDeclareTarget(mapOp, moduleTranslation)) {
+      // The users iterator is invalidated if we modify an element, so we
+      // populate this vector of uses and then alter each user individually,
+      // emitting its own load (rather than one load for all).
+      llvm::SmallVector<llvm::User *> userVec;
+      for (llvm::User *user : mapOpValue->users())
+        userVec.push_back(user);
+
+      for (auto *user : userVec) {
+        if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
+          auto *load = builder.CreateLoad(
+              moduleTranslation.convertType(mapOp.getType()), declareTarget);
+          load->moveBefore(insn);
+          user->replaceUsesOfWith(mapOpValue, load);
+        }
+      }
+    }
+  }
+}
+
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -1622,13 +1725,29 @@
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();
 
+  // FIXME: When explicit map operands are handled, they need to be
+  // incorporated into this, to filter declare target variables out of the
+  // input arguments to the kernel. At the moment this captures anything used
+  // in the kernel implicitly; however, some values will be explicitly
+  // defined in the map operands.
+  auto getKernelArguments = [&](const llvm::SetVector<Value> &operandSet,
+                                llvm::SmallVector<llvm::Value *> &llvmInputs,
+                                llvm::SmallVector<Value> &mlirInputs) {
+    for (Value operand : operandSet) {
+      if (getRefPtrIfDeclareTarget(operand, moduleTranslation))
+        continue;
+
+      llvmInputs.push_back(moduleTranslation.lookupValue(operand));
+      mlirInputs.push_back(operand);
+    }
+  };
+
   llvm::SetVector<Value> operandSet;
   getUsedValuesDefinedAbove(targetRegion, operandSet);
 
-  // Collect the input arguments.
   llvm::SmallVector<llvm::Value *> inputs;
-  for (Value operand : operandSet)
-    inputs.push_back(moduleTranslation.lookupValue(operand));
+  llvm::SmallVector<Value> mlirInputs;
+  getKernelArguments(operandSet, inputs, mlirInputs);
 
   LogicalResult bodyGenStatus = success();
 
@@ -1656,9 +1775,123 @@
       ompLoc, builder.saveIP(), entryInfo, defaultValTeams, defaultValThreads,
       inputs, bodyCB));
 
+  // Remap access operations to declare target reference pointers for the
+  // device, essentially generating the extra load operations necessary.
+  if (moduleTranslation.getOpenMPBuilder()->Config.isEmbedded()) {
+    SmallVector<Value> mapOperands = targetOp.getMapOperands();
+    handleDeclareTargetMapVar(mapOperands, moduleTranslation, builder);
+  }
+
   return bodyGenStatus;
 }
 
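On the device side, each use of the mapped value is rewritten to go through a fresh load of the reference pointer. The standalone sketch below (an illustration, not part of the patch; it assumes LLVM headers and libraries are available) builds that same load-then-access shape with llvm::IRBuilder, reproducing the IR checked by the device test at the end of this patch.

// Sketch: the "load the ref pointer, then access through it" pattern that
// handleDeclareTargetMapVar splices in front of each device-side use.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext ctx;
  llvm::Module m("demo", ctx);
  auto *ptrTy = llvm::PointerType::get(ctx, /*AddressSpace=*/0);

  // Stand-in for the ref pointer the declare target lowering created.
  auto *refPtr = new llvm::GlobalVariable(
      m, ptrTy, /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
      llvm::ConstantPointerNull::get(ptrTy), "_QMtest_0Esp_decl_tgt_ref_ptr");

  auto *fn = llvm::Function::Create(
      llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), /*isVarArg=*/false),
      llvm::GlobalValue::ExternalLinkage, "kernel", m);
  llvm::IRBuilder<> b(llvm::BasicBlock::Create(ctx, "entry", fn));

  // Indirect access: load the variable's address out of the ref pointer,
  // then store through it -- the same shape the device test checks for.
  llvm::Value *addr = b.CreateLoad(ptrTy, refPtr);
  b.CreateStore(b.getInt32(1), addr);
  b.CreateRetVoid();

  m.print(llvm::outs(), nullptr);
}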
+LogicalResult
+convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
+                         LLVM::ModuleTranslation &moduleTranslation) {
+  // Just return early for functions at the moment; they may need specialised
+  // lowering later to optimise, but for now they execute on the device.
+  if (LLVM::LLVMFuncOp funcOp = dyn_cast<LLVM::LLVMFuncOp>(op))
+    return success();
+
+  auto convertToDeviceClauseKind = [](mlir::omp::DeclareTargetAttr attr) {
+    switch (attr.getDeviceType().getValue()) {
+    case mlir::omp::DeclareTargetDeviceType::host:
+      return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
+    case mlir::omp::DeclareTargetDeviceType::nohost:
+      return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
+    case mlir::omp::DeclareTargetDeviceType::any:
+      return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
+    default:
+      return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
+    }
+  };
+
+  auto convertToCaptureClauseKind = [](mlir::omp::DeclareTargetAttr attr) {
+    switch (attr.getCaptureClause().getValue()) {
+    case mlir::omp::DeclareTargetCaptureClause::to:
+      return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
+    case mlir::omp::DeclareTargetCaptureClause::link:
+      return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
+    default:
+      return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
+    }
+  };
+
+  if (LLVM::GlobalOp gOp = dyn_cast<LLVM::GlobalOp>(op)) {
+    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+    if (auto *gVal = llvmModule->getNamedValue(gOp.getSymName())) {
+      llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+      bool isDeclaration = gOp.isDeclaration();
+      bool isExternallyVisible =
+          gVal->getVisibility() !=
+          llvm::GlobalValue::VisibilityTypes::HiddenVisibility;
+      llvm::StringRef mangledName = gOp.getSymName();
+      auto captureClause = convertToCaptureClauseKind(attribute);
+      auto deviceClause = convertToDeviceClauseKind(attribute);
+      // Unused for MLIR at the moment; required in Clang for bookkeeping.
+      std::vector<llvm::GlobalVariable *> generatedRefs;
+
+      std::vector<llvm::Triple> targetTriple;
+      auto targetTripleAttr =
+          op->getParentOfType<mlir::ModuleOp>().getOperation()->getAttr(
+              LLVM::LLVMDialect::getTargetTripleAttrName());
+      if (targetTripleAttr)
+        targetTriple.emplace_back(
+            targetTripleAttr.dyn_cast_or_null<mlir::StringAttr>().data());
+
+      auto loc = op->getLoc()->findInstanceOf<mlir::FileLineColLoc>();
+
+      auto fileInfoCallBack = [&]() {
+        llvm::StringRef filename = loc.getFilename();
+        // Handle the split file produced by mlir-translate, which prefixes
+        // the filename with components other than the filename itself.
+        if (filename.contains("within split at")) {
+          filename.consume_front("within split at ");
+          filename = filename.rsplit(":").first;
+        }
+
+        return std::pair(filename, loc.getLine());
+      };
+
+      ompBuilder->registerTargetGlobalVariable(
+          captureClause, deviceClause, isDeclaration, isExternallyVisible,
+          ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
+          generatedRefs, false, targetTriple, nullptr, nullptr, gVal->getType(),
+          gVal);
+
+      if (ompBuilder->Config.isEmbedded() &&
+          (attribute.getCaptureClause().getValue() !=
+               mlir::omp::DeclareTargetCaptureClause::to ||
+           ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
+        ompBuilder->getAddrOfDeclareTargetVar(
+            captureClause, deviceClause, isDeclaration, isExternallyVisible,
+            ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack), mangledName,
+            generatedRefs, false, targetTriple, gVal->getType(), nullptr,
+            nullptr);
+        // A global has already been generated by this stage, unlike in Clang,
+        // so on the device it needs to be specially removed here for anything
+        // but a to clause variable without unified shared memory.
+        if (llvm::GlobalValue *llvmVal =
+                llvmModule->getNamedValue(mangledName)) {
+          llvmVal->removeFromParent();
+          llvmVal->dropAllReferences();
+        }
+      }
+    }
+  }
+
+  return success();
+}
+
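For orientation, registerTargetGlobalVariable ends up emitting one offload-entry record per declare target global on the host. The C++ mirror below is my annotation, not part of the patch: the layout matches %struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 } checked in the test, and the field comments follow the libomptarget ABI at the time of writing.

// Host-side record emitted per declare target global (illustrative mirror of
// the real __tgt_offload_entry struct used by libomptarget).
#include <cstdint>

struct TgtOffloadEntry {
  void *addr;       // Symbol address: the ref pointer for link entries,
                    // the global itself for to entries.
  char *name;       // Mangled name, e.g. "_QMtest_0Edata_int_decl_tgt_ref_ptr".
  uint64_t size;    // Bytes: 8 (a pointer) for link; the variable size for to.
  int32_t flags;    // OMPTargetGlobalVarEntryTo = 0x0, ...Link = 0x1.
  int32_t reserved; // Reserved, zero.
};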
 namespace {
 
 /// Implementation of the dialect interface that converts operations belonging
@@ -1698,6 +1931,9 @@
                                  versionAttr.getVersion());
           return success();
         })
+        .Case([&](mlir::omp::DeclareTargetAttr dtAttr) {
+          return convertDeclareTargetAttr(op, dtAttr, moduleTranslation);
+        })
         .Default([&](Attribute attr) {
           // fall through for omp attributes that do not require lowering and/or
           // have no concrete definition and thus no type to define a case on
diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm.mlir
@@ -0,0 +1,174 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// NOTE: This test unfortunately only checks the host side, as the device side
+// depends on a *.bc file created by the host and appended as an attribute to
+// the module.
+
+// CHECK-DAG: %struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 }
+// CHECK-DAG: !omp_offload.info = !{!{{.*}}}
+module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_device = false} {
+
+  // CHECK-DAG: @_QMtest_0Earray_1d = global [3 x i32] [i32 1, i32 2, i32 3]
+  // CHECK-DAG: @_QMtest_0Earray_1d_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Earray_1d
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_1d_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Earray_1d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_1d_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Earray_1d_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Earray_1d(dense<[1, 2, 3]> : tensor<3xi32>) {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !llvm.array<3 x i32>
+
+  // CHECK-DAG: @_QMtest_0Earray_2d = global [2 x [2 x i32]] {{.*}}
+  // CHECK-DAG: @_QMtest_0Earray_2d_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Earray_2d
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_2d_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Earray_2d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_2d_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Earray_2d_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Earray_2d() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !llvm.array<2 x array<2 x i32>> {
+    %0 = llvm.mlir.undef : !llvm.array<2 x array<2 x i32>>
+    %1 = llvm.mlir.constant(1 : i32) : i32
+    %2 = llvm.insertvalue %1, %0[0, 0] : !llvm.array<2 x array<2 x i32>>
+    %3 = llvm.mlir.constant(2 : i32) : i32
+    %4 = llvm.insertvalue %3, %2[0, 1] : !llvm.array<2 x array<2 x i32>>
+    %5 = llvm.mlir.constant(3 : i32) : i32
+    %6 = llvm.insertvalue %5, %4[1, 0] : !llvm.array<2 x array<2 x i32>>
+    %7 = llvm.mlir.constant(4 : i32) : i32
+    %8 = llvm.insertvalue %7, %6[1, 1] : !llvm.array<2 x array<2 x i32>>
+    %9 = llvm.mlir.constant(2 : index) : i64
+    %10 = llvm.mlir.constant(2 : index) : i64
+    llvm.return %8 : !llvm.array<2 x array<2 x i32>>
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_1 = global float 2.000000e+00
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_extended_link_1
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] c"_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_link_1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : f32 {
+    %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_2 = global float 3.000000e+00
+  // CHECK-DAG: @_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_extended_link_2
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] c"_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_link_2() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : f32 {
+    %0 = llvm.mlir.constant(3.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_to_1 = global float 2.000000e+00
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_1\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_to_1 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_1, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_to_1", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_to_1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : f32 {
+    %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_extended_to_2 = global float 3.000000e+00
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_2\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_to_2 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_2, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_to_2", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_extended_to_2() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : f32 {
+    %0 = llvm.mlir.constant(3.000000e+00 : f32) : f32
+    llvm.return %0 : f32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_int = global i32 1
+  // CHECK-DAG: @_QMtest_0Edata_int_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_int
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Edata_int_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_int() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32 {
+    %0 = llvm.mlir.constant(10 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_int_clauseless = global i32 1
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [30 x i8] c"_QMtest_0Edata_int_clauseless\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_clauseless = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_clauseless, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_clauseless", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_int_clauseless() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : i32 {
+    %0 = llvm.mlir.constant(1 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  // CHECK-DAG: @_QMtest_0Edata_int_to = global i32 5
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [22 x i8] c"_QMtest_0Edata_int_to\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_to = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_to, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_to", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Edata_int_to() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} : i32 {
+    %0 = llvm.mlir.constant(5 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  // CHECK-DAG: @_QMtest_0Ept1 = global { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20180515, i8 0, i8 9, i8 1, i8 0 }
+  // CHECK-DAG: @_QMtest_0Ept1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept1
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [31 x i8] c"_QMtest_0Ept1_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Ept1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept1_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Ept1_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Ept1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {
+    %0 = llvm.mlir.null : !llvm.ptr
+    %1 = llvm.mlir.constant(9 : i32) : i32
+    %2 = llvm.mlir.null : !llvm.ptr
+    %3 = llvm.getelementptr %2[1] : (!llvm.ptr) -> !llvm.ptr
+    %4 = llvm.ptrtoint %3 : !llvm.ptr to i64
+    %5 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %6 = llvm.insertvalue %4, %5[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %7 = llvm.mlir.constant(20180515 : i32) : i32
+    %8 = llvm.insertvalue %7, %6[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %9 = llvm.mlir.constant(0 : i32) : i32
+    %10 = llvm.trunc %9 : i32 to i8
+    %11 = llvm.insertvalue %10, %8[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %12 = llvm.trunc %1 : i32 to i8
+    %13 = llvm.insertvalue %12, %11[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %14 = llvm.mlir.constant(1 : i32) : i32
+    %15 = llvm.trunc %14 : i32 to i8
+    %16 = llvm.insertvalue %15, %13[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %17 = llvm.mlir.constant(0 : i32) : i32
+    %18 = llvm.trunc %17 : i32 to i8
+    %19 = llvm.insertvalue %18, %16[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %20 = llvm.bitcast %0 : !llvm.ptr to !llvm.ptr
+    %21 = llvm.insertvalue %20, %19[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    llvm.return %21 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  }
+
+  // CHECK-DAG: @_QMtest_0Ept2_tar = global i32 5
+  // CHECK-DAG: @_QMtest_0Ept2_tar_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept2_tar
+  // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [35 x i8] c"_QMtest_0Ept2_tar_decl_tgt_ref_ptr\00"
+  // CHECK-DAG: @.omp_offloading.entry._QMtest_0Ept2_tar_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept2_tar_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Ept2_tar_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global external @_QMtest_0Ept2_tar() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32 {
+    %0 = llvm.mlir.constant(5 : i32) : i32
+    llvm.return %0 : i32
+  }
+}
+
+// -----
+
+// This tests the generation of additional load operations for declare target
+// link variables inside target op regions when lowering to IR for the device.
+// Unfortunately, as the host file is not passed as a module attribute, we
+// miss out on the metadata and entry info, but that is not necessary to test
+// the transformation in this case.
+
+module attributes {omp.is_device = true} {
+  // CHECK-DAG: @_QMtest_0Esp_decl_tgt_ref_ptr = weak global ptr null, align 8
+  llvm.mlir.global external @_QMtest_0Esp() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32 {
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    llvm.return %0 : i32
+  }
+
+  llvm.func @_QQmain() attributes {} {
+    %0 = llvm.mlir.addressof @_QMtest_0Esp : !llvm.ptr
+
+    // CHECK-DAG: omp.target: ; preds = %user_code.entry
+    // CHECK-DAG: %1 = load ptr, ptr @_QMtest_0Esp_decl_tgt_ref_ptr, align 8
+    // CHECK-DAG: store i32 1, ptr %1, align 4
+    // CHECK-DAG: br label %omp.region.cont
+    omp.target map((tofrom -> %0 : !llvm.ptr)) {
+      %1 = llvm.mlir.constant(1 : i32) : i32
+      llvm.store %1, %0 : !llvm.ptr
+      omp.terminator
+    }
+
+    llvm.return
+  }
+}