Index: lib/CodeGen/CGDeclCXX.cpp =================================================================== --- lib/CodeGen/CGDeclCXX.cpp +++ lib/CodeGen/CGDeclCXX.cpp @@ -74,7 +74,7 @@ // bails even if the attribute is not present. if (D.isNoDestroy(CGF.getContext())) return; - + CodeGenModule &CGM = CGF.CGM; // FIXME: __attribute__((cleanup)) ? Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -2295,15 +2295,22 @@ return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } -static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF, - const VarDecl *VD, QualType T) { +static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, QualType T) { llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_To) + // Return an invalid address if variable is MT_To and unified + // memory is not enabled. For all other cases: MT_Link and + // MT_To with unified memory, return a valid address. + if (!Res || (*Res == OMPDeclareTargetDeclAttr::MT_To && + !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) return Address::invalid(); - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && "Expected link clause"); + assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) && + "Expected link clause OR to clause with unified memory enabled."); QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); - Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetClause(VD); return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs()); } @@ -2359,7 +2366,7 @@ // Check if the variable is marked as declare target with link clause in // device codegen. if (CGF.getLangOpts().OpenMPIsDevice) { - Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T); + Address Addr = emitDeclTargetVarDeclLValue(CGF, VD, T); if (Addr.isValid()) return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -1121,8 +1121,8 @@ SourceLocation Loc); /// Returns the address of the variable marked as declare target with link - /// clause. - virtual Address getAddrOfDeclareTargetLink(const VarDecl *VD); + /// clause OR as declare target with to clause and unified memory. + virtual Address getAddrOfDeclareTargetClause(const VarDecl *VD); /// Emit a code for initialization of threadprivate variable. It emits /// a call to runtime library which adds initial value to the newly created Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -2552,16 +2552,18 @@ return CGM.CreateRuntimeFunction(FnTy, Name); } -Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { +Address CGOpenMPRuntime::getAddrOfDeclareTargetClause(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { + if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory))) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; + OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr"; } llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); if (!Ptr) { @@ -2778,7 +2780,9 @@ bool PerformInit) { Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) @@ -4194,6 +4198,9 @@ CE->getFlags()); switch (Flags) { case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { + if (CGM.getLangOpts().OpenMPIsDevice && + CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) + continue; if (!CE->getAddress()) { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, @@ -7452,7 +7459,10 @@ // Track if the map information being generated is the first for a capture. bool IsCaptureFirstInfo = IsFirstComponentList; - bool IsLink = false; // Is this variable a "declare target link"? + // When the variable is on a declare target link or in a to clause with + // unified memory, a reference is needed to hold the host/device address + // of the variable. + bool RequiresReference = false; // Scan the components from the base to the complete expression. auto CI = Components.rbegin(); @@ -7482,11 +7492,14 @@ if (const auto *VD = dyn_cast_or_null(I->getAssociatedDeclaration())) { if (llvm::Optional Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) - if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { - IsLink = true; - BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { + if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { + RequiresReference = true; + BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetClause(VD); } + } } // If the variable is a pointer and is being dereferenced (i.e. is not @@ -7652,7 +7665,8 @@ // (there is a set of entries for each capture). OpenMPOffloadMappingFlags Flags = getMapTypeBits( MapType, MapModifiers, IsImplicit, - !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); + !IsExpressionFirstInfo || RequiresReference, + IsCaptureFirstInfo && !RequiresReference); if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, @@ -9124,7 +9138,9 @@ llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( cast(GD.getDecl())); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) { DeferredGlobalVariables.insert(cast(GD.getDecl())); return true; } @@ -9183,8 +9199,9 @@ StringRef VarName; CharUnits VarSize; llvm::GlobalValue::LinkageTypes Linkage; - switch (*Res) { - case OMPDeclareTargetDeclAttr::MT_To: + + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + !HasRequiresUnifiedSharedMemory) { Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; VarName = CGM.getMangledName(VD); if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { @@ -9207,20 +9224,27 @@ CGM.addCompilerUsedGlobal(GVAddrRef); } } - break; - case OMPDeclareTargetDeclAttr::MT_Link: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + } else if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) { + if (*Res == OMPDeclareTargetDeclAttr::MT_Link) + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + else + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + if (CGM.getLangOpts().OpenMPIsDevice) { VarName = Addr->getName(); Addr = nullptr; } else { - VarName = getAddrOfDeclareTargetLink(VD).getName(); - Addr = cast(getAddrOfDeclareTargetLink(VD).getPointer()); + VarName = getAddrOfDeclareTargetClause(VD).getName(); + Addr = cast(getAddrOfDeclareTargetClause(VD).getPointer()); } VarSize = CGM.getPointerSize(); Linkage = llvm::GlobalValue::WeakAnyLinkage; - break; + } else { + llvm_unreachable("Declare target attribute must be to or link."); } + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( VarName, Addr, VarSize, Flags, Linkage); } @@ -9239,12 +9263,15 @@ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; - if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + !HasRequiresUnifiedSharedMemory) { CGM.EmitGlobal(VD); } else { - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && - "Expected to or link clauses."); - (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) && + "Expected link clause or to clause with unified memory."); + (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetClause(VD); } } } Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -2475,13 +2475,19 @@ // Emit declaration of the must-be-emitted declare target variable. if (llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { - if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + bool UnifiedMemoryEnabled = + getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + !UnifiedMemoryEnabled) { (void)GetAddrOfGlobalVar(VD); } else { - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && - "link claue expected."); - (void)getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + UnifiedMemoryEnabled)) && + "Link clause or to clause with unified memory expected."); + (void)getOpenMPRuntime().getAddrOfDeclareTargetClause(VD); } + return; } } Index: test/OpenMP/declare_target_codegen.cpp =================================================================== --- test/OpenMP/declare_target_codegen.cpp +++ test/OpenMP/declare_target_codegen.cpp @@ -20,10 +20,10 @@ // CHECK-DAG: weak constant %struct.__tgt_offload_entry { i8* bitcast (i32* @bbb to i8*), // CHECK-DAG: @ccc = external global i32, // CHECK-DAG: @ddd ={{ dso_local | }}global i32 0, -// CHECK-DAG: @hhh_decl_tgt_link_ptr = common global i32* null -// CHECK-DAG: @ggg_decl_tgt_link_ptr = common global i32* null -// CHECK-DAG: @fff_decl_tgt_link_ptr = common global i32* null -// CHECK-DAG: @eee_decl_tgt_link_ptr = common global i32* null +// CHECK-DAG: @hhh_decl_tgt_ref_ptr = common global i32* null +// CHECK-DAG: @ggg_decl_tgt_ref_ptr = common global i32* null +// CHECK-DAG: @fff_decl_tgt_ref_ptr = common global i32* null +// CHECK-DAG: @eee_decl_tgt_ref_ptr = common global i32* null // CHECK-DAG: @{{.*}}maini1{{.*}}aaa = internal global i64 23, // CHECK-DAG: @b ={{ dso_local | }}global i32 15, // CHECK-DAG: @d ={{ dso_local | }}global i32 0, Index: test/OpenMP/declare_target_link_codegen.cpp =================================================================== --- test/OpenMP/declare_target_link_codegen.cpp +++ test/OpenMP/declare_target_link_codegen.cpp @@ -18,15 +18,15 @@ #define HEADER // HOST-DAG: @c = external global i32, -// HOST-DAG: @c_decl_tgt_link_ptr = global i32* @c +// HOST-DAG: @c_decl_tgt_ref_ptr = global i32* @c // DEVICE-NOT: @c = -// DEVICE: @c_decl_tgt_link_ptr = common global i32* null +// DEVICE: @c_decl_tgt_ref_ptr = common global i32* null // HOST: [[SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 4] // HOST: [[MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 35, i64 531] -// HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_link_ptr\00" -// HOST: @.omp_offloading.entry.c_decl_tgt_link_ptr = weak constant %struct.__tgt_offload_entry { i8* bitcast (i32** @c_decl_tgt_link_ptr to i8*), i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp_offloading.entries", align 1 -// DEVICE-NOT: internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_link_ptr\00" -// CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (i32** @c_decl_tgt_link_ptr to i8*)] +// HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00" +// HOST: @.omp_offloading.entry.c_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { i8* bitcast (i32** @c_decl_tgt_ref_ptr to i8*), i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp_offloading.entries", align 1 +// DEVICE-NOT: internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00" +// CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (i32** @c_decl_tgt_ref_ptr to i8*)] extern int c; #pragma omp declare target link(c) @@ -44,7 +44,7 @@ } // DEVICE: define weak void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l[[@LINE-10]](i32* dereferenceable{{[^,]*}} -// DEVICE: [[C_REF:%.+]] = load i32*, i32** @c_decl_tgt_link_ptr, +// DEVICE: [[C_REF:%.+]] = load i32*, i32** @c_decl_tgt_ref_ptr, // DEVICE: [[C:%.+]] = load i32, i32* [[C_REF]], // DEVICE: store i32 [[C]], i32* % @@ -55,7 +55,7 @@ // HOST: getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // HOST: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASEPTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // HOST: [[BP1_CAST:%.+]] = bitcast i8** [[BP1]] to i32*** -// HOST: store i32** @c_decl_tgt_link_ptr, i32*** [[BP1_CAST]], +// HOST: store i32** @c_decl_tgt_ref_ptr, i32*** [[BP1_CAST]], // HOST: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // HOST: [[P1_CAST:%.+]] = bitcast i8** [[P1]] to i32** // HOST: store i32* @c, i32** [[P1_CAST]], @@ -69,5 +69,5 @@ // HOST: [[C:%.*]] = load i32, i32* @c, // HOST: store i32 [[C]], i32* % -// CHECK: !{i32 1, !"c_decl_tgt_link_ptr", i32 1, i32 {{[0-9]+}}} +// CHECK: !{i32 1, !"c_decl_tgt_ref_ptr", i32 1, i32 {{[0-9]+}}} #endif // HEADER Index: test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp =================================================================== --- test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp +++ test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp @@ -1,5 +1,10 @@ // Test declare target link under unified memory requirement. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-HOST + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK-DEVICE + // expected-no-diagnostics #ifndef HEADER @@ -8,53 +13,74 @@ #define N 1000 double var = 10.0; +double to_var = 20.0; #pragma omp requires unified_shared_memory #pragma omp declare target link(var) +#pragma omp declare target to(to_var) int bar(int n){ double sum = 0; #pragma omp target for(int i = 0; i < n; i++) { - sum += var; + sum += var + to_var; } return sum; } -// CHECK: [[VAR:@.+]] = global double 1.000000e+01 -// CHECK: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]] +// CHECK-HOST: [[VAR:@.+]] = global double 1.000000e+01 +// CHECK-HOST: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]] + +// CHECK-HOST: [[TO_VAR:@.+]] = global double 2.000000e+01 +// CHECK-HOST: [[VAR_DECL_TGT_TO_PTR:@.+]] = global double* [[TO_VAR]] + +// CHECK-HOST: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8] +// CHECK-HOST: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800] + +// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [21 x i8] +// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR:@.+]] = weak constant %struct.__tgt_offload_entry { i8* bitcast (double** [[VAR_DECL_TGT_LINK_PTR]] to i8*), i8* getelementptr inbounds ([21 x i8], [21 x i8]* [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME]], i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp_offloading.entries" + +// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [24 x i8] +// CHECK-HOST: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR:@.+]] = weak constant %struct.__tgt_offload_entry { i8* bitcast (double** [[VAR_DECL_TGT_TO_PTR]] to i8*), i8* getelementptr inbounds ([24 x i8], [24 x i8]* [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME]], i32 0, i32 0), i64 8, i32 0, i32 0 }, section ".omp_offloading.entries" + +// CHECK-HOST: @llvm.used = appending global [2 x i8*] [i8* bitcast (double** [[VAR_DECL_TGT_LINK_PTR]] to i8*), i8* bitcast (double** [[VAR_DECL_TGT_TO_PTR]] to i8*)], section "llvm.metadata" + +// CHECK-HOST: [[N_CASTED:%.+]] = alloca i64 +// CHECK-HOST: [[SUM_CASTED:%.+]] = alloca i64 + +// CHECK-HOST: [[OFFLOAD_BASEPTRS:%.+]] = alloca [2 x i8*] +// CHECK-HOST: [[OFFLOAD_PTRS:%.+]] = alloca [2 x i8*] -// CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8] -// CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800] +// CHECK-HOST: [[LOAD1:%.+]] = load i64, i64* [[N_CASTED]] +// CHECK-HOST: [[LOAD2:%.+]] = load i64, i64* [[SUM_CASTED]] -// CHECK: [[N_CASTED:%.+]] = alloca i64 -// CHECK: [[SUM_CASTED:%.+]] = alloca i64 +// CHECK-HOST: [[BPTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-HOST: [[BCAST1:%.+]] = bitcast i8** [[BPTR1]] to i64* +// CHECK-HOST: store i64 [[LOAD1]], i64* [[BCAST1]] +// CHECK-HOST: [[BPTR2:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-HOST: [[BCAST2:%.+]] = bitcast i8** [[BPTR2]] to i64* +// CHECK-HOST: store i64 [[LOAD1]], i64* [[BCAST2]] -// CHECK: [[OFFLOAD_BASEPTRS:%.+]] = alloca [2 x i8*] -// CHECK: [[OFFLOAD_PTRS:%.+]] = alloca [2 x i8*] +// CHECK-HOST: [[BPTR3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-HOST: [[BCAST3:%.+]] = bitcast i8** [[BPTR3]] to i64* +// CHECK-HOST: store i64 [[LOAD2]], i64* [[BCAST3]] +// CHECK-HOST: [[BPTR4:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-HOST: [[BCAST4:%.+]] = bitcast i8** [[BPTR4]] to i64* +// CHECK-HOST: store i64 [[LOAD2]], i64* [[BCAST4]] -// CHECK: [[LOAD1:%.+]] = load i64, i64* [[N_CASTED]] -// CHECK: [[LOAD2:%.+]] = load i64, i64* [[SUM_CASTED]] +// CHECK-HOST: [[BPTR7:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-HOST: [[BPTR8:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 -// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK: [[BCAST1:%.+]] = bitcast i8** [[BPTR1]] to i64* -// CHECK: store i64 [[LOAD1]], i64* [[BCAST1]] -// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 -// CHECK: [[BCAST2:%.+]] = bitcast i8** [[BPTR2]] to i64* -// CHECK: store i64 [[LOAD1]], i64* [[BCAST2]] +// CHECK-HOST: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0)) -// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK: [[BCAST3:%.+]] = bitcast i8** [[BPTR3]] to i64* -// CHECK: store i64 [[LOAD2]], i64* [[BCAST3]] -// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1 -// CHECK: [[BCAST4:%.+]] = bitcast i8** [[BPTR4]] to i64* -// CHECK: store i64 [[LOAD2]], i64* [[BCAST4]] +// CHECK-DEVICE: [[VAR_LINK:@.+]] = common global double* null +// CHECK-DEVICE: [[VAR_TO:@.+]] = common global double* null -// CHECK: [[BPTR7:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK: [[BPTR8:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-DEVICE: @llvm.used = appending global [2 x i8*] [i8* bitcast (double** [[VAR_LINK]] to i8*), i8* bitcast (double** [[VAR_TO]] to i8*)], section "llvm.metadata" -// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0)) +// CHECK-DEVICE: [[VAR_TO_PTR:%.+]] = load double*, double** [[VAR_TO]] +// CHECK-DEVICE: load double, double* [[VAR_TO_PTR]] #endif