diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1207,8 +1207,8 @@ Sizes.NumComponents = getComponentsTotalNumber(ComponentLists); // We need to allocate: - // 3 x NumVars x Expr* - we have an original list expression for each clause - // list entry and an equal number of private copies and inits. + // NumVars x Expr* - we have an original list expression for each clause + // list entry. // NumUniqueDeclarations x ValueDecl* - unique base declarations associated // with each component list. // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7031,7 +7031,7 @@ OMP_MAP_TARGET_PARAM = 0x20, /// Signal that the runtime library has to return the device pointer /// in the current position for the data being mapped. Used when we have the - /// use_device_ptr clause. + /// use_device_ptr or use_device_addr clause. OMP_MAP_RETURN_PARAM = 0x40, /// This flag signals that the reference being passed is a pointer to /// private data. @@ -7099,26 +7099,30 @@ ArrayRef MapModifiers; bool ReturnDevicePointer = false; bool IsImplicit = false; + bool ForDeviceAddr = false; MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - bool ReturnDevicePointer, bool IsImplicit) + ArrayRef MapModifiers, bool ReturnDevicePointer, + bool IsImplicit, bool ForDeviceAddr = false) : Components(Components), MapType(MapType), MapModifiers(MapModifiers), - ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), + ForDeviceAddr(ForDeviceAddr) {} }; - /// If use_device_ptr is used on a pointer which is a struct member and there - /// is no map information about it, then emission of that entry is deferred - /// until the whole struct has been processed. + /// If use_device_ptr or use_device_addr is used on a decl which is a struct + /// member and there is no map information about it, then emission of that + /// entry is deferred until the whole struct has been processed. struct DeferredDevicePtrEntryTy { const Expr *IE = nullptr; const ValueDecl *VD = nullptr; + bool ForDeviceAddr = false; - DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) - : IE(IE), VD(VD) {} + DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, + bool ForDeviceAddr) + : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} }; /// The target directive from where the mappable clauses were extracted. It @@ -7306,13 +7310,12 @@ /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. void generateInfoForComponentList( - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, + OpenMPMapClauseKind MapType, ArrayRef MapModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, - bool IsImplicit, + bool IsImplicit, bool ForDeviceAddr = false, ArrayRef OverlappedElements = llvm::None) const { // The following summarizes what has to be generated for each map and the @@ -7623,8 +7626,8 @@ // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object // it is pointing to into a single PTR_AND_OBJ entry. - bool IsMemberPointer = - IsPointer && EncounteredME && + bool IsMemberPointerOrAddr = + (IsPointer || ForDeviceAddr) && EncounteredME && (dyn_cast(I->getAssociatedExpression()) == EncounteredME); if (!OverlappedElements.empty()) { @@ -7691,7 +7694,7 @@ break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); - if (!IsMemberPointer) { + if (!IsMemberPointerOrAddr) { BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); Sizes.push_back( @@ -7952,17 +7955,18 @@ // Helper function to fill the information map for the different supported // clauses. - auto &&InfoGen = [&Info]( - const ValueDecl *D, - OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - bool ReturnDevicePointer, bool IsImplicit) { - const ValueDecl *VD = - D ? cast(D->getCanonicalDecl()) : nullptr; - Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, - IsImplicit); - }; + auto &&InfoGen = + [&Info](const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef MapModifiers, + bool ReturnDevicePointer, bool IsImplicit, + bool ForDeviceAddr = false) { + const ValueDecl *VD = + D ? cast(D->getCanonicalDecl()) : nullptr; + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, + IsImplicit, ForDeviceAddr); + }; assert(CurDir.is() && "Expect a executable directive"); @@ -8032,7 +8036,7 @@ // partial struct. InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); - DeferredInfo[nullptr].emplace_back(IE, VD); + DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); } else { llvm::Value *Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); @@ -8044,6 +8048,70 @@ } } + // Look at the use_device_addr clause information and mark the existing map + // entries as such. If there is no map information for an entry in the + // use_device_addr list, we create one with map type 'alloc' and zero size + // section. It is the user fault if that was not mapped before. If there is + // no map information and the pointer is a struct member, then we defer the + // emission of that entry until the whole struct has been processed. + llvm::SmallDenseSet, 4> Processed; + for (const auto *C : + CurExecDir->getClausesOfKind()) { + for (const auto L : C->component_lists()) { + assert(!L.second.empty() && "Not expecting empty list of components!"); + const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); + if (!Processed.insert(VD).second) + continue; + VD = cast(VD->getCanonicalDecl()); + const Expr *IE = L.second.back().getAssociatedExpression(); + // If the first component is a member expression, we have to look into + // 'this', which maps to null in the map of map information. Otherwise + // look directly for the information. + auto It = Info.find(isa(IE) ? nullptr : VD); + + // We potentially have map information for this declaration already. + // Look for the first set of components that refer to it. + if (It != Info.end()) { + auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { + return MI.Components.back().getAssociatedDeclaration() == VD; + }); + // If we found a map entry, signal that the pointer has to be returned + // and move on to the next declaration. + if (CI != It->second.end()) { + CI->ReturnDevicePointer = true; + continue; + } + } + + // We didn't find any match in our map information - generate a zero + // size array section - if the pointer is a struct member we defer this + // action until the whole struct has been processed. + if (isa(IE)) { + // Insert the pointer into Info to be processed by + // generateInfoForComponentList. Because it is a member pointer + // without a pointee, no entry will be generated for it, therefore + // we need to generate one after the whole struct has been processed. + // Nonetheless, generateInfoForComponentList must be called to take + // the pointer into account for the calculation of the range of the + // partial struct. + InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, + /*ReturnDevicePointer=*/false, C->isImplicit(), + /*ForDeviceAddr=*/true); + DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); + } else { + llvm::Value *Ptr; + if (IE->isGLValue()) + Ptr = CGF.EmitLValue(IE).getPointer(CGF); + else + Ptr = CGF.EmitScalarExpr(IE); + BasePointers.emplace_back(Ptr, VD); + Pointers.push_back(Ptr); + Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); + Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + } + } + } + for (const auto &M : Info) { // We need to know when we generate information for the first component // associated with a capture, because the mapping flags depend on it. @@ -8062,10 +8130,10 @@ // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, - CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList( + L.MapType, L.MapModifiers, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8085,21 +8153,35 @@ } // Append any pending zero-length pointers which are struct members and - // used with use_device_ptr. + // used with use_device_ptr or use_device_addr. auto CI = DeferredInfo.find(M.first); if (CI != DeferredInfo.end()) { for (const DeferredDevicePtrEntryTy &L : CI->second) { - llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); - llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( - this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); + llvm::Value *BasePtr; + llvm::Value *Ptr; + if (L.ForDeviceAddr) { + if (L.IE->isGLValue()) + Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); + else + Ptr = this->CGF.EmitScalarExpr(L.IE); + BasePtr = Ptr; + // Entry is RETURN_PARAM. Also, set the placeholder value + // MEMBER_OF=FFFF so that the entry is later updated with the + // correct value of MEMBER_OF. + CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); + } else { + BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); + Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), + L.IE->getExprLoc()); + // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder + // value MEMBER_OF=FFFF so that the entry is later updated with the + // correct value of MEMBER_OF. + CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | + OMP_MAP_MEMBER_OF); + } CurBasePointers.emplace_back(BasePtr, L.VD); CurPointers.push_back(Ptr); CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); - // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder - // value MEMBER_OF=FFFF so that the entry is later updated with the - // correct value of MEMBER_OF. - CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | - OMP_MAP_MEMBER_OF); } } @@ -8168,10 +8250,10 @@ for (const MapInfo &L : M.second) { assert(!L.Components.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, - CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList( + L.MapType, L.MapModifiers, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); IsFirstComponentList = false; } @@ -8437,10 +8519,10 @@ ArrayRef OverlappedComponents = Pair.getSecond(); bool IsFirstComponentList = true; - generateInfoForComponentList(MapType, MapModifiers, Components, - BasePointers, Pointers, Sizes, Types, - PartialStruct, IsFirstComponentList, - IsImplicit, OverlappedComponents); + generateInfoForComponentList( + MapType, MapModifiers, Components, BasePointers, Pointers, Sizes, + Types, PartialStruct, IsFirstComponentList, IsImplicit, + /*ForDeviceAddr=*/false, OverlappedComponents); } // Go through other elements without overlapped elements. bool IsFirstComponentList = OverlappedData.empty(); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -31,6 +31,8 @@ using namespace CodeGen; using namespace llvm::omp; +static const VarDecl *getBaseDecl(const Expr *Ref); + namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. @@ -220,6 +222,12 @@ if (const auto *OED = dyn_cast(D)) CGF.EmitVarDecl(*OED); } + } else if (const auto *UDP = dyn_cast(C)) { + for (const Expr *E : UDP->varlists()) { + const Decl *D = getBaseDecl(E); + if (const auto *OED = dyn_cast(D)) + CGF.EmitVarDecl(*OED); + } } } if (!isOpenMPSimdDirective(S.getDirectiveKind())) @@ -5696,9 +5704,8 @@ } void CodeGenFunction::EmitOMPUseDevicePtrClause( - const OMPClause &NC, OMPPrivateScope &PrivateScope, + const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, const llvm::DenseMap &CaptureDeviceAddrMap) { - const auto &C = cast(NC); auto OrigVarIt = C.varlist_begin(); auto InitIt = C.inits().begin(); for (const Expr *PvtVarIt : C.private_copies()) { @@ -5759,6 +5766,60 @@ } } +static const VarDecl *getBaseDecl(const Expr *Ref) { + const Expr *Base = Ref->IgnoreParenImpCasts(); + while (const auto *OASE = dyn_cast(Base)) + Base = OASE->getBase()->IgnoreParenImpCasts(); + while (const auto *ASE = dyn_cast(Base)) + Base = ASE->getBase()->IgnoreParenImpCasts(); + return cast(cast(Base)->getDecl()); +} + +void CodeGenFunction::EmitOMPUseDeviceAddrClause( + const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, + const llvm::DenseMap &CaptureDeviceAddrMap) { + llvm::SmallDenseSet, 4> Processed; + for (const Expr *Ref : C.varlists()) { + const VarDecl *OrigVD = getBaseDecl(Ref); + if (!Processed.insert(OrigVD).second) + continue; + // In order to identify the right initializer we need to match the + // declaration used by the mapping logic. In some cases we may get + // OMPCapturedExprDecl that refers to the original declaration. + const ValueDecl *MatchingVD = OrigVD; + if (const auto *OED = dyn_cast(MatchingVD)) { + // OMPCapturedExprDecl are used to privative fields of the current + // structure. + const auto *ME = cast(OED->getInit()); + assert(isa(ME->getBase()) && + "Base should be the current struct!"); + MatchingVD = ME->getMemberDecl(); + } + + // If we don't have information about the current list item, move on to + // the next one. + auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); + if (InitAddrIt == CaptureDeviceAddrMap.end()) + continue; + + Address PrivAddr = InitAddrIt->getSecond(); + // For declrefs and variable length array need to load the pointer for + // correct mapping, since the pointer to the data was passed to the runtime. + if (isa(Ref->IgnoreParenImpCasts()) || + MatchingVD->getType()->isArrayType()) + PrivAddr = + EmitLoadOfPointer(PrivAddr, getContext() + .getPointerType(OrigVD->getType()) + ->castAs()); + llvm::Type *RealTy = + ConvertTypeForMem(OrigVD->getType().getNonReferenceType()) + ->getPointerTo(); + PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy); + + (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; }); + } +} + // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { @@ -5803,6 +5864,9 @@ for (const auto *C : S.getClausesOfKind()) CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, Info.CaptureDeviceAddrMap); + for (const auto *C : S.getClausesOfKind()) + CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, + Info.CaptureDeviceAddrMap); (void)PrivateScope.Privatize(); RCG(CGF); } else { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -76,6 +76,8 @@ class ObjCAtThrowStmt; class ObjCAtSynchronizedStmt; class ObjCAutoreleasePoolStmt; +class OMPUseDevicePtrClause; +class OMPUseDeviceAddrClause; class ReturnsNonNullAttr; class SVETypeFlags; @@ -3173,7 +3175,10 @@ void EmitOMPPrivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); void EmitOMPUseDevicePtrClause( - const OMPClause &C, OMPPrivateScope &PrivateScope, + const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, + const llvm::DenseMap &CaptureDeviceAddrMap); + void EmitOMPUseDeviceAddrClause( + const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, const llvm::DenseMap &CaptureDeviceAddrMap); /// Emit code for copyin clause in \a D directive. The next code is /// generated at the start of outlined functions for directives: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -18500,8 +18500,15 @@ // only need a component. MVLI.VarBaseDeclarations.push_back(D); MVLI.VarComponents.emplace_back(); + Expr *Component = SimpleRefExpr; + if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || + isa(RefExpr->IgnoreParenImpCasts()))) { + Component = DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); + // Component = CreateBuiltinUnaryOp(ELoc, UO_Deref, Component).get(); + // Component = DefaultFunctionArrayLvalueConversion(Component).get(); + } MVLI.VarComponents.back().push_back( - OMPClauseMappableExprCommon::MappableComponent(SimpleRefExpr, D)); + OMPClauseMappableExprCommon::MappableComponent(Component, D)); } if (MVLI.ProcessedVarList.empty()) diff --git a/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp @@ -0,0 +1,224 @@ +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SIZES1:@.+]] = private unnamed_addr constant [5 x i64] zeroinitializer +// 96 = 0x60 = OMP_MAP_TARGET_PARAM | OMP_MAP_RETURN_PARAM +// CHECK-DAG: [[MAPTYPES1:@.+]] = private unnamed_addr constant [5 x i64] [i64 96, i64 96, i64 96, i64 96, i64 96] +// 32 = 0x20 = OMP_MAP_TARGET_PARAM +// 281474976710720 = 0x1000000000040 = OMP_MAP_MEMBER_OF | OMP_MAP_RETURN_PARAM +// CHECK-DAG: [[MAPTYPES2:@.+]] = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710720, i64 281474976710720, i64 281474976710720, i64 281474976710720] +struct S { + int a = 0; + int *ptr = &a; + int &ref = a; + int arr[4]; + S() {} + void foo() { +#pragma omp target data use_device_addr(a, ptr [3:4], ref, ptr[0], arr[:a]) + ++a, ++*ptr, ++ref, ++arr[0]; + } +}; + +int main() { + float a = 0; + float *ptr = &a; + float &ref = a; + float arr[4]; + float vla[(int)a]; + S s; + s.foo(); +#pragma omp target data use_device_addr(a, ptr [3:4], ref, ptr[0], arr[:(int)a], vla[0]) + ++a, ++*ptr, ++ref, ++arr[0], ++vla[0]; + return a; +} + +// CHECK-LABEL: @main() +// CHECK: [[A_ADDR:%.+]] = alloca float, +// CHECK: [[PTR_ADDR:%.+]] = alloca float*, +// CHECK: [[REF_ADDR:%.+]] = alloca float*, +// CHECK: [[ARR_ADDR:%.+]] = alloca [4 x float], +// CHECK: [[BPTRS:%.+]] = alloca [5 x i8*], +// CHECK: [[PTRS:%.+]] = alloca [5 x i8*], +// CHECK: [[VLA_ADDR:%.+]] = alloca float, i64 %{{.+}}, +// CHECK: [[PTR:%.+]] = load float*, float** [[PTR_ADDR]], +// CHECK: [[REF:%.+]] = load float*, float** [[REF_ADDR]], +// CHECK: [[ARR:%.+]] = getelementptr inbounds [4 x float], [4 x float]* [[ARR_ADDR]], i64 0, i64 0 +// CHECK: [[BPTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0 +// CHECK: [[BPTR0_A_ADDR:%.+]] = bitcast i8** [[BPTR0]] to float** +// CHECK: store float* [[A_ADDR]], float** [[BPTR0_A_ADDR]], +// CHECK: [[PTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0 +// CHECK: [[PTR0_A_ADDR:%.+]] = bitcast i8** [[PTR0]] to float** +// CHECK: store float* [[A_ADDR]], float** [[PTR0_A_ADDR]], +// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 1 +// CHECK: [[BPTR1_PTR_ADDR:%.+]] = bitcast i8** [[BPTR1]] to float** +// CHECK: store float* [[PTR]], float** [[BPTR1_PTR_ADDR]], +// CHECK: [[PTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 1 +// CHECK: [[PTR1_PTR_ADDR:%.+]] = bitcast i8** [[PTR1]] to float** +// CHECK: store float* [[PTR]], float** [[PTR1_PTR_ADDR]], +// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 2 +// CHECK: [[BPTR2_REF_ADDR:%.+]] = bitcast i8** [[BPTR2]] to float** +// CHECK: store float* [[REF]], float** [[BPTR2_REF_ADDR]], +// CHECK: [[PTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 2 +// CHECK: [[PTR2_REF_ADDR:%.+]] = bitcast i8** [[PTR2]] to float** +// CHECK: store float* [[REF]], float** [[PTR2_REF_ADDR]], +// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 3 +// CHECK: [[BPTR3_ARR_ADDR:%.+]] = bitcast i8** [[BPTR3]] to float** +// CHECK: store float* [[ARR]], float** [[BPTR3_ARR_ADDR]], +// CHECK: [[PTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 3 +// CHECK: [[PTR3_ARR_ADDR:%.+]] = bitcast i8** [[PTR3]] to float** +// CHECK: store float* [[ARR]], float** [[PTR3_ARR_ADDR]], +// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 4 +// CHECK: [[BPTR4_VLA_ADDR:%.+]] = bitcast i8** [[BPTR4]] to float** +// CHECK: store float* [[VLA_ADDR]], float** [[BPTR4_VLA_ADDR]], +// CHECK: [[PTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 4 +// CHECK: [[PTR4_VLA_ADDR:%.+]] = bitcast i8** [[PTR4]] to float** +// CHECK: store float* [[VLA_ADDR]], float** [[PTR4_VLA_ADDR]], +// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0 +// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_begin(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0)) +// CHECK: [[A_REF:%.+]] = load float*, float** [[BPTR0_A_ADDR]], +// CHECK: [[REF_REF:%.+]] = load float*, float** [[BPTR2_REF_ADDR]], +// CHECK: store float* [[REF_REF]], float** [[TMP_REF_ADDR:%.+]], +// CHECK: [[ARR:%.+]] = load float*, float** [[BPTR3_ARR_ADDR]], +// CHECK: [[ARR_REF:%.+]] = bitcast float* [[ARR]] to [4 x float]* +// CHECK: [[VLA_REF:%.+]] = load float*, float** [[BPTR4_VLA_ADDR]], +// CHECK: [[A:%.+]] = load float, float* [[A_REF]], +// CHECK: [[INC:%.+]] = fadd float [[A]], 1.000000e+00 +// CHECK: store float [[INC]], float* [[A_REF]], +// CHECK: [[PTR_ADDR:%.+]] = load float*, float** [[BPTR1_PTR_ADDR]], +// CHECK: [[VAL:%.+]] = load float, float* [[PTR_ADDR]], +// CHECK: [[INC:%.+]] = fadd float [[VAL]], 1.000000e+00 +// CHECK: store float [[INC]], float* [[PTR_ADDR]], +// CHECK: [[REF_ADDR:%.+]] = load float*, float** [[TMP_REF_ADDR]], +// CHECK: [[REF:%.+]] = load float, float* [[REF_ADDR]], +// CHECK: [[INC:%.+]] = fadd float [[REF]], 1.000000e+00 +// CHECK: store float [[INC]], float* [[REF_ADDR]], +// CHECK: [[ARR0_ADDR:%.+]] = getelementptr inbounds [4 x float], [4 x float]* [[ARR_REF]], i64 0, i64 0 +// CHECK: [[ARR0:%.+]] = load float, float* [[ARR0_ADDR]], +// CHECK: [[INC:%.+]] = fadd float [[ARR0]], 1.000000e+00 +// CHECK: store float [[INC]], float* [[ARR0_ADDR]], +// CHECK: [[VLA0_ADDR:%.+]] = getelementptr inbounds float, float* [[VLA_REF]], i64 0 +// CHECK: [[VLA0:%.+]] = load float, float* [[VLA0_ADDR]], +// CHECK: [[INC:%.+]] = fadd float [[VLA0]], 1.000000e+00 +// CHECK: store float [[INC]], float* [[VLA0_ADDR]], +// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0 +// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_end(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0)) + +// CHECK: foo +// %this.addr = alloca %struct.S*, align 8 +// CHECK: [[BPTRS:%.+]] = alloca [5 x i8*], +// CHECK: [[PTRS:%.+]] = alloca [5 x i8*], +// CHECK: [[SIZES:%.+]] = alloca [5 x i64], +// %tmp = alloca i32*, align 8 +// %tmp6 = alloca i32**, align 8 +// %tmp7 = alloca i32*, align 8 +// %tmp8 = alloca i32**, align 8 +// %tmp9 = alloca [4 x i32]*, align 8 +// store %struct.S* %this, %struct.S** %this.addr, align 8 +// %this1 = load %struct.S*, %struct.S** %this.addr, align 8 +// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS:%.+]], i32 0, i32 0 +// %ptr = getelementptr inbounds %struct.S, %struct.S* %this1, i32 0, i32 1 +// %ref = getelementptr inbounds %struct.S, %struct.S* %this1, i32 0, i32 2 +// %0 = load i32*, i32** %ref, align 8 +// CHECK: [[ARR_ADDR:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 3 +// CHECK: [[A_ADDR2:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 0 +// CHECK: [[PTR_ADDR:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 1 +// CHECK: [[REF_REF:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 2 +// CHECK: [[REF_PTR:%.+]] = load i32*, i32** [[REF_REF]], +// CHECK: [[ARR_ADDR2:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 3 +// CHECK: [[ARR_END:%.+]] = getelementptr [4 x i32], [4 x i32]* [[ARR_ADDR]], i32 1 +// CHECK: [[BEGIN:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK: [[END:%.+]] = bitcast [4 x i32]* [[ARR_END]] to i8* +// CHECK: [[E:%.+]] = ptrtoint i8* [[END]] to i64 +// CHECK: [[B:%.+]] = ptrtoint i8* [[BEGIN]] to i64 +// CHECK: [[DIFF:%.+]] = sub i64 [[E]], [[B]] +// CHECK: [[SZ:%.+]] = sdiv exact i64 [[DIFF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK: [[BPTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0 +// CHECK: [[BPTR0_S:%.+]] = bitcast i8** [[BPTR0]] to %struct.S** +// CHECK: store %struct.S* [[THIS]], %struct.S** [[BPTR0_S]], +// CHECK: [[PTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0 +// CHECK: [[PTR0_BEGIN:%.+]] = bitcast i8** [[PTR0]] to i32** +// CHECK: store i32* [[A_ADDR]], i32** [[PTR0_BEGIN]], +// CHECK: [[SIZE0:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0 +// CHECK: store i64 [[SZ]], i64* [[SIZE0]], +// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 1 +// CHECK: [[BPTR1_A_ADDR:%.+]] = bitcast i8** [[BPTR1]] to i32** +// CHECK: store i32* [[A_ADDR2]], i32** [[BPTR1_A_ADDR]], +// CHECK: [[PTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 1 +// CHECK: [[PTR1_A_ADDR:%.+]] = bitcast i8** [[PTR1]] to i32** +// CHECK: store i32* [[A_ADDR2]], i32** [[PTR1_A_ADDR]], +// CHECK: [[SIZE1:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 1 +// CHECK: store i64 0, i64* [[SIZE1]], +// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 2 +// CHECK: [[BPTR2_PTR_ADDR:%.+]] = bitcast i8** [[BPTR2]] to i32*** +// CHECK: store i32** [[PTR_ADDR]], i32*** [[BPTR2_PTR_ADDR]], +// CHECK: [[PTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 2 +// CHECK: [[PTR2_PTR_ADDR:%.+]] = bitcast i8** [[PTR2]] to i32*** +// CHECK: store i32** [[PTR_ADDR]], i32*** [[PTR2_PTR_ADDR]], +// CHECK: [[SIZE2:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 2 +// CHECK: store i64 0, i64* [[SIZE2]], +// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 3 +// CHECK: [[BPTR3_REF_PTR:%.+]] = bitcast i8** [[BPTR3]] to i32** +// CHECK: store i32* [[REF_PTR]], i32** [[BPTR3_REF_PTR]], +// CHECK: [[PTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 3 +// CHECK: [[PTR3_REF_PTR:%.+]] = bitcast i8** [[PTR3]] to i32** +// CHECK: store i32* [[REF_PTR]], i32** [[PTR3_REF_PTR]], +// CHECK: [[SIZE3:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 3 +// CHECK: store i64 0, i64* [[SIZE3]], +// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 4 +// CHECK: [[BPTR4_ARR_ADDR:%.+]] = bitcast i8** [[BPTR4]] to [4 x i32]** +// CHECK: store [4 x i32]* [[ARR_ADDR2]], [4 x i32]** [[BPTR4_ARR_ADDR]], +// CHECK: [[PTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 4 +// CHECK: [[PTR4_ARR_ADDR:%.+]] = bitcast i8** [[PTR4]] to [4 x i32]** +// CHECK: store [4 x i32]* [[ARR_ADDR2]], [4 x i32]** [[PTR4_ARR_ADDR]], +// CHECK: [[SIZE4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[SIZE4]], +// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0 +// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0 +// CHECK: [[SIZE:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_begin(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0)) +// CHECK: [[A_ADDR:%.+]] = load i32*, i32** [[BPTR1_A_ADDR]], +// CHECK: store i32* [[A_ADDR]], i32** [[A_REF:%.+]], +// CHECK: [[PTR_ADDR:%.+]] = load i32**, i32*** [[BPTR2_PTR_ADDR]], +// CHECK: store i32** [[PTR_ADDR]], i32*** [[PTR_REF:%.+]], +// CHECK: [[REF_PTR:%.+]] = load i32*, i32** [[BPTR3_REF_PTR]], +// CHECK: store i32* [[REF_PTR]], i32** [[REF_REF:%.+]], +// CHECK: [[PTR_ADDR:%.+]] = load i32**, i32*** [[BPTR2_PTR_ADDR]], +// CHECK: store i32** [[PTR_ADDR]], i32*** [[PTR_REF2:%.+]], +// CHECK: [[ARR_ADDR:%.+]] = load [4 x i32]*, [4 x i32]** [[BPTR4_ARR_ADDR]], +// CHECK: store [4 x i32]* [[ARR_ADDR]], [4 x i32]** [[ARR_REF:%.+]], +// CHECK: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], +// CHECK: [[INC:%.+]] = add nsw i32 [[A]], 1 +// CHECK: store i32 [[INC]], i32* [[A_ADDR]], +// CHECK: [[PTR_PTR:%.+]] = load i32**, i32*** [[PTR_REF2]], +// CHECK: [[PTR:%.+]] = load i32*, i32** [[PTR_PTR]], +// CHECK: [[VAL:%.+]] = load i32, i32* [[PTR]], +// CHECK: [[INC:%.+]] = add nsw i32 [[VAL]], 1 +// CHECK: store i32 [[INC]], i32* [[PTR]], +// CHECK: [[REF_PTR:%.+]] = load i32*, i32** [[REF_REF]], +// CHECK: [[VAL:%.+]] = load i32, i32* [[REF_PTR]], +// CHECK: [[INC:%.+]] = add nsw i32 [[VAL]], 1 +// CHECK: store i32 [[INC]], i32* [[REF_PTR]], +// CHECK: [[ARR_ADDR:%.+]] = load [4 x i32]*, [4 x i32]** [[ARR_REF]], +// CHECK: [[ARR0_ADDR:%.+]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR_ADDR]], i64 0, i64 0 +// CHECK: [[VAL:%.+]] = load i32, i32* [[ARR0_ADDR]], +// CHECK: [[INC:%.+]] = add nsw i32 [[VAL]], 1 +// CHECK: store i32 [[INC]], i32* [[ARR0_ADDR]], +// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0 +// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0 +// CHECK: [[SIZE:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_end(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0)) + +#endif