diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -31,12 +31,12 @@ static const char *const DataLayoutStringR600 = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" "-ni:7"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2508,12 +2508,16 @@ // Finally, store the address point. Use the same LLVM types as the field to // support optimization. 
+ unsigned GlobalsAS = CGM.getDataLayout().getDefaultGlobalsAddressSpace(); + unsigned ProgAS = CGM.getDataLayout().getProgramAddressSpace(); llvm::Type *VTablePtrTy = llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true) - ->getPointerTo() - ->getPointerTo(); - VTableField = Builder.CreateBitCast(VTableField, VTablePtrTy->getPointerTo()); - VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); + ->getPointerTo(ProgAS) + ->getPointerTo(GlobalsAS); + VTableField = Builder.CreatePointerBitCastOrAddrSpaceCast( + VTableField, VTablePtrTy->getPointerTo(GlobalsAS)); + VTableAddressPoint = Builder.CreatePointerBitCastOrAddrSpaceCast( + VTableAddressPoint, VTablePtrTy); llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField); TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTablePtrTy); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3075,11 +3075,12 @@ llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), - llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), - llvm::ConstantInt::get(CGM.SizeTy, Size), - llvm::ConstantInt::get(CGM.Int32Ty, Flags), - llvm::ConstantInt::get(CGM.Int32Ty, 0)}; + llvm::Constant *Data[] = { + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), + llvm::ConstantInt::get(CGM.SizeTy, Size), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::get(CGM.Int32Ty, 0)}; std::string EntryName = getName({"omp_offloading", "entry", ""}); llvm::GlobalVariable *Entry = createGlobalStruct( CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, diff --git a/clang/lib/CodeGen/CGVTT.cpp b/clang/lib/CodeGen/CGVTT.cpp --- 
a/clang/lib/CodeGen/CGVTT.cpp +++ b/clang/lib/CodeGen/CGVTT.cpp @@ -42,10 +42,8 @@ llvm::GlobalVariable::LinkageTypes Linkage, const CXXRecordDecl *RD) { VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/true); - - llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int32Ty = CGM.Int32Ty; llvm::ArrayType *ArrayType = - llvm::ArrayType::get(Int8PtrTy, Builder.getVTTComponents().size()); + llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size()); SmallVector VTables; SmallVector VTableAddressPoints; @@ -74,16 +72,17 @@ } llvm::Value *Idxs[] = { - llvm::ConstantInt::get(Int32Ty, 0), - llvm::ConstantInt::get(Int32Ty, AddressPoint.VTableIndex), - llvm::ConstantInt::get(Int32Ty, AddressPoint.AddressPointIndex), + llvm::ConstantInt::get(CGM.Int32Ty, 0), + llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint.VTableIndex), + llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint.AddressPointIndex), }; llvm::Constant *Init = llvm::ConstantExpr::getGetElementPtr( VTable->getValueType(), VTable, Idxs, /*InBounds=*/true, /*InRangeIndex=*/1); - Init = llvm::ConstantExpr::getBitCast(Init, Int8PtrTy); + Init = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Init, + CGM.Int8PtrTy); VTTComponents.push_back(Init); } diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2331,7 +2331,8 @@ CGM.getDataLayout().getABITypeAlignment(guardTy)); } } - llvm::PointerType *guardPtrTy = guardTy->getPointerTo(); + llvm::PointerType *guardPtrTy = guardTy->getPointerTo( + CGF.CGM.getDataLayout().getDefaultGlobalsAddressSpace()); // Create the guard variable if we don't already have it (as we // might if we're double-emitting this function body). 
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -168,20 +168,20 @@ // RUN: %clang_cc1 -triple r600-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=R600 -// R600: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +// R600: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" // RUN: %clang_cc1 -triple r600-unknown -target-cpu cayman -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600D -// R600D: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +// R600D: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" // Test default -target-cpu // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SIDefault -// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +// R600SIDefault: target datalayout =
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=AARCH64 diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl --- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" void foo(void) {} diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -215,7 +215,7 @@ GV->setAlignment(Align(8)); Ident = GV; } - return Ident; + return Builder.CreatePointerCast(Ident, IdentPtr); } Type *OpenMPIRBuilder::getLanemaskType() { diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -4380,11 +4380,17 @@ } std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { - StringRef AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; + Triple T(TT); + // For AMDGPU we upgrade older DataLayouts to include the default globals + // address space of 1. + if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) { + return DL.empty() ?
std::string("G1") : (DL + "-G1").str(); + } + std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; // If X86, and the datalayout matches the expected format, add pointer size // address spaces to the datalayout. - if (!Triple(TT).isX86() || DL.contains(AddrSpaces)) + if (!T.isX86() || DL.contains(AddrSpaces)) return std::string(DL); SmallVector Groups; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -342,15 +342,15 @@ static StringRef computeDataLayout(const Triple &TT) { if (TT.getArch() == Triple::r600) { // 32-bit pointers. - return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; + return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; } // 32-bit private, local, and region pointers. 64-bit global, constant and // flat, non-integral buffer fat pointers. - return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" + return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" "-ni:7"; } diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -27,6 +27,10 @@ "-f80:32-n8:16:32-S32"); EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128" "-n32:64-S128"); + + // Check that AMDGPU targets add -G1 if it's not present. 
+ EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), "e-p:64:64-G1"); } TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { @@ -46,6 +50,13 @@ EXPECT_EQ(DL2, "e-p:32:32"); EXPECT_EQ(DL3, "e-m:e-i64:64-n32:64"); EXPECT_EQ(DL4, "e-m:o-i64:64-i128:128-n32:64-S128"); + + // Check that AMDGPU targets don't add -G1 if there is already a -G flag. + EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2"); + EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), "e-p:64:64-G2"); + EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), "G2-e-p:64:64"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), "e-p:64:64-G0"); } TEST(DataLayoutUpgradeTest, EmptyDataLayout) { @@ -54,6 +65,10 @@ "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128", ""); EXPECT_EQ(DL1, ""); EXPECT_EQ(DL2, "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"); + + // Check that AMDGPU targets add G1 if it's not present. + EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1"); + EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"), "G1"); } } // end namespace