Index: include/clang/AST/ASTContext.h =================================================================== --- include/clang/AST/ASTContext.h +++ include/clang/AST/ASTContext.h @@ -2301,12 +2301,12 @@ QualType typeDomain) const; unsigned getTargetAddressSpace(QualType T) const { + if (T->isFunctionType() && !T.getQualifiers().hasAddressSpace()) + return 0; return getTargetAddressSpace(T.getQualifiers()); } - unsigned getTargetAddressSpace(Qualifiers Q) const { - return getTargetAddressSpace(Q.getAddressSpace()); - } + unsigned getTargetAddressSpace(Qualifiers Q) const; unsigned getTargetAddressSpace(unsigned AS) const { if (AS < LangAS::Offset || AS >= LangAS::Offset + LangAS::Count) @@ -2319,6 +2319,8 @@ /// constant folding. uint64_t getTargetNullPointerValue(QualType QT) const; + unsigned getTargetConstantAddressSpace() const; + bool addressSpaceMapManglingFor(unsigned AS) const { return AddrSpaceMapMangling || AS < LangAS::Offset || Index: include/clang/Basic/TargetInfo.h =================================================================== --- include/clang/Basic/TargetInfo.h +++ include/clang/Basic/TargetInfo.h @@ -307,6 +307,10 @@ return 0; } + virtual unsigned getConstantAddressSpace() const { + return 0; + } + /// \brief Return the size of '_Bool' and C++ 'bool' for this target, in bits. unsigned getBoolWidth() const { return BoolWidth; } @@ -953,6 +957,10 @@ return *AddrSpaceMap; } + virtual unsigned getDefaultTargetAddressSpace(const LangOptions &Opt) const { + return 0; + } + /// \brief Retrieve the name of the platform as it is used in the /// availability attribute. StringRef getPlatformName() const { return PlatformName; } Index: lib/AST/ASTContext.cpp =================================================================== --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -9538,6 +9538,16 @@ return getTargetInfo().getNullPointerValue(AS); } +unsigned ASTContext::getTargetConstantAddressSpace() const { + return getTargetInfo().getConstantAddressSpace(); +} + +unsigned ASTContext::getTargetAddressSpace(Qualifiers Q) const { + return Q.hasAddressSpace() + ? getTargetAddressSpace(Q.getAddressSpace()) + : getTargetInfo().getDefaultTargetAddressSpace(LangOpts); +} + // Explicitly instantiate this in case a Redeclarable is used from a TU that // doesn't include ASTContext.h template Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -1997,16 +1997,6 @@ return llvm::makeArrayRef(GCCRegNames); } -static const unsigned AMDGPUAddrSpaceMap[] = { - 1, // opencl_global - 3, // opencl_local - 2, // opencl_constant - 4, // opencl_generic - 1, // cuda_device - 2, // cuda_constant - 3 // cuda_shared -}; - // If you edit the description strings, make sure you update // getPointerWidthV(). @@ -2020,9 +2010,18 @@ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; class AMDGPUTargetInfo final : public TargetInfo { + static const unsigned AddrSpaceMap_[7]; static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; + enum AddrSpaceKind { + AS_Private = 0, + AS_Global = 1, + AS_Constant = 2, + AS_Local = 3, + AS_Generic = 4 + }; + /// \brief The GPU profiles supported by the AMDGPU target. enum GPUKind { GK_NONE, @@ -2066,7 +2065,7 @@ resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ? DataLayoutStringSI : DataLayoutStringR600); - AddrSpaceMap = &AMDGPUAddrSpaceMap; + AddrSpaceMap = &AddrSpaceMap_; UseAddrSpaceMapMangling = true; } @@ -2254,6 +2253,15 @@ } } + unsigned + getDefaultTargetAddressSpace(const LangOptions &Opts) const override { + // OpenCL sets address space explicitly in AST. The default case (type + // qualifier containing no address space) represents private address space. + if (Opts.OpenCL) + return AS_Private; + return AS_Generic; + } + LangAS::ID getOpenCLImageAddrSpace() const override { return LangAS::opencl_constant; } @@ -2276,6 +2284,16 @@ } }; +const unsigned AMDGPUTargetInfo::AddrSpaceMap_[] = { + AS_Global, // opencl_global + AS_Local, // opencl_local + AS_Constant, // opencl_constant + AS_Generic, // opencl_generic + AS_Global, // cuda_device + AS_Constant, // cuda_constant + AS_Local // cuda_shared +}; + const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { #define BUILTIN(ID, TYPE, ATTRS) \ { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -2312,7 +2312,7 @@ case Builtin::BI__GetExceptionInfo: { if (llvm::GlobalVariable *GV = CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) - return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); + return RValue::get(llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy)); break; } Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -3643,18 +3643,19 @@ if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); - llvm::AllocaInst *AI; + llvm::Instruction *CastedAI; if (IP) { IP = IP->getNextNode(); - AI = new llvm::AllocaInst(ArgStruct, "argmem", IP); + CastedAI = CreateAlloca(ArgStruct, "argmem", IP); } else { - AI = CreateTempAlloca(ArgStruct, "argmem"); + CastedAI = CreateTempAlloca(ArgStruct, "argmem"); } auto Align = CallInfo.getArgStructAlignment(); + auto *AI = getAddrSpaceCastedAlloca(CastedAI); AI->setAlignment(Align.getQuantity()); AI->setUsedWithInAlloca(true); assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca()); - ArgMemory = Address(AI, Align); + ArgMemory = Address(CastedAI, Align); } // Helper function to drill into the inalloca allocation. Index: lib/CodeGen/CGClass.cpp =================================================================== --- lib/CodeGen/CGClass.cpp +++ lib/CodeGen/CGClass.cpp @@ -2372,12 +2372,16 @@ // Finally, store the address point. Use the same LLVM types as the field to // support optimization. + auto DefAddr = CGM.getTarget().getDefaultTargetAddressSpace( + CGM.getLangOpts()); llvm::Type *VTablePtrTy = llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true) - ->getPointerTo() - ->getPointerTo(); - VTableField = Builder.CreateBitCast(VTableField, VTablePtrTy->getPointerTo()); - VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); + ->getPointerTo(DefAddr) + ->getPointerTo(DefAddr); + VTableField = Builder.CreatePointerBitCastOrAddrSpaceCast(VTableField, + VTablePtrTy->getPointerTo(DefAddr)); + VTableAddressPoint = Builder.CreatePointerBitCastOrAddrSpaceCast( + VTableAddressPoint, VTablePtrTy); llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField); CGM.DecorateInstructionWithTBAA(Store, CGM.getTBAAInfoForVTablePtr()); Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -1075,7 +1075,15 @@ llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla"); vla->setAlignment(alignment.getQuantity()); - address = Address(vla, alignment); + llvm::Value *V = vla; + auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts()); + if (DefaultAddr != 0) { + auto *DestTy = + llvm::PointerType::get(vla->getType()->getElementType(), DefaultAddr); + V = Builder.CreateAddrSpaceCast(vla, DestTy); + } + + address = Address(V, alignment); } setAddrOfLocalVar(&D, address); @@ -1244,7 +1252,7 @@ // Otherwise, create a temporary global with the initializer then // memcpy from the global to the alloca. std::string Name = getStaticDeclName(CGM, D); - unsigned AS = 0; + unsigned AS = CGM.getContext().getTargetConstantAddressSpace(); if (getLangOpts().OpenCL) { AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS); Index: lib/CodeGen/CGDeclCXX.cpp =================================================================== --- lib/CodeGen/CGDeclCXX.cpp +++ lib/CodeGen/CGDeclCXX.cpp @@ -135,7 +135,7 @@ CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType()); uint64_t Width = WidthChars.getQuantity(); llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width), - llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)}; + llvm::ConstantExpr::getPointerCast(Addr, CGF.Int8PtrTy)}; CGF.Builder.CreateCall(InvariantStart, Args); } Index: lib/CodeGen/CGException.cpp =================================================================== --- lib/CodeGen/CGException.cpp +++ lib/CodeGen/CGException.cpp @@ -237,7 +237,7 @@ static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { llvm::Constant *Fn = getPersonalityFn(CGM, Personality); - return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(Fn, CGM.Int8PtrTy); } /// Check whether a landingpad instruction only uses C++ features. @@ -1520,7 +1520,7 @@ llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::localrecover); llvm::Constant *ParentI8Fn = - llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); + llvm::ConstantExpr::getPointerCast(ParentCGF.CurFn, Int8PtrTy); RecoverCall = Builder.CreateCall( FrameRecoverFn, {ParentI8Fn, ParentFP, llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)}); @@ -1585,7 +1585,7 @@ llvm::Function *RecoverFPIntrin = CGM.getIntrinsic(llvm::Intrinsic::x86_seh_recoverfp); llvm::Constant *ParentI8Fn = - llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); + llvm::ConstantExpr::getPointerCast(ParentCGF.CurFn, Int8PtrTy); ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP}); } @@ -1812,7 +1812,7 @@ llvm::Function *FilterFunc = HelperCGF.GenerateSEHFilterFunction(*this, *Except); llvm::Constant *OpaqueFunc = - llvm::ConstantExpr::getBitCast(FilterFunc, Int8PtrTy); + llvm::ConstantExpr::getPointerCast(FilterFunc, Int8PtrTy); CatchScope->setHandler(0, OpaqueFunc, createBasicBlock("__except.ret")); } Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -62,16 +62,37 @@ /// block. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, const Twine &Name) { - auto Alloca = CreateTempAlloca(Ty, Name); + auto CastedAlloca = CreateTempAlloca(Ty, Name); + auto *Alloca = getAddrSpaceCastedAlloca(CastedAlloca); Alloca->setAlignment(Align.getQuantity()); - return Address(Alloca, Align); + return Address(CastedAlloca, Align); } /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. -llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, - const Twine &Name) { - return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); +llvm::Instruction *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, + const Twine &Name) { + return CreateAlloca(Ty, Name, AllocaInsertPt); +} + +llvm::Instruction *CodeGenFunction::CreateAlloca(llvm::Type *Ty, + const Twine &Name, + llvm::Instruction *InsertPos) { + llvm::Instruction *V = new llvm::AllocaInst(Ty, nullptr, Name, InsertPos); + auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts()); + if (DefaultAddr != 0) { + auto *DestTy = llvm::PointerType::get(V->getType()->getPointerElementType(), + DefaultAddr); + V = new llvm::AddrSpaceCastInst(V, DestTy, "", InsertPos); + } + return V; +} + +llvm::AllocaInst * +CodeGenFunction::getAddrSpaceCastedAlloca(llvm::Instruction *V) const { + if (auto *Cast = dyn_cast(V)) + return cast(Cast->getOperand(0)); + return cast(V); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the Index: lib/CodeGen/CGExprConstant.cpp =================================================================== --- lib/CodeGen/CGExprConstant.cpp +++ lib/CodeGen/CGExprConstant.cpp @@ -1316,7 +1316,7 @@ if (!Offset->isNullValue()) { unsigned AS = C->getType()->getPointerAddressSpace(); llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS); - llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy); + llvm::Constant *Casted = llvm::ConstantExpr::getPointerCast(C, CharPtrTy); Casted = llvm::ConstantExpr::getGetElementPtr(Int8Ty, Casted, Offset); C = llvm::ConstantExpr::getPointerCast(Casted, C->getType()); } Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -924,7 +924,7 @@ DefaultOpenMPPSource = CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); DefaultOpenMPPSource = - llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); + llvm::ConstantExpr::getPointerCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } ConstantInitBuilder builder(CGM); @@ -2918,7 +2918,7 @@ llvm::Module &M = CGM.getModule(); // Make sure the address has the right type. - llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); + llvm::Constant *AddrPtr = llvm::ConstantExpr::getPointerCast(ID, CGM.VoidPtrTy); // Create constant string with the name. llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); @@ -2928,7 +2928,7 @@ llvm::GlobalValue::InternalLinkage, StrPtrInit, ".omp_offloading.entry_name"); Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); + llvm::Constant *StrPtr = llvm::ConstantExpr::getPointerCast(Str, CGM.Int8PtrTy); // We can't have any padding between symbols, so we need to have 1-byte // alignment. @@ -4871,7 +4871,7 @@ // the device, because these functions will be entry points to the device. if (CGM.getLangOpts().OpenMPIsDevice) { - OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); + OutlinedFnID = llvm::ConstantExpr::getPointerCast(OutlinedFn, CGM.Int8PtrTy); OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); } else OutlinedFnID = new llvm::GlobalVariable( Index: lib/CodeGen/CGVTT.cpp =================================================================== --- lib/CodeGen/CGVTT.cpp +++ lib/CodeGen/CGVTT.cpp @@ -84,7 +84,7 @@ VTable->getValueType(), VTable, Idxs, /*InBounds=*/true, /*InRangeIndex=*/1); - Init = llvm::ConstantExpr::getBitCast(Init, Int8PtrTy); + Init = llvm::ConstantExpr::getPointerCast(Init, Int8PtrTy); VTTComponents.push_back(Init); } Index: lib/CodeGen/CGVTables.cpp =================================================================== --- lib/CodeGen/CGVTables.cpp +++ lib/CodeGen/CGVTables.cpp @@ -550,7 +550,7 @@ return addOffsetConstant(component.getOffsetToTop()); case VTableComponent::CK_RTTI: - return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy)); + return builder.add(llvm::ConstantExpr::getPointerCast(rtti, CGM.Int8PtrTy)); case VTableComponent::CK_FunctionPointer: case VTableComponent::CK_CompleteDtorPointer: @@ -594,7 +594,7 @@ llvm::Constant *fn = CGM.CreateRuntimeFunction(fnTy, name); if (auto f = dyn_cast(fn)) f->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - return llvm::ConstantExpr::getBitCast(fn, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(fn, CGM.Int8PtrTy); }; llvm::Constant *fnPtr; @@ -628,7 +628,7 @@ fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true); } - fnPtr = llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy); + fnPtr = llvm::ConstantExpr::getPointerCast(fnPtr, CGM.Int8PtrTy); builder.add(fnPtr); return; } Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -377,7 +377,7 @@ }; /// i32s containing the indexes of the cleanup destinations. - llvm::AllocaInst *NormalCleanupDest; + llvm::Instruction *NormalCleanupDest; unsigned NextCleanupDestIndex; @@ -392,8 +392,8 @@ llvm::Value *ExceptionSlot; /// The selector slot. Under the MandatoryCleanup model, all landing pads - /// write the current selector value into this alloca. - llvm::AllocaInst *EHSelectorSlot; + /// write the current selector value into this instruction. + llvm::Instruction *EHSelectorSlot; /// A stack of exception code slots. Entering an __except block pushes a slot /// on the stack and leaving pops one. The __exception_code() intrinsic loads @@ -428,11 +428,11 @@ /// An i1 variable indicating whether or not the @finally is /// running for an exception. - llvm::AllocaInst *ForEHVar; + llvm::Instruction *ForEHVar; /// An i8* variable into which the exception pointer to rethrow /// has been saved. - llvm::AllocaInst *SavedExnVar; + llvm::Instruction *SavedExnVar; public: void enter(CodeGenFunction &CGF, const Stmt *Finally, @@ -1858,14 +1858,23 @@ AlignmentSource *Source = nullptr); LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); + /// Create an alloca instruction. If the default address space is not 0, + /// insert addrspacecast instruction which casts the alloca instruction + /// to the default address space. + llvm::Instruction *CreateAlloca(llvm::Type *Ty, const Twine &Name = "tmp", + llvm::Instruction *InsertPos = nullptr); /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. The caller is responsible for setting an appropriate alignment on - /// the alloca. - llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, - const Twine &Name = "tmp"); + /// the alloca. If the default address space is not 0, insert addrspacecast. + llvm::Instruction *CreateTempAlloca(llvm::Type *Ty, + const Twine &Name = "tmp"); Address CreateTempAlloca(llvm::Type *Ty, CharUnits align, const Twine &Name = "tmp"); + /// Get alloca instruction operand of an addrspacecast instruction. + /// If \p Inst is alloca instruction, returns \p Inst; + llvm::AllocaInst *getAddrSpaceCastedAlloca(llvm::Instruction *Inst) const; + /// CreateDefaultAlignedTempAlloca - This creates an alloca with the /// default ABI alignment of the given LLVM type. /// Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -442,7 +442,7 @@ "callsite"); llvm::Value *args[] = { - llvm::ConstantExpr::getBitCast(CurFn, PointerTy), + llvm::ConstantExpr::getPointerCast(CurFn, PointerTy), CallSite }; Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -47,6 +47,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -90,6 +91,7 @@ VMContext(M.getContext()), Types(*this), VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { + unsigned DefaultTargetAddressSpace = Target.getDefaultTargetAddressSpace(LangOpts); // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); VoidTy = llvm::Type::getVoidTy(LLVMContext); @@ -99,9 +101,9 @@ Int64Ty = llvm::Type::getInt64Ty(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); - PointerWidthInBits = C.getTargetInfo().getPointerWidth(0); + PointerWidthInBits = C.getTargetInfo().getPointerWidth(DefaultTargetAddressSpace); PointerAlignInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(0)).getQuantity(); + C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(DefaultTargetAddressSpace)).getQuantity(); SizeSizeInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); IntAlignInBytes = @@ -109,8 +111,8 @@ IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getMaxPointerWidth()); - Int8PtrTy = Int8Ty->getPointerTo(0); - Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); + Int8PtrTy = Int8Ty->getPointerTo(DefaultTargetAddressSpace); + Int8PtrPtrTy = Int8PtrTy->getPointerTo(DefaultTargetAddressSpace); RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC(); @@ -750,7 +752,7 @@ ctor.addInt(Int32Ty, I.Priority); ctor.add(llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy)); if (I.AssociatedData) - ctor.add(llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy)); + ctor.add(llvm::ConstantExpr::getPointerCast(I.AssociatedData, VoidPtrTy)); else ctor.addNullPointer(VoidPtrTy); ctor.finishAndAddTo(ctors); @@ -1418,10 +1420,13 @@ *LineNoCst = EmitAnnotationLineNo(L); // Create the ConstantStruct for the global annotation. + unsigned AS = GV->getType()->getAddressSpace(); + llvm::PointerType *I8PTy = (AS == Int8PtrTy->getAddressSpace()) ? + Int8PtrTy : Int8Ty->getPointerTo(AS); llvm::Constant *Fields[4] = { - llvm::ConstantExpr::getBitCast(GV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy), + llvm::ConstantExpr::getPointerCast(GV, I8PTy), + llvm::ConstantExpr::getPointerCast(AnnoGV, I8PTy), + llvm::ConstantExpr::getPointerCast(UnitGV, I8PTy), LineNoCst }; return llvm::ConstantStruct::getAnon(Fields); @@ -1548,7 +1553,7 @@ llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); if (Entry) { unsigned AS = getContext().getTargetAddressSpace(VD->getType()); - auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS)); + auto Ptr = llvm::ConstantExpr::getPointerCast(Entry, DeclTy->getPointerTo(AS)); return ConstantAddress(Ptr, Alignment); } @@ -1900,7 +1905,7 @@ /// GetOrCreateLLVMFunction - If the specified mangled name is not in the /// module, create and return an llvm Function with the specified type. If there /// is something in the module with the specified name, return it potentially -/// bitcasted to the right type. +/// casted to the right type. /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. @@ -1952,7 +1957,7 @@ // (If function is requested for a definition, we always need to create a new // function, not just return a bitcast.) if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo()); + return llvm::ConstantExpr::getPointerCast(Entry, Ty->getPointerTo()); } // This function doesn't have a complete type (for example, the return @@ -2060,7 +2065,7 @@ } llvm::Type *PTy = llvm::PointerType::getUnqual(Ty); - return llvm::ConstantExpr::getBitCast(F, PTy); + return llvm::ConstantExpr::getPointerCast(F, PTy); } /// GetAddrOfFunction - Return the address of the given function. If Ty is @@ -2189,7 +2194,7 @@ /// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module, /// create and return an llvm GlobalVariable with the specified type. If there /// is something in the module with the specified name, return it potentially -/// bitcasted to the right type. +/// casted to the right type. /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the global when it is first created. @@ -2237,14 +2242,10 @@ } } - // Make sure the result is of the correct type. - if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) - return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty); - // (If global is requested for a definition, we always need to create a new // global, not just return a bitcast.) if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast(Entry, Ty); + return llvm::ConstantExpr::getPointerCast(Entry, Ty); } unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); @@ -2260,7 +2261,7 @@ if (!Entry->use_empty()) { llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, Entry->getType()); + llvm::ConstantExpr::getPointerCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); } @@ -2372,7 +2373,7 @@ if (!OldGV->use_empty()) { llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); + llvm::ConstantExpr::getPointerCast(GV, OldGV->getType()); OldGV->replaceAllUsesWith(NewPtrForOldDecl); } @@ -2621,7 +2622,7 @@ // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, Entry->getType()); + llvm::ConstantExpr::getPointerCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); // Erase the old global, since it is no longer used. @@ -3116,7 +3117,7 @@ // Remove it and replace uses of it with the alias. GA->takeName(Entry); - Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GA, + Entry->replaceAllUsesWith(llvm::ConstantExpr::getPointerCast(GA, Entry->getType())); Entry->eraseFromParent(); } else { @@ -3334,7 +3335,7 @@ if (isUTF16) // Cast the UTF16 string to the correct type. - Str = llvm::ConstantExpr::getBitCast(Str, Int8PtrTy); + Str = llvm::ConstantExpr::getPointerCast(Str, Int8PtrTy); Fields.add(Str); // String length. Index: lib/CodeGen/ItaniumCXXABI.cpp =================================================================== --- lib/CodeGen/ItaniumCXXABI.cpp +++ lib/CodeGen/ItaniumCXXABI.cpp @@ -1108,7 +1108,7 @@ if (!Record->hasTrivialDestructor()) { CXXDestructorDecl *DtorD = Record->getDestructor(); Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete); - Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy); + Dtor = llvm::ConstantExpr::getPointerCast(Dtor, CGM.Int8PtrTy); } } if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy); @@ -2171,7 +2171,7 @@ llvm::Value *args[] = { llvm::ConstantExpr::getBitCast(dtor, dtorTy), - llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), + llvm::ConstantExpr::getPointerCast(addr, CGF.Int8PtrTy), handle }; CGF.EmitNounwindRuntimeCall(atexit, args); @@ -2584,7 +2584,7 @@ } } - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy); } /// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type @@ -2913,7 +2913,7 @@ llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2); VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two); - VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); + VTable = llvm::ConstantExpr::getPointerCast(VTable, CGM.Int8PtrTy); Fields.push_back(VTable); } @@ -2986,7 +2986,7 @@ assert(!OldGV->hasAvailableExternallyLinkage() && "available_externally typeinfos not yet implemented"); - return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(OldGV, CGM.Int8PtrTy); } // Check if there is already an external RTTI descriptor for this type. @@ -3022,7 +3022,7 @@ TypeNameField = llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy); } else { - TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy); + TypeNameField = llvm::ConstantExpr::getPointerCast(TypeName, CGM.Int8PtrTy); } Fields.push_back(TypeNameField); @@ -3177,7 +3177,7 @@ } } - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy); } /// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info Index: test/CodeGenCUDA/address-spaces.cu =================================================================== --- test/CodeGenCUDA/address-spaces.cu +++ test/CodeGenCUDA/address-spaces.cu @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck --check-prefixes=NVPTX,CHECK %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck --check-prefixes=AMDGCN,CHECK %s // Verifies Clang emits correct address spaces and addrspacecast instructions // for CUDA code. @@ -8,7 +9,8 @@ // CHECK: @i = addrspace(1) externally_initialized global __device__ int i; -// CHECK: @j = addrspace(4) externally_initialized global +// AMDGCN: @j = addrspace(2) externally_initialized global +// NVPTX: @j = addrspace(4) externally_initialized global __constant__ int j; // CHECK: @k = addrspace(3) global @@ -27,17 +29,21 @@ // CHECK: @b = addrspace(3) global float undef __device__ void foo() { - // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @i to i32 addrspace(4)*) i++; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @j to i32 addrspace(4)*) j++; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @k to i32 addrspace(4)*) k++; __shared__ int lk; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32 addrspace(4)*) lk++; } @@ -47,8 +53,9 @@ ap->data1 = 1; ap->data2 = 2; } -// CHECK: define void @_Z5func0v() -// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap +// CHECK-LABEL: define void @_Z5func0v() +// NVPTX: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap +// AMDGCN: store %struct.MyStruct addrspace(4)* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct addrspace(4)*), %struct.MyStruct addrspace(4)* addrspace(4)* %ap __device__ void callee(float *ap) { *ap = 1.0f; @@ -58,37 +65,42 @@ __shared__ float a; callee(&a); // implicit cast from parameters } -// CHECK: define void @_Z5func1v() -// CHECK: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*)) +// CHECK-LABEL: define void @_Z5func1v() +// NVPTX: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*)) +// AMDGCN: call void @_Z6calleePf(float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float addrspace(4)*)) __device__ void func2() { __shared__ float a[256]; float *ap = &a[128]; // implicit cast from a decayed array *ap = 1.0f; } -// CHECK: define void @_Z5func2v() -// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap - +// CHECK-LABEL: define void @_Z5func2v() +// NVPTX: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap +// AMDGCN: store float addrspace(4)* getelementptr inbounds ([256 x float], [256 x float] addrspace(4)* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float] addrspace(4)*), i64 0, i64 128), float addrspace(4)* addrspace(4)* %ap __device__ void func3() { __shared__ float a; float *ap = reinterpret_cast(&a); // explicit cast *ap = 1.0f; } -// CHECK: define void @_Z5func3v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap +// CHECK-LABEL: define void @_Z5func3v() +// NVPTX: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap +// AMDGCN: store float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float addrspace(4)*), float addrspace(4)* addrspace(4)* %ap __device__ void func4() { __shared__ float a; float *ap = (float *)&a; // explicit c-style cast *ap = 1.0f; } -// CHECK: define void @_Z5func4v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap +// CHECK-LABEL: define void @_Z5func4v() +// NVPTX: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap +// AMDGCN: store float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float addrspace(4)*), float addrspace(4)* addrspace(4)* %ap __shared__ float b; __device__ float *func5() { return &b; // implicit cast from a return value } -// CHECK: define float* @_Z5func5v() -// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*) +// NVPTX-LABEL: define float* @_Z5func5v() +// AMDGCN-LABEL: define float addrspace(4)* @_Z5func5v() +// NVPTX: ret float* addrspacecast (float addrspace(3)* @b to float*) +// AMDGCN: ret float addrspace(4)* addrspacecast (float addrspace(3)* @b to float addrspace(4)*) Index: test/CodeGenCUDA/convergent.cu =================================================================== --- test/CodeGenCUDA/convergent.cu +++ test/CodeGenCUDA/convergent.cu @@ -2,6 +2,9 @@ // REQUIRES: nvptx-registered-target // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s -DNVPTX | FileCheck -check-prefixes=DEVICE,NVPTX %s + +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn -emit-llvm \ // RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ @@ -25,9 +28,11 @@ __host__ __device__ void bar() { // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]] baz(); - // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]] + #ifdef NVPTX + // NVPTX: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]] int x; asm ("trap;" : "=l"(x)); + #endif // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]] asm volatile ("trap;"); } Index: test/CodeGenCUDA/device-var-init.cu =================================================================== --- test/CodeGenCUDA/device-var-init.cu +++ test/CodeGenCUDA/device-var-init.cu @@ -4,7 +4,10 @@ // variables, but accept empty constructors allowed by CUDA. // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ -// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,NVPTX %s + +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCN %s #ifdef __clang__ #include "Inputs/cuda.h" @@ -18,7 +21,8 @@ __shared__ int s_v; // CHECK: @s_v = addrspace(3) global i32 undef, __constant__ int c_v; -// CHECK: addrspace(4) externally_initialized global i32 0, +// NVPTX: addrspace(4) externally_initialized global i32 0, +// AMDGCN: addrspace(2) externally_initialized global i32 0, __device__ int d_v_i = 1; // CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, @@ -29,12 +33,14 @@ __shared__ T s_t; // CHECK: @s_t = addrspace(3) global %struct.T undef, __constant__ T c_t; -// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, +// NVPTX: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, +// AMDGCN: @c_t = addrspace(2) externally_initialized global %struct.T zeroinitializer, __device__ T d_t_i = {2}; // CHECK: @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, __constant__ T c_t_i = {2}; -// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, +// NVPTX: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, +// AMDGCN: @c_t_i = addrspace(2) externally_initialized global %struct.T { i32 2 }, // empty constructor __device__ EC d_ec; @@ -42,7 +48,8 @@ __shared__ EC s_ec; // CHECK: @s_ec = addrspace(3) global %struct.EC undef, __constant__ EC c_ec; -// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, +// NVPTX: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, +// AMDGCN: @c_ec = addrspace(2) externally_initialized global %struct.EC zeroinitializer, // empty destructor __device__ ED d_ed; @@ -50,14 +57,16 @@ __shared__ ED s_ed; // CHECK: @s_ed = addrspace(3) global %struct.ED undef, __constant__ ED c_ed; -// CHECK: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer, +// NVPTX: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer, +// AMDGCN: @c_ed = addrspace(2) externally_initialized global %struct.ED zeroinitializer, __device__ ECD d_ecd; // CHECK: @d_ecd = addrspace(1) externally_initialized global %struct.ECD zeroinitializer, __shared__ ECD s_ecd; // CHECK: @s_ecd = addrspace(3) global %struct.ECD undef, __constant__ ECD c_ecd; -// CHECK: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer, +// NVPTX: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer, +// AMDGCN: @c_ecd = addrspace(2) externally_initialized global %struct.ECD zeroinitializer, // empty templated constructor -- allowed with no arguments __device__ ETC d_etc; @@ -65,12 +74,14 @@ __shared__ ETC s_etc; // CHECK: @s_etc = addrspace(3) global %struct.ETC undef, __constant__ ETC c_etc; -// CHECK: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer, +// NVPTX: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer, +// AMDGCN: @c_etc = addrspace(2) externally_initialized global %struct.ETC zeroinitializer, __device__ NCFS d_ncfs; // CHECK: @d_ncfs = addrspace(1) externally_initialized global %struct.NCFS { i32 3 } __constant__ NCFS c_ncfs; -// CHECK: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 } +// NVPTX: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 } +// AMDGCN: @c_ncfs = addrspace(2) externally_initialized global %struct.NCFS { i32 3 } // Regular base class -- allowed __device__ T_B_T d_t_b_t; @@ -78,7 +89,8 @@ __shared__ T_B_T s_t_b_t; // CHECK: @s_t_b_t = addrspace(3) global %struct.T_B_T undef, __constant__ T_B_T c_t_b_t; -// CHECK: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer, +// NVPTX: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer, +// AMDGCN: @c_t_b_t = addrspace(2) externally_initialized global %struct.T_B_T zeroinitializer, // Incapsulated object of allowed class -- allowed __device__ T_F_T d_t_f_t; @@ -86,7 +98,8 @@ __shared__ T_F_T s_t_f_t; // CHECK: @s_t_f_t = addrspace(3) global %struct.T_F_T undef, __constant__ T_F_T c_t_f_t; -// CHECK: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer, +// NVPTX: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer, +// AMDGCN: @c_t_f_t = addrspace(2) externally_initialized global %struct.T_F_T zeroinitializer, // array of allowed objects -- allowed __device__ T_FA_T d_t_fa_t; @@ -94,7 +107,8 @@ __shared__ T_FA_T s_t_fa_t; // CHECK: @s_t_fa_t = addrspace(3) global %struct.T_FA_T undef, __constant__ T_FA_T c_t_fa_t; -// CHECK: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer, +// NVPTX: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer, +// AMDGCN: @c_t_fa_t = addrspace(2) externally_initialized global %struct.T_FA_T zeroinitializer, // Calling empty base class initializer is OK @@ -103,7 +117,8 @@ __shared__ EC_I_EC s_ec_i_ec; // CHECK: @s_ec_i_ec = addrspace(3) global %struct.EC_I_EC undef, __constant__ EC_I_EC c_ec_i_ec; -// CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, +// NVPTX: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, +// AMDGCN: @c_ec_i_ec = addrspace(2) externally_initialized global %struct.EC_I_EC zeroinitializer, // We should not emit global initializers for device-side variables. // CHECK-NOT: @__cxx_global_var_init @@ -114,41 +129,53 @@ T t; // CHECK-NOT: call EC ec; - // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %ec) + // NVPTX: call void @_ZN2ECC1Ev(%struct.EC* %ec) + // AMDGCN: call void @_ZN2ECC1Ev(%struct.EC addrspace(4)* %ec) ED ed; // CHECK-NOT: call ECD ecd; - // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd) + // NVPTX: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd) + // AMDGCN: call void @_ZN3ECDC1Ev(%struct.ECD addrspace(4)* %ecd) ETC etc; - // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc) + // NVPTX: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc) + // AMDGCN: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC addrspace(4)* %etc) UC uc; // undefined constructor -- not allowed - // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %uc) + // NVPTX: call void @_ZN2UCC1Ev(%struct.UC* %uc) + // AMDGCN: call void @_ZN2UCC1Ev(%struct.UC addrspace(4)* %uc) UD ud; // undefined destructor -- not allowed // CHECK-NOT: call ECI eci; // empty constructor w/ initializer list -- not allowed - // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %eci) + // NVPTX: call void @_ZN3ECIC1Ev(%struct.ECI* %eci) + // AMDGCN: call void @_ZN3ECIC1Ev(%struct.ECI addrspace(4)* %eci) NEC nec; // non-empty constructor -- not allowed - // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %nec) + // NVPTX: call void @_ZN3NECC1Ev(%struct.NEC* %nec) + // AMDGCN: call void @_ZN3NECC1Ev(%struct.NEC addrspace(4)* %nec) // non-empty destructor -- not allowed NED ned; // no-constructor, virtual method -- not allowed - // CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv) + // NVPTX: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv) + // AMDGCN: call void @_ZN3NCVC1Ev(%struct.NCV addrspace(4)* %ncv) NCV ncv; // CHECK-NOT: call VD vd; - // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %vd) + // NVPTX: call void @_ZN2VDC1Ev(%struct.VD* %vd) + // AMDGCN: call void @_ZN2VDC1Ev(%struct.VD addrspace(4)* %vd) NCF ncf; - // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf) + // NVPTX: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf) + // AMDGCN: call void @_ZN3NCFC1Ev(%struct.NCF addrspace(4)* %ncf) NCFS ncfs; - // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs) + // NVPTX: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs) + // AMDGCN: call void @_ZN4NCFSC1Ev(%struct.NCFS addrspace(4)* %ncfs) UTC utc; - // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc) + // NVPTX: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc) + // AMDGCN: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC addrspace(4)* %utc) NETC netc; - // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) + // NVPTX: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) + // AMDGCN: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC addrspace(4)* %netc) T_B_T t_b_t; // CHECK-NOT: call T_F_T t_f_t; @@ -156,17 +183,23 @@ T_FA_T t_fa_t; // CHECK-NOT: call EC_I_EC ec_i_ec; - // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) + // NVPTX: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) + // AMDGCN: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC addrspace(4)* %ec_i_ec) EC_I_EC1 ec_i_ec1; - // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) + // NVPTX: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) + // AMDGCN: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1 addrspace(4)* %ec_i_ec1) T_V_T t_v_t; - // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) + // NVPTX: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) + // AMDGCN: call void @_ZN5T_V_TC1Ev(%struct.T_V_T addrspace(4)* %t_v_t) T_B_NEC t_b_nec; - // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) + // NVPTX: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) + // AMDGCN: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC addrspace(4)* %t_b_nec) T_F_NEC t_f_nec; - // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) + // NVPTX: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) + // AMDGCN: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC addrspace(4)* %t_f_nec) T_FA_NEC t_fa_nec; - // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) + // NVPTX: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) + // AMDGCN: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC addrspace(4)* %t_fa_nec) T_B_NED t_b_ned; // CHECK-NOT: call T_F_NED t_f_ned; @@ -174,22 +207,33 @@ T_FA_NED t_fa_ned; // CHECK-NOT: call static __shared__ EC s_ec; - // CHECK-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) + // NVPTX-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) + // AMDGCN-NOT: call void @_ZN2ECC1Ev(%struct.EC addrspace(4)* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC addrspace(4)*)) static __shared__ ETC s_etc; - // CHECK-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) + // NVPTX-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) + // AMDGCN-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC addrspace(4)* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC addrspace(4)*)) // anchor point separating constructors and destructors df(); // CHECK: call void @_Z2dfv() // Verify that we only call non-empty destructors - // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned) - // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned) - // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned) - // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd) - // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned) - // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud) - // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd) - // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed) + // NVPTX-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned) + // NVPTX-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned) + // NVPTX-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned) + // NVPTX-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd) + // NVPTX-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned) + // NVPTX-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud) + // NVPTX-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd) + // NVPTX-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed) + + // AMDGCN-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED addrspace(4)* %t_fa_ned) + // AMDGCN-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED addrspace(4)* %t_f_ned) + // AMDGCN-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED addrspace(4)* %t_b_ned) + // AMDGCN-NEXT: call void @_ZN2VDD1Ev(%struct.VD addrspace(4)* %vd) + // AMDGCN-NEXT: call void @_ZN3NEDD1Ev(%struct.NED addrspace(4)* %ned) + // AMDGCN-NEXT: call void @_ZN2UDD1Ev(%struct.UD addrspace(4)* %ud) + // AMDGCN-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD addrspace(4)* %ecd) + // AMDGCN-NEXT: call void @_ZN2EDD1Ev(%struct.ED addrspace(4)* %ed) // CHECK-NEXT: ret void } Index: test/CodeGenCUDA/device-vtable.cu =================================================================== --- test/CodeGenCUDA/device-vtable.cu +++ test/CodeGenCUDA/device-vtable.cu @@ -10,6 +10,8 @@ // RUN: | FileCheck %s -check-prefix=CHECK-HOST -check-prefix=CHECK-BOTH // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \ // RUN: | FileCheck %s -check-prefix=CHECK-DEVICE -check-prefix=CHECK-BOTH +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \ +// RUN: | FileCheck %s -check-prefix=CHECK-DEVICE -check-prefix=CHECK-BOTH #include "Inputs/cuda.h" Index: test/CodeGenCUDA/filter-decl.cu =================================================================== --- test/CodeGenCUDA/filter-decl.cu +++ test/CodeGenCUDA/filter-decl.cu @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-HOST %s -// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefix=CHECK-DEVICE %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefixes=CHECK-DEVICE,ITANIUM %s +// RUN: %clang_cc1 -triple amdgcn -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefixes=CHECK-DEVICE,AMDGCN %s #include "Inputs/cuda.h" @@ -10,15 +11,18 @@ __asm__("file scope asm is host only"); // CHECK-HOST: constantdata = internal global -// CHECK-DEVICE: constantdata = externally_initialized global +// ITANIUM: constantdata = externally_initialized global +// AMDGCN: constantdata = addrspace(2) externally_initialized global __constant__ char constantdata[256]; // CHECK-HOST: devicedata = internal global -// CHECK-DEVICE: devicedata = externally_initialized global +// ITANIUM: devicedata = externally_initialized global +// AMDGCN: devicedata = addrspace(1) externally_initialized global __device__ char devicedata[256]; // CHECK-HOST: shareddata = internal global -// CHECK-DEVICE: shareddata = global +// ITANIUM: shareddata = global +// AMDGCN: shareddata = addrspace(3) global __shared__ char shareddata[256]; // CHECK-HOST: hostdata = global Index: test/CodeGenCUDA/function-overload.cu =================================================================== --- test/CodeGenCUDA/function-overload.cu +++ test/CodeGenCUDA/function-overload.cu @@ -8,6 +8,8 @@ // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s #include "Inputs/cuda.h" Index: test/CodeGenCUDA/kernel-args-alignment.cu =================================================================== --- test/CodeGenCUDA/kernel-args-alignment.cu +++ test/CodeGenCUDA/kernel-args-alignment.cu @@ -1,8 +1,11 @@ // RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix HOST -check-prefix CHECK %s +// RUN: FileCheck -check-prefixes=HOST,CHECK %s // RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \ -// RUN: -emit-llvm -o - %s | FileCheck -check-prefix DEVICE -check-prefix CHECK %s +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,CHECK,NVPTX %s + +// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple amdgcn-amd-amdhsa \ +// RUN: -emit-llvm -o - %s -DAMDGCN| FileCheck -check-prefixes=DEVICE,CHECK,AMDGCN %s #include "Inputs/cuda.h" @@ -18,9 +21,17 @@ // Clang should generate a packed LLVM struct for S (denoted by the <>s), // otherwise this test isn't interesting. -// CHECK: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }> - +// HOST: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }> +// NVPTX: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }> +// ToDo: Fix padding on amdgcn target to be the same as host +// AMDGCN: %struct.S = type <{ i32 addrspace(4)*, i8, %struct.U, i8 }> + +// ToDo: Fix struct padding on amdgcn so that alignof(S) == 8 +#ifdef AMDGCN +static_assert(alignof(S) == 4, "Unexpected alignment."); +#else static_assert(alignof(S) == 8, "Unexpected alignment."); +#endif // HOST-LABEL: @_Z6kernelc1SPi // Marshalled kernel args should be: @@ -32,5 +43,7 @@ // HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24) // DEVICE-LABEL: @_Z6kernelc1SPi -// DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32* +// NVPTX-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32* +// ToDo: Fix amdgcn so that align of struct.S is 8 +// AMDGCN-SAME: i8{{[^,]*}}, %struct.S* byval align 4{{[^,]*}}, i32 addrspace(4)* __global__ void kernel(char a, S s, int *b) {} Index: test/OpenMP/nvptx_parallel_codegen.cpp =================================================================== --- test/OpenMP/nvptx_parallel_codegen.cpp +++ test/OpenMP/nvptx_parallel_codegen.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=amdgcn -emit-llvm-bc %s -o %t-x86-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics