AMDGPU: treat the CUDA OS triple like AMDHSA.

Every Triple::AMDHSA check touched below now also accepts Triple::CUDA:
the HSA metadata and code-object-version emission in the asm printer,
IsAMDHSA in PromoteAlloca, AMDGPUSubtarget::isAmdHsaOS(), the HSA metadata
directive check in the asm parser, the relocation-addend and OSABI selection
in the ELF asm backend, and shouldEmitConstantsToTextSection(). A CUDA OS
also takes the same branch as the "amdgiz"/"amdgizcl" environments when
choosing the pointer layout string and in getAMDGPUAS().

NOTE(review): line breaks, blank context lines and context indentation were
restored from the hunk line counts of a flattened copy; verify against the
tree, or apply with a whitespace-tolerant option.
NOTE(review): the context line `TM->getSubtarget(F)` in
AMDGPUPromoteAlloca.cpp looks like it lost a template argument in transit;
confirm against the tree.

Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -118,10 +118,12 @@
     return;
 
   if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
+      TM.getTargetTriple().getOS() != Triple::CUDA &&
       TM.getTargetTriple().getOS() != Triple::AMDPAL)
     return;
 
-  if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
+  if (TM.getTargetTriple().getOS() == Triple::AMDHSA ||
+      TM.getTargetTriple().getOS() == Triple::CUDA)
     HSAMetadataStream.begin(M);
 
   if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
@@ -132,7 +134,8 @@
     return;
 
   // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
-  if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
+  if (TM.getTargetTriple().getOS() == Triple::AMDHSA ||
+      TM.getTargetTriple().getOS() == Triple::CUDA)
     getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
 
   // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
@@ -156,7 +159,8 @@
   getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
 
   // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
-  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
+  if (TM.getTargetTriple().getOS() == Triple::AMDHSA ||
+      TM.getTargetTriple().getOS() == Triple::CUDA) {
     HSAMetadataStream.end();
     getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
   }
@@ -202,7 +206,8 @@
     getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
   }
 
-  if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
+      TM.getTargetTriple().getOS() != Triple::CUDA)
     return;
 
   HSAMetadataStream.emitKernel(MF->getFunction(),
Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -145,7 +145,7 @@
 
   const Triple &TT = TM->getTargetTriple();
   IsAMDGCN = TT.getArch() == Triple::amdgcn;
-  IsAMDHSA = TT.getOS() == Triple::AMDHSA;
+  IsAMDHSA = (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::CUDA);
 
   const AMDGPUSubtarget &ST = TM->getSubtarget(F);
   if (!ST.isPromoteAllocaEnabled())
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -205,7 +205,8 @@
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   bool isAmdHsaOS() const {
-    return TargetTriple.getOS() == Triple::AMDHSA;
+    return (TargetTriple.getOS() == Triple::AMDHSA ||
+            TargetTriple.getOS() == Triple::CUDA);
   }
 
   bool isMesa3DOS() const {
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -270,7 +270,7 @@
 
   // 32-bit private, local, and region pointers. 64-bit global, constant and
   // flat.
-  if (TT.getEnvironmentName() == "amdgiz" ||
+  if (TT.getOS() == Triple::CUDA || TT.getEnvironmentName() == "amdgiz" ||
       TT.getEnvironmentName() == "amdgizcl")
     return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
          "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2599,7 +2599,8 @@
 }
 
 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
-  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
+  if ((getSTI().getTargetTriple().getOS() != Triple::AMDHSA) &&
+      (getSTI().getTargetTriple().getOS() != Triple::CUDA)) {
     return Error(getParser().getTok().getLoc(),
                  (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
                  "not available on non-amdhsa OSes")).str());
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -171,11 +171,13 @@
   uint8_t OSABI = ELF::ELFOSABI_NONE;
 
 public:
-  ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) :
-      AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
-      HasRelocationAddend(TT.getOS() == Triple::AMDHSA) {
+  ELFAMDGPUAsmBackend(const Target &T, const Triple &TT)
+      : AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
+        HasRelocationAddend((TT.getOS() == Triple::AMDHSA) ||
+                            (TT.getOS() == Triple::CUDA)) {
     switch (TT.getOS()) {
     case Triple::AMDHSA:
+    case Triple::CUDA:
       OSABI = ELF::ELFOSABI_AMDGPU_HSA;
       break;
     case Triple::AMDPAL:
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -451,7 +451,7 @@
 }
 
 bool shouldEmitConstantsToTextSection(const Triple &TT) {
-  return TT.getOS() != Triple::AMDHSA;
+  return (TT.getOS() != Triple::AMDHSA && TT.getOS() != Triple::CUDA);
 }
 
 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
@@ -937,7 +937,7 @@
 AMDGPUAS getAMDGPUAS(Triple T) {
   auto Env = T.getEnvironmentName();
   AMDGPUAS AS;
-  if (Env == "amdgiz" || Env == "amdgizcl") {
+  if (Env == "amdgiz" || Env == "amdgizcl" || (T.getOS() == Triple::CUDA)) {
     AS.FLAT_ADDRESS = 0;
     AS.PRIVATE_ADDRESS = 5;
     AS.REGION_ADDRESS = 4;