Index: lib/Support/TargetParser.cpp =================================================================== --- lib/Support/TargetParser.cpp +++ lib/Support/TargetParser.cpp @@ -169,12 +169,14 @@ } AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { - if (GPU == "generic") - return {7, 0, 0}; - AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); - if (AK == AMDGPU::GPUKind::GK_NONE) + if (AK == AMDGPU::GPUKind::GK_NONE) { + if (GPU == "generic-hsa") + return {7, 0, 0}; + if (GPU == "generic") + return {6, 0, 0}; return {0, 0, 0}; + } switch (AK) { case GK_GFX600: return {6, 0, 0}; Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -80,7 +80,7 @@ SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; // FIXME: I don't think think Evergreen has any useful support for // denormals, but should be checked. Should we issue a warning somewhere @@ -155,7 +155,7 @@ AMDGPUGenSubtargetInfo(TT, GPU, FS), AMDGPUSubtarget(TT), TargetTriple(TT), - Gen(SOUTHERN_ISLANDS), + Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS), InstrItins(getInstrItineraryForCPU(GPU)), LDSBankCount(0), MaxPrivateElementSize(0), Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -306,8 +306,9 @@ if (!GPU.empty()) return GPU; + // Need to default to a target with flat support for HSA. if (TT.getArch() == Triple::amdgcn) - return "generic"; + return TT.getOS() == Triple::AMDHSA ? 
"generic-hsa" : "generic"; return "r600"; } Index: lib/Target/AMDGPU/GCNProcessors.td =================================================================== --- lib/Target/AMDGPU/GCNProcessors.td +++ lib/Target/AMDGPU/GCNProcessors.td @@ -12,6 +12,10 @@ [FeatureWavefrontSize64] >; +def : ProcessorModel<"generic-hsa", NoSchedModel, + [FeatureWavefrontSize64, FeatureFlatAddressSpace] +>; + //===------------------------------------------------------------===// // GCN GFX6 (Southern Islands (SI)). //===------------------------------------------------------------===// Index: test/Analysis/CostModel/AMDGPU/fdiv.ll =================================================================== --- test/Analysis/CostModel/AMDGPU/fdiv.ll +++ test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -1,7 +1,7 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa 
-mcpu=hawaii -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,FASTFP32DENORMS,FP16 %s Index: test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll @@ -0,0 +1,16 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=HSA-DEFAULT %s +; RUN: not llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; Flat instructions should not select if the target device doesn't +; support them. The default device should be able to select for HSA. + +; HSA-DEFAULT: flat_load_dword 
+ +; ERROR: LLVM ERROR: Cannot select: t{{[0-9]+}}: i32,ch = load<(volatile load 4 from %ir.flat.ptr.load)> +define amdgpu_kernel void @load_flat_i32(i32* %flat.ptr) { + %load = load volatile i32, i32* %flat.ptr, align 4 + ret void +} Index: test/CodeGen/AMDGPU/lower-kernargs.ll =================================================================== --- test/CodeGen/AMDGPU/lower-kernargs.ll +++ test/CodeGen/AMDGPU/lower-kernargs.ll @@ -531,7 +531,10 @@ define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { ; HSA-LABEL: @kern_lds_ptr_si( ; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 +; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0 +; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* +; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 +; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_lds_ptr_si(