Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -200,7 +200,7 @@ // always initialized. bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); - if (TT.getOS() == Triple::AMDHSA) { + if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) { Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); for (Function &F : M) { Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -119,7 +119,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; - if (STM.isAmdHsaOS()) { + if (STM.isAmdCodeObjectV2()) { getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } @@ -128,7 +128,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); - if (MFI->isKernel() && STM.isAmdHsaOS()) { + if (MFI->isKernel() && STM.isAmdCodeObjectV2()) { AMDGPUTargetStreamer *TS = static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(), Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -131,6 +131,10 @@ return TargetTriple.getOS() == Triple::AMDHSA; } + bool isMesa3DOS() const { + return TargetTriple.getOS() == Triple::Mesa3D; + } + Generation getGeneration() const { return Gen; } @@ -263,6 +267,10 @@ return EnableXNACK; } + bool isAmdCodeObjectV2() const { + return isAmdHsaOS() || isMesa3DOS(); + } + unsigned getMaxWavesPerCU() const { if (getGeneration() >= 
AMDGPUSubtarget::SOUTHERN_ISLANDS) return 10; @@ -274,7 +282,7 @@ /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset() const { - return isAmdHsaOS() ? 0 : 36; + return isAmdCodeObjectV2() ? 0 : 36; } unsigned getStackAlignment() const { Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -76,7 +76,7 @@ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } @@ -136,7 +136,7 @@ MRI.addLiveIn(PreloadedScratchWaveOffsetReg); MBB.addLiveIn(PreloadedScratchWaveOffsetReg); - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { MRI.addLiveIn(PreloadedPrivateBufferReg); MBB.addLiveIn(PreloadedPrivateBufferReg); } @@ -218,7 +218,7 @@ .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); } - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { // Insert copies from argument register. assert( !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) && Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -863,12 +863,12 @@ if (HasStackObjects) Info->setHasNonSpillStackObjects(true); - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { // TODO: Assume we will spill without optimizations. if (HasStackObjects) { // If we have stack objects, we unquestionably need the private buffer - // resource. For the HSA ABI, this will be the first 4 user SGPR - // inputs. We can reserve those and use them directly. + // resource. 
For the Code Object V2 ABI, this will be the first 4 user + // SGPR inputs. We can reserve those and use them directly. unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); @@ -1970,7 +1970,7 @@ switch (IntrinsicID) { case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { - if (!Subtarget->isAmdHsaOS()) { + if (!Subtarget->isAmdCodeObjectV2()) { DiagnosticInfoUnsupported BadIntrin( *MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -118,7 +118,7 @@ if (HasStackObjects || MaySpill) PrivateSegmentWaveByteOffset = true; - if (ST.isAmdHsaOS()) { + if (ST.isAmdCodeObjectV2()) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -924,7 +924,8 @@ case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations"); + assert(ST.isAmdCodeObjectV2() && + "Non-CodeObjectV2 ABI currently uses relocations"); assert(MFI->hasPrivateSegmentBuffer()); return MFI->PrivateSegmentBufferUserSGPR; case SIRegisterInfo::KERNARG_SEGMENT_PTR: Index: test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,11 +1,12 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=HSA 
-check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=MESA -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CO-V2 -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=CO-V2 -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefix=OS-UNKNOWN -check-prefix=ALL %s ; ALL-LABEL: {{^}}test: -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: s_load_dword s{{[0-9]+}}, s[4:5], 0xa +; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 +; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa -; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0xa +; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa define void @test(i32 addrspace(1)* %out) #1 { %kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() %header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)* @@ -17,7 +18,7 @@ ; ALL-LABEL: {{^}}test_implicit: ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 -; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0x15 +; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15 define void @test_implicit(i32 addrspace(1)* %out) #1 { %implicitarg.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() %header.ptr = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)* Index: test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -1,7 +1,9 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=CI-HSA %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck 
-check-prefix=ALL -check-prefix=HSA -check-prefix=VI-HSA %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=CI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=VI-HSA %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=UNKNOWN-OS -check-prefix=SI-MESA %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=UNKNOWN-OS -check-prefix=VI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,SI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,VI-MESA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -9,25 +11,25 @@ ; ALL-LABEL {{^}}test_workgroup_id_x: -; HSA: .amd_kernel_code_t -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 0 -; HSA: compute_pgm_rsrc2_tgid_z_en = 0 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 -; HSA: .end_amd_kernel_code_t +; CO-V2: .amd_kernel_code_t +; CO-V2: compute_pgm_rsrc2_user_sgpr = 6 +; CO-V2: compute_pgm_rsrc2_tgid_x_en = 1 +; CO-V2: compute_pgm_rsrc2_tgid_y_en = 0 +; CO-V2: 
compute_pgm_rsrc2_tgid_z_en = 0 +; CO-V2: compute_pgm_rsrc2_tg_size_en = 0 +; CO-V2: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 +; CO-V2: .end_amd_kernel_code_t -; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} -; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} +; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} +; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} ; ALL-NOT: [[VCOPY]] ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 @@ -40,22 +42,22 @@ } ; ALL-LABEL {{^}}test_workgroup_id_y: -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 1 -; HSA: compute_pgm_rsrc2_tgid_z_en = 0 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; CO-V2: compute_pgm_rsrc2_user_sgpr = 6 +; CO-V2: compute_pgm_rsrc2_tgid_x_en = 1 +; CO-V2: compute_pgm_rsrc2_tgid_y_en = 1 +; CO-V2: compute_pgm_rsrc2_tgid_z_en = 0 +; CO-V2: compute_pgm_rsrc2_tg_size_en = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 -; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} -; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} +; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} +; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL-NOT: [[VCOPY]] ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: 
COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 @@ -68,30 +70,30 @@ } ; ALL-LABEL {{^}}test_workgroup_id_z: -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 0 -; HSA: compute_pgm_rsrc2_tgid_z_en = 1 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 0 -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: enable_sgpr_dispatch_id = 0 -; HSA: enable_sgpr_flat_scratch_init = 0 -; HSA: enable_sgpr_private_segment_size = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; CO-V2: compute_pgm_rsrc2_user_sgpr = 6 +; CO-V2: compute_pgm_rsrc2_tgid_x_en = 1 +; CO-V2: compute_pgm_rsrc2_tgid_y_en = 0 +; CO-V2: compute_pgm_rsrc2_tgid_z_en = 1 +; CO-V2: compute_pgm_rsrc2_tg_size_en = 0 +; CO-V2: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; CO-V2: enable_sgpr_private_segment_buffer = 1 +; CO-V2: enable_sgpr_dispatch_ptr = 0 +; CO-V2: enable_sgpr_queue_ptr = 0 +; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 +; CO-V2: enable_sgpr_dispatch_id = 0 +; CO-V2: enable_sgpr_flat_scratch_init = 0 +; CO-V2: enable_sgpr_private_segment_size = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_x = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_y = 0 +; CO-V2: enable_sgpr_grid_workgroup_count_z = 0 -; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} -; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} +; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} +; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL-NOT: [[VCOPY]] ; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -1,7 +1,9 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=CI-HSA %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=VI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=CI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=VI-HSA %s ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,SI-MESA %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,VI-MESA %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -12,7 +14,7 @@ ; MESA-NEXT: .long 132{{$}} ; ALL-LABEL {{^}}test_workitem_id_x: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; CO-V2: compute_pgm_rsrc2_tidig_comp_cnt = 0 ; ALL-NOT: v0 ; ALL: {{buffer|flat}}_store_dword {{.*}}v0 @@ -27,7 +29,7 @@ ; MESA-NEXT: .long 2180{{$}} ; ALL-LABEL {{^}}test_workitem_id_y: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 +; CO-V2: compute_pgm_rsrc2_tidig_comp_cnt = 1 ; ALL-NOT: v1 ; ALL: {{buffer|flat}}_store_dword {{.*}}v1 @@ -42,7 +44,7 @@ ; MESA-NEXT: .long 
4228{{$}} ; ALL-LABEL {{^}}test_workitem_id_z: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 +; CO-V2: compute_pgm_rsrc2_tidig_comp_cnt = 2 ; ALL-NOT: v2 ; ALL: {{buffer|flat}}_store_dword {{.*}}v2