diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1570,6 +1570,10 @@ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, AssemblerPredicate<(all_of FeatureGFX11Insts)>; +def HasGFX11Insts : + Predicate<"Subtarget->hasGFX11Insts()">, + AssemblerPredicate<(all_of FeatureGFX11Insts)>; + def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -854,6 +854,8 @@ /// \returns true if the subtarget has the v_permlane64_b32 instruction. bool hasPermLane64() const { return getGeneration() >= GFX11; } + bool hasGFX11Insts() const { return GFX11Insts; } + bool hasDPP() const { return HasDPP; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7998,6 +7998,12 @@ MCOp = NMCOp; } + if (ST.hasGFX11Insts()) { + uint16_t NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11); + if (NMCOp != (uint16_t)-1) + MCOp = NMCOp; + } + // (uint16_t)-1 means that Opcode is a pseudo instruction that has // no encoding in the given subtarget generation. if (MCOp == (uint16_t)-1) diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -385,7 +385,7 @@ } // End Uses = [M0] } // End SubtargetPredicate = isGFX10Plus -let SubtargetPredicate = isGFX11Plus in { +let SubtargetPredicate = HasGFX11Insts in { let hasSideEffects = 1 in { // For s_sendmsg_rtn_* the src0 field encodes the message type directly; it // is not an SGPR number. 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -37,28 +37,28 @@ // Disable lint checking for this block since it makes the table unreadable. // NOLINTBEGIN const CustomOperand<const MCSubtargetInfo &> Msg[] = { - {{""}}, - {{"MSG_INTERRUPT"}, ID_INTERRUPT}, - {{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus}, - {{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus}, - {{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10}, - {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9Plus}, - {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9Plus}, - {{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9Plus}, - {{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10}, - {{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus}, - {{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10}, - {{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10}, - {{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus}, - {{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus}, - {{""}}, - {{"MSG_SYSMSG"}, ID_SYSMSG}, - {{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus}, - {{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, isGFX11Plus}, - {{"MSG_RTN_GET_TMA"}, ID_RTN_GET_TMA, isGFX11Plus}, - {{"MSG_RTN_GET_REALTIME"}, ID_RTN_GET_REALTIME, isGFX11Plus}, - {{"MSG_RTN_SAVE_WAVE"}, ID_RTN_SAVE_WAVE, isGFX11Plus}, - {{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, isGFX11Plus}, + {{""}}, + {{"MSG_INTERRUPT"}, ID_INTERRUPT}, + {{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus}, + {{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus}, + {{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10}, + {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9Plus}, + {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9Plus}, + {{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9Plus}, + {{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10}, + {{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus}, + 
{{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10}, + {{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10}, + {{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus}, + {{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus}, + {{""}}, + {{"MSG_SYSMSG"}, ID_SYSMSG}, + {{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, hasGFX11Insts}, + {{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, hasGFX11Insts}, + {{"MSG_RTN_GET_TMA"}, ID_RTN_GET_TMA, hasGFX11Insts}, + {{"MSG_RTN_GET_REALTIME"}, ID_RTN_GET_REALTIME, hasGFX11Insts}, + {{"MSG_RTN_SAVE_WAVE"}, ID_RTN_SAVE_WAVE, hasGFX11Insts}, + {{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, hasGFX11Insts}, }; // NOLINTEND diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1031,6 +1031,7 @@ bool isGFX10Before1030(const MCSubtargetInfo &STI); bool isGFX11(const MCSubtargetInfo &STI); bool isGFX11Plus(const MCSubtargetInfo &STI); +bool hasGFX11Insts(const MCSubtargetInfo &STI); bool isNotGFX11Plus(const MCSubtargetInfo &STI); bool isGCN3Encoding(const MCSubtargetInfo &STI); bool isGFX10_AEncoding(const MCSubtargetInfo &STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1643,7 +1643,7 @@ namespace SendMsg { static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { - return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; + return hasGFX11Insts(STI) ? 
ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; } int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) { @@ -1681,7 +1681,7 @@ if (MsgId == ID_SYSMSG) return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; - if (!isGFX11Plus(STI)) { + if (!hasGFX11Insts(STI)) { switch (MsgId) { case ID_GS_PreGFX11: return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; @@ -1705,7 +1705,7 @@ if (!Strict) return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); - if (!isGFX11Plus(STI)) { + if (!hasGFX11Insts(STI)) { switch (MsgId) { case ID_GS_PreGFX11: return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; @@ -1720,21 +1720,21 @@ bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { return MsgId == ID_SYSMSG || - (!isGFX11Plus(STI) && - (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); + (!hasGFX11Insts(STI) && + (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); } bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI) { - return !isGFX11Plus(STI) && - (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && - OpId != OP_GS_NOP; + return !hasGFX11Insts(STI) && + (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && + OpId != OP_GS_NOP; } void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI) { MsgId = Val & getMsgIdMask(STI); - if (isGFX11Plus(STI)) { + if (hasGFX11Insts(STI)) { OpId = 0; StreamId = 0; } else { @@ -1896,6 +1896,10 @@ return isGFX11(STI); } +bool hasGFX11Insts(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX11Insts]; +} + bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll @@ -1,8 +1,25 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -mattr="+gfx11-insts" -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8,GFX8-SDAG %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -mattr="+gfx11-insts" -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8,GFX8-GISEL %s ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL %s +; Note: GFX8 w/ +gfx11-insts is also tested as we want to support selecting those +; intrinsics as long as the attribute is present. GPU generation should +; not matter. + define amdgpu_kernel void @test_get_doorbell(i32 addrspace(1)* %out) { +; GFX8-LABEL: test_get_doorbell: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_DOORBELL) +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_b32 v[0:1], v2 +; GFX8-NEXT: s_endpgm +; ; GFX11-SDAG-LABEL: test_get_doorbell: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -28,6 +45,17 @@ } define amdgpu_kernel void @test_get_ddid(i32 addrspace(1)* %out) { +; GFX8-LABEL: test_get_ddid: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_DDID) +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_b32 v[0:1], v2 +; GFX8-NEXT: s_endpgm +; ; GFX11-SDAG-LABEL: test_get_ddid: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -53,6 +81,18 @@ } define amdgpu_kernel void @test_get_tma(i64 addrspace(1)* %out) { +; GFX8-SDAG-LABEL: test_get_tma: +; 
GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-SDAG-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_TMA) +; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-SDAG-NEXT: flat_store_b64 v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_endpgm +; ; GFX11-LABEL: test_get_tma: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -63,12 +103,36 @@ ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm +; +; GFX8-GISEL-LABEL: test_get_tma: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-GISEL-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_TMA) +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-GISEL-NEXT: flat_store_b64 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 130) store i64 %ret, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @test_get_realtime(i64 addrspace(1)* %out) { +; GFX8-SDAG-LABEL: test_get_realtime: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-SDAG-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_REALTIME) +; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-SDAG-NEXT: flat_store_b64 v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_endpgm +; ; GFX11-LABEL: test_get_realtime: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -79,12 +143,35 @@ ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-NEXT: s_endpgm +; +; GFX8-GISEL-LABEL: test_get_realtime: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-GISEL-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_REALTIME) +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-GISEL-NEXT: flat_store_b64 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 131) store i64 %ret, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @test_savewave(i32 addrspace(1)* %out) { +; GFX8-LABEL: test_savewave: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_SAVE_WAVE) +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_b32 v[0:1], v2 +; GFX8-NEXT: s_endpgm +; ; GFX11-SDAG-LABEL: test_savewave: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -110,6 +197,18 @@ } define amdgpu_kernel void @test_get_tba(i64 addrspace(1)* %out) { +; GFX8-SDAG-LABEL: test_get_tba: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-SDAG-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_TBA) +; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-SDAG-NEXT: flat_store_b64 v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_endpgm +; ; GFX11-LABEL: test_get_tba: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -120,12 +219,35 @@ ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm +; +; GFX8-GISEL-LABEL: test_get_tba: +; GFX8-GISEL: ; %bb.0: +; 
GFX8-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-GISEL-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_TBA) +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-GISEL-NEXT: flat_store_b64 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 133) store i64 %ret, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @test_get_0_i32(i32 addrspace(1)* %out) { +; GFX8-LABEL: test_get_0_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(0, 0, 0) +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_store_b32 v[0:1], v2 +; GFX8-NEXT: s_endpgm +; ; GFX11-SDAG-LABEL: test_get_0_i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -151,6 +273,18 @@ } define amdgpu_kernel void @test_get_99999_i64(i64 addrspace(1)* %out) { +; GFX8-SDAG-LABEL: test_get_99999_i64: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-SDAG-NEXT: s_sendmsg_rtn_b64 s[2:3], 99999 +; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-SDAG-NEXT: flat_store_b64 v[0:1], v[2:3] +; GFX8-SDAG-NEXT: s_endpgm +; ; GFX11-LABEL: test_get_99999_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -161,6 +295,18 @@ ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm +; +; GFX8-GISEL-LABEL: test_get_99999_i64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX8-GISEL-NEXT: s_sendmsg_rtn_b64 s[2:3], 99999 
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-GISEL-NEXT: flat_store_b64 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 99999) store i64 %ret, i64 addrspace(1)* %out ret void