Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -384,17 +384,32 @@ FlatUsed = true; continue; + case AMDGPU::TBA: + case AMDGPU::TBA_LO: + case AMDGPU::TBA_HI: + case AMDGPU::TMA: + case AMDGPU::TMA_LO: + case AMDGPU::TMA_HI: + llvm_unreachable("Trap Handler registers should not be used"); + continue; + default: break; } if (AMDGPU::SReg_32RegClass.contains(reg)) { + if (AMDGPU::TTMP_32RegClass.contains(reg)) { + llvm_unreachable("Trap Handler registers should not be used"); + } isSGPR = true; width = 1; } else if (AMDGPU::VGPR_32RegClass.contains(reg)) { isSGPR = false; width = 1; } else if (AMDGPU::SReg_64RegClass.contains(reg)) { + if (AMDGPU::TTMP_64RegClass.contains(reg)) { + llvm_unreachable("Trap Handler registers should not be used"); + } isSGPR = true; width = 2; } else if (AMDGPU::VReg_64RegClass.contains(reg)) { Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -574,8 +574,10 @@ } -static int getRegClass(bool IsVgpr, unsigned RegWidth) { - if (IsVgpr) { +enum RegisterKind { IS_VGPR, IS_SGPR, IS_TTMP }; + +static int getRegClass(RegisterKind Is, unsigned RegWidth) { + if (Is == IS_VGPR) { switch (RegWidth) { default: return -1; case 1: return AMDGPU::VGPR_32RegClassID; @@ -585,16 +587,23 @@ case 8: return AMDGPU::VReg_256RegClassID; case 16: return AMDGPU::VReg_512RegClassID; } + } else if (Is == IS_TTMP) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::TTMP_32RegClassID; + case 2: return AMDGPU::TTMP_64RegClassID; + } + } else if (Is == IS_SGPR) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::SGPR_32RegClassID; + 
case 2: return AMDGPU::SGPR_64RegClassID; + case 4: return AMDGPU::SReg_128RegClassID; + case 8: return AMDGPU::SReg_256RegClassID; + case 16: return AMDGPU::SReg_512RegClassID; + } } - - switch (RegWidth) { - default: return -1; - case 1: return AMDGPU::SGPR_32RegClassID; - case 2: return AMDGPU::SGPR_64RegClassID; - case 4: return AMDGPU::SReg_128RegClassID; - case 8: return AMDGPU::SReg_256RegClassID; - case 16: return AMDGPU::SReg_512RegClassID; - } + return -1; } static unsigned getRegForName(StringRef RegName) { @@ -611,6 +620,10 @@ .Case("vcc_hi", AMDGPU::VCC_HI) .Case("exec_lo", AMDGPU::EXEC_LO) .Case("exec_hi", AMDGPU::EXEC_HI) + .Case("tma_lo", AMDGPU::TMA_LO) + .Case("tma_hi", AMDGPU::TMA_HI) + .Case("tba_lo", AMDGPU::TBA_LO) + .Case("tba_hi", AMDGPU::TBA_HI) .Default(0); } @@ -641,21 +654,21 @@ TRI, &getSTI(), false); } - // Match vgprs and sgprs - if (RegName[0] != 's' && RegName[0] != 'v') + // Match vgprs, sgprs and ttmps + if (RegName[0] != 's' && RegName[0] != 'v' && !RegName.startswith("ttmp")) return nullptr; - bool IsVgpr = RegName[0] == 'v'; + const RegisterKind Is = RegName[0] == 'v' ? IS_VGPR : RegName[0] == 's' ? IS_SGPR : IS_TTMP; unsigned RegWidth; unsigned RegIndexInClass; - if (RegName.size() > 1) { - // We have a 32-bit register + if (RegName.size() > (Is == IS_TTMP ? strlen("ttmp") : 1) ) { + // We have a single 32-bit register. Syntax: vXX RegWidth = 1; - if (RegName.substr(1).getAsInteger(10, RegIndexInClass)) + if (RegName.substr(Is == IS_TTMP ? strlen("ttmp") : 1).getAsInteger(10, RegIndexInClass)) return nullptr; Parser.Lex(); } else { - // We have a register greater than 32-bits. + // We have a register greater than 32-bits (a range of single registers). Syntax: v[XX:YY] int64_t RegLo, RegHi; Parser.Lex(); @@ -678,11 +691,11 @@ Parser.Lex(); RegWidth = (RegHi - RegLo) + 1; - if (IsVgpr) { + if (Is == IS_VGPR) { // VGPR registers aren't aligned. RegIndexInClass = RegLo; } else { - // SGPR registers are aligned. 
Max alignment is 4 dwords. + // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. unsigned Size = std::min(RegWidth, 4u); if (RegLo % Size != 0) return nullptr; @@ -691,7 +704,7 @@ } } - int RCID = getRegClass(IsVgpr, RegWidth); + int RCID = getRegClass(Is, RegWidth); if (RCID == -1) return nullptr; Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -18,6 +18,8 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <string> + using namespace llvm; void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, @@ -189,6 +191,18 @@ case AMDGPU::VCC_HI: O << "vcc_hi"; return; + case AMDGPU::TBA_LO: + O << "tba_lo"; + return; + case AMDGPU::TBA_HI: + O << "tba_hi"; + return; + case AMDGPU::TMA_LO: + O << "tma_lo"; + return; + case AMDGPU::TMA_HI: + O << "tma_hi"; + return; case AMDGPU::EXEC_LO: O << "exec_lo"; return; @@ -205,41 +219,44 @@ break; } - char Type; + std::string Type; unsigned NumRegs; if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) { - Type = 'v'; + Type = "v"; NumRegs = 1; } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) { - Type = 's'; + Type = "s"; NumRegs = 1; } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) { - Type = 'v'; + Type = "v"; + NumRegs = 2; + } else if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(reg)) { + Type = "s"; NumRegs = 2; - } else if (MRI.getRegClass(AMDGPU::SReg_64RegClassID).contains(reg)) { - Type = 's'; + } else if (MRI.getRegClass(AMDGPU::TTMP_64RegClassID).contains(reg)) { + Type = "ttmp"; NumRegs = 2; } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) { - Type = 'v'; + Type = "v"; NumRegs = 4; } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
- Type = 's'; + Type = "s"; NumRegs = 4; } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) { - Type = 'v'; + Type = "v"; NumRegs = 3; } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) { - Type = 'v'; + Type = "v"; NumRegs = 8; } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) { - Type = 's'; + Type = "s"; NumRegs = 8; } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) { - Type = 'v'; + Type = "v"; NumRegs = 16; } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) { - Type = 's'; + Type = "s"; NumRegs = 16; } else { O << getRegisterName(reg); @@ -249,6 +266,8 @@ // The low 8 bits of the encoding value is the register index, for both VGPRs // and SGPRs. unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1); + if (Type == "ttmp") + RegIdx -= 112; // Trap temps start at offset 112. TODO: Get this from tablegen. if (NumRegs == 1) { O << Type << RegIdx; return; Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -114,6 +114,16 @@ reserveRegisterTuples(Reserved, AMDGPU::EXEC); reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); + // Reserve Trap Handler registers - support is not implemented in Codegen. + reserveRegisterTuples(Reserved, AMDGPU::TBA); + reserveRegisterTuples(Reserved, AMDGPU::TMA); + reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1); + reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3); + reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5); + reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7); + reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9); + reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11); + // Reserve the last 2 registers so we will always have at least 2 more that // will physically contain VCC. 
reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103); @@ -640,7 +650,21 @@ switch(Channel) { case 0: return AMDGPU::VCC_LO; case 1: return AMDGPU::VCC_HI; - default: llvm_unreachable("Invalid SubIdx for VCC"); + default: llvm_unreachable("Invalid SubIdx for VCC"); break; + } + + case AMDGPU::TBA: + switch(Channel) { + case 0: return AMDGPU::TBA_LO; + case 1: return AMDGPU::TBA_HI; + default: llvm_unreachable("Invalid SubIdx for TBA"); break; + } + + case AMDGPU::TMA: + switch(Channel) { + case 0: return AMDGPU::TMA_LO; + case 1: return AMDGPU::TMA_HI; + default: llvm_unreachable("Invalid SubIdx for TMA"); break; } case AMDGPU::FLAT_SCR: Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td @@ -44,6 +44,40 @@ def SCC : SIReg<"scc", 253>; def M0 : SIReg <"m0", 124>; +// Trap handler registers +def TBA_LO : SIReg<"tba_lo", 108>; +def TBA_HI : SIReg<"tba_hi", 109>; + +def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, + DwarfRegAlias<TBA_LO> { + let Namespace = "AMDGPU"; + let SubRegIndices = [sub0, sub1]; + let HWEncoding = 108; +} + +def TMA_LO : SIReg<"tma_lo", 110>; +def TMA_HI : SIReg<"tma_hi", 111>; + +def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, + DwarfRegAlias<TMA_LO> { + let Namespace = "AMDGPU"; + let SubRegIndices = [sub0, sub1]; + let HWEncoding = 110; +} + +def TTMP0 : SIReg <"ttmp0", 112>; +def TTMP1 : SIReg <"ttmp1", 113>; +def TTMP2 : SIReg <"ttmp2", 114>; +def TTMP3 : SIReg <"ttmp3", 115>; +def TTMP4 : SIReg <"ttmp4", 116>; +def TTMP5 : SIReg <"ttmp5", 117>; +def TTMP6 : SIReg <"ttmp6", 118>; +def TTMP7 : SIReg <"ttmp7", 119>; +def TTMP8 : SIReg <"ttmp8", 120>; +def TTMP9 : SIReg <"ttmp9", 121>; +def TTMP10 : SIReg <"ttmp10", 122>; +def TTMP11 : SIReg <"ttmp11", 123>; + multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> { def _ci : SIReg<n, ci_e>; def _vi : SIReg<n, vi_e>; @@ -135,6 +169,24 @@ (add 
(decimate (shl SGPR_32, 14), 4)), (add (decimate (shl SGPR_32, 15), 4))]>; +// Trap handler TMP 32-bit registers +def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32, + (add (sequence "TTMP%u", 0, 11))> { + let isAllocatable = 0; +} + +// Trap handler TMP 64-bit registers +def TTMP_64Regs : RegisterTuples<[sub0, sub1], + [(add (decimate TTMP_32, 2)), + (add (decimate (shl TTMP_32, 1), 2))]>; + +// Trap handler TMP 128-bit registers +def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3], + [(add (decimate TTMP_32, 4)), + (add (decimate (shl TTMP_32, 1), 4)), + (add (decimate (shl TTMP_32, 2), 4)), + (add (decimate (shl TTMP_32, 3), 4))]>; + // VGPR 32-bit registers def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add (sequence "VGPR%u", 0, 255))>; @@ -199,13 +251,18 @@ // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32, - (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI) + (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI, + TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI) >; def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>; +def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> { + let isAllocatable = 0; +} + def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, - (add SGPR_64, VCC, EXEC, FLAT_SCR) + (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64) >; def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> { Index: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll +++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -201,22 +201,14 @@ ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: buffer_load_dwordx4 
v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} -; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} +; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} +; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} +; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} +; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} +; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} +; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} +; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} Index: llvm/trunk/test/MC/AMDGPU/trap.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/trap.s +++ llvm/trunk/test/MC/AMDGPU/trap.s @@ -0,0 +1,99 @@ +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: llvm-mc 
-arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=VI + +//===----------------------------------------------------------------------===// +// Trap Handler related - 32 bit registers +//===----------------------------------------------------------------------===// + +s_add_u32 ttmp0, ttmp0, 4 +// SICI: s_add_u32 ttmp0, ttmp0, 4 ; encoding: [0x70,0x84,0x70,0x80] +// VI: s_add_u32 ttmp0, ttmp0, 4 ; encoding: [0x70,0x84,0x70,0x80] + +s_add_u32 ttmp4, 8, ttmp4 +// SICI: s_add_u32 ttmp4, 8, ttmp4 ; encoding: [0x88,0x74,0x74,0x80] +// VI: s_add_u32 ttmp4, 8, ttmp4 ; encoding: [0x88,0x74,0x74,0x80] + +s_add_u32 ttmp4, ttmp4, 0x00000100 +// SICI: s_add_u32 ttmp4, ttmp4, 0x100 ; encoding: [0x74,0xff,0x74,0x80,0x00,0x01,0x00,0x00] +// VI: s_add_u32 ttmp4, ttmp4, 0x100 ; encoding: [0x74,0xff,0x74,0x80,0x00,0x01,0x00,0x00] + +s_add_u32 ttmp4, ttmp4, 4 +// SICI: s_add_u32 ttmp4, ttmp4, 4 ; encoding: [0x74,0x84,0x74,0x80] +// VI: s_add_u32 ttmp4, ttmp4, 4 ; encoding: [0x74,0x84,0x74,0x80] + +s_add_u32 ttmp4, ttmp8, ttmp4 +// SICI: s_add_u32 ttmp4, ttmp8, ttmp4 ; encoding: [0x78,0x74,0x74,0x80] +// VI: s_add_u32 ttmp4, ttmp8, ttmp4 ; encoding: [0x78,0x74,0x74,0x80] + +s_and_b32 ttmp10, ttmp8, 0x00000080 +// SICI: s_and_b32 ttmp10, ttmp8, 0x80 ; encoding: [0x78,0xff,0x7a,0x87,0x80,0x00,0x00,0x00] +// VI: s_and_b32 ttmp10, ttmp8, 0x80 ; encoding: [0x78,0xff,0x7a,0x86,0x80,0x00,0x00,0x00] + +s_and_b32 ttmp9, tma_hi, 0x0000ffff +// SICI: s_and_b32 ttmp9, tma_hi, 0xffff ; encoding: [0x6f,0xff,0x79,0x87,0xff,0xff,0x00,0x00] +// VI: s_and_b32 ttmp9, tma_hi, 0xffff ; encoding: [0x6f,0xff,0x79,0x86,0xff,0xff,0x00,0x00] + +s_and_b32 ttmp9, ttmp9, 0x000001ff +// SICI: s_and_b32 ttmp9, ttmp9, 0x1ff ; encoding: [0x79,0xff,0x79,0x87,0xff,0x01,0x00,0x00] +// VI: s_and_b32 ttmp9, ttmp9, 0x1ff ; encoding: [0x79,0xff,0x79,0x86,0xff,0x01,0x00,0x00] + +s_and_b32 ttmp9, tma_lo, 0xffff0000 
+// SICI: s_and_b32 ttmp9, tma_lo, 0xffff0000 ; encoding: [0x6e,0xff,0x79,0x87,0x00,0x00,0xff,0xff] +// VI: s_and_b32 ttmp9, tma_lo, 0xffff0000 ; encoding: [0x6e,0xff,0x79,0x86,0x00,0x00,0xff,0xff] + +s_and_b32 ttmp9, ttmp9, ttmp8 +// SICI: s_and_b32 ttmp9, ttmp9, ttmp8 ; encoding: [0x79,0x78,0x79,0x87] +// VI: s_and_b32 ttmp9, ttmp9, ttmp8 ; encoding: [0x79,0x78,0x79,0x86] + +s_and_b32 ttmp8, ttmp1, 0x01000000 +// SICI: s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x71,0xff,0x78,0x87,0x00,0x00,0x00,0x01] +// VI: s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x71,0xff,0x78,0x86,0x00,0x00,0x00,0x01] + +s_cmp_eq_i32 ttmp8, 0 +// SICI: s_cmp_eq_i32 ttmp8, 0 ; encoding: [0x78,0x80,0x00,0xbf] +// VI: s_cmp_eq_i32 ttmp8, 0 ; encoding: [0x78,0x80,0x00,0xbf] + +s_cmp_eq_i32 ttmp8, 0x000000fe +// SICI: s_cmp_eq_i32 ttmp8, 0xfe ; encoding: [0x78,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00] +// VI: s_cmp_eq_i32 ttmp8, 0xfe ; encoding: [0x78,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00] + +s_lshr_b32 ttmp8, ttmp8, 12 +// SICI: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x78,0x8c,0x78,0x90] +// VI: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x78,0x8c,0x78,0x8f] + +s_mov_b32 m0, ttmp8 +// SICI: s_mov_b32 m0, ttmp8 ; encoding: [0x78,0x03,0xfc,0xbe] +// VI: s_mov_b32 m0, ttmp8 ; encoding: [0x78,0x00,0xfc,0xbe] + +s_mov_b32 ttmp10, 0 +// SICI: s_mov_b32 ttmp10, 0 ; encoding: [0x80,0x03,0xfa,0xbe] +// VI: s_mov_b32 ttmp10, 0 ; encoding: [0x80,0x00,0xfa,0xbe] + +s_mov_b32 ttmp11, 0x01024fac +// SICI: s_mov_b32 ttmp11, 0x1024fac ; encoding: [0xff,0x03,0xfb,0xbe,0xac,0x4f,0x02,0x01] +// VI: s_mov_b32 ttmp11, 0x1024fac ; encoding: [0xff,0x00,0xfb,0xbe,0xac,0x4f,0x02,0x01] + +s_mov_b32 ttmp8, m0 +// SICI: s_mov_b32 ttmp8, m0 ; encoding: [0x7c,0x03,0xf8,0xbe] +// VI: s_mov_b32 ttmp8, m0 ; encoding: [0x7c,0x00,0xf8,0xbe] + +s_mov_b32 ttmp8, tma_lo +// SICI: s_mov_b32 ttmp8, tma_lo ; encoding: [0x6e,0x03,0xf8,0xbe] +// VI: s_mov_b32 ttmp8, tma_lo ; encoding: [0x6e,0x00,0xf8,0xbe] + +s_mul_i32 ttmp8, 
0x00000324, ttmp8 +// SICI: s_mul_i32 ttmp8, 0x324, ttmp8 ; encoding: [0xff,0x78,0x78,0x93,0x24,0x03,0x00,0x00] +// VI: s_mul_i32 ttmp8, 0x324, ttmp8 ; encoding: [0xff,0x78,0x78,0x92,0x24,0x03,0x00,0x00] + +s_or_b32 ttmp9, ttmp9, 0x00280000 +// SICI: s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x79,0xff,0x79,0x88,0x00,0x00,0x28,0x00] +// VI: s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x79,0xff,0x79,0x87,0x00,0x00,0x28,0x00] + +//===----------------------------------------------------------------------===// +// Trap Handler related - Pairs and quadruples of registers +//===----------------------------------------------------------------------===// + +s_mov_b64 ttmp[4:5], exec +// SICI: s_mov_b64 ttmp[4:5], exec ; encoding: [0x7e,0x04,0xf4,0xbe] +// VI: s_mov_b64 ttmp[4:5], exec ; encoding: [0x7e,0x01,0xf4,0xbe]