Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -43,7 +43,8 @@
     SGPRSpill = 1 << 24,
     VOPAsmPrefer32Bit = 1 << 25,
     Gather4 = 1 << 26,
-    DisableWQM = 1 << 27
+    DisableWQM = 1 << 27,
+    SOPK_ZEXT = 1 << 28
   };
 }
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -56,6 +56,10 @@
   // Whether WQM _must_ be disabled for this instruction.
   field bits<1> DisableWQM = 0;

+  // Most SOPK instructions treat the immediate as a signed 16-bit value;
+  // however, some treat it as unsigned.
+  field bits<1> SOPKZext = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = VM_CNT;
   let TSFlags{1} = EXP_CNT;
@@ -89,6 +93,7 @@
   let TSFlags{25} = VOPAsmPrefer32Bit;
   let TSFlags{26} = Gather4;
   let TSFlags{27} = DisableWQM;
+  let TSFlags{28} = SOPKZext;

   let SchedRW = [Write32Bit];
Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -391,6 +391,14 @@
     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
   }

+  static bool sopkIsZext(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
+  }
+
+  bool sopkIsZext(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
+  }
+
   bool isVGPRCopy(const MachineInstr &MI) const {
     assert(MI.isCopy());
     unsigned Dest = MI.getOperand(0).getReg();
@@ -603,6 +611,9 @@
   LLVM_READONLY
   int getAtomicNoRetOp(uint16_t Opcode);

+  LLVM_READONLY
+  int getSOPKOp(uint16_t Opcode);
+
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1812,6 +1812,21 @@
     }
   }

+  if (isSOPK(MI)) {
+    int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
+    if (sopkIsZext(MI)) {
+      if (!isUInt<16>(Imm)) {
+        ErrInfo = "invalid immediate for SOPK instruction";
+        return false;
+      }
+    } else {
+      if (!isInt<16>(Imm)) {
+        ErrInfo = "invalid immediate for SOPK instruction";
+        return false;
+      }
+    }
+  }
+
   if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
       Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
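The verifier hunk above encodes the two immediate ranges at play: SOPKZext instructions take an unsigned 16-bit immediate, all other SOPK instructions a signed one. A minimal standalone sketch of the same classification (plain C++, not LLVM code; classifyImm is a hypothetical helper), driven by the boundary values the new tests exercise:

    #include <cstdint>
    #include <cstdio>

    // Mirrors the verifier rule: values in [-32768, 32767] fit the signed
    // s_cmpk_*_i32 forms, values in [0, 65535] fit only the zero-extended
    // s_cmpk_*_u32 forms, and everything else must stay a 32-bit literal.
    enum class SOPKForm { Signed16, Unsigned16, NoFit };

    static SOPKForm classifyImm(int64_t Imm) {
      if (Imm >= -32768 && Imm <= 32767)
        return SOPKForm::Signed16;
      if (Imm >= 0 && Imm <= 65535)
        return SOPKForm::Unsigned16;
      return SOPKForm::NoFit;
    }

    int main() {
      const int64_t Tests[] = {0x7fff, 0x8000, -0x8000, -0x8001, 0xffff, 0x10000};
      for (int64_t Imm : Tests)
        printf("%8lld -> %d\n", (long long)Imm, (int)classifyImm(Imm));
      return 0;
    }
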
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -1193,7 +1193,7 @@
   string OpName = opName;
 }

-class VOP2_REV <string revOp, bit isOrig> {
+class Commutable_REV <string revOp, bit isOrig> {
   string RevOp = revOp;
   bit IsOrig = isOrig;
 }
@@ -1325,7 +1325,7 @@
                      string revOp> {

   def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
-           VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

   def _si : VOP2_Real_si <op.SI, opName, p.Outs32, p.Ins32, p.Asm32>;
 }
@@ -1334,7 +1334,7 @@
                    string revOp> {

   def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
-           VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

   def _si : VOP2_Real_si <op.SI, opName, p.Outs32, p.Ins32, p.Asm32>;

@@ -1523,7 +1523,7 @@
                      bit HasMods = 1> {

   def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

   def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
             VOP3DisableFields<1, 0, HasMods>;
@@ -1537,7 +1537,7 @@
                      bit HasMods = 1> {

   def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

   def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
             VOP3DisableFields<1, 0, HasMods>;
@@ -1578,7 +1578,7 @@
                     string revOp, list<SchedReadWrite> sched> {

   def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+           Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
     let Defs = !if(defExec, [EXEC], []);
     let SchedRW = sched;
   }
@@ -1829,7 +1829,7 @@
                    string revOpName = "",
                    string asm = opName#"_e32 "#op_asm,
                    string alias_asm = opName#" "#op_asm> {
   def "" : VOPC_Pseudo <ins, pattern, opName>,
-           VOP2_REV<revOpName#"_e32", !eq(revOpName, opName)> {
+           Commutable_REV<revOpName#"_e32", !eq(revOpName, opName)> {
     let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
     let SchedRW = sched;
     let isConvergent = DefExec;
@@ -2106,7 +2106,7 @@

 // Maps an commuted opcode to its original version
 def getCommuteOrig : InstrMapping {
-  let FilterClass = "VOP2_REV";
+  let FilterClass = "Commutable_REV";
   let RowFields = ["RevOp"];
   let ColFields = ["IsOrig"];
   let KeyCol = ["0"];
@@ -2115,31 +2115,13 @@

 // Maps an original opcode to its commuted version
 def getCommuteRev : InstrMapping {
-  let FilterClass = "VOP2_REV";
+  let FilterClass = "Commutable_REV";
   let RowFields = ["RevOp"];
   let ColFields = ["IsOrig"];
   let KeyCol = ["1"];
   let ValueCols = [["0"]];
 }

-def getCommuteCmpOrig : InstrMapping {
-  let FilterClass = "VOP2_REV";
-  let RowFields = ["RevOp"];
-  let ColFields = ["IsOrig"];
-  let KeyCol = ["0"];
-  let ValueCols = [["1"]];
-}
-
-// Maps an original opcode to its commuted version
-def getCommuteCmpRev : InstrMapping {
-  let FilterClass = "VOP2_REV";
-  let RowFields = ["RevOp"];
-  let ColFields = ["IsOrig"];
-  let KeyCol = ["1"];
-  let ValueCols = [["0"]];
-}
-
-
 def getMCOpcodeGen : InstrMapping {
   let FilterClass = "SIMCInstr";
   let RowFields = ["PseudoInstr"];
@@ -2149,6 +2131,15 @@
                    [!cast<string>(SIEncodingFamily.VI)]];
 }

+// Get equivalent SOPK instruction.
+def getSOPKOp : InstrMapping {
+  let FilterClass = "SOPKInstTable";
+  let RowFields = ["BaseCmpOp"];
+  let ColFields = ["IsSOPK"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
 def getAddr64Inst : InstrMapping {
   let FilterClass = "MUBUFAddr64Table";
   let RowFields = ["OpName"];
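TableGen turns the getSOPKOp mapping above into a generated C++ lookup function, whose declaration appears in the SIInstrInfo.h hunk earlier. A fragment sketching the intended query pattern, hedged since it assumes the usual generated-table API (the real call site is shrinkScalarCompare in the next file); a return of -1 means the opcode has no SOPK twin:

    // Hypothetical call site: map a SOPC compare onto its SOPK form, if any.
    int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode()); // e.g. S_CMP_EQ_I32 -> S_CMPK_EQ_I32
    if (SOPKOpc != -1 && isInt<16>(Imm))
      MI.setDesc(TII->get(SOPKOpc)); // same operands, 16-bit immediate encoding
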
Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -188,6 +188,26 @@
   return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
 }

+static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+  return isUInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+}
+
+static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
+                                 const MachineOperand &Src,
+                                 bool &IsUnsigned) {
+  if (isInt<16>(Src.getImm())) {
+    IsUnsigned = false;
+    return !TII->isInlineConstant(Src, 4);
+  }
+
+  if (isUInt<16>(Src.getImm())) {
+    IsUnsigned = true;
+    return !TII->isInlineConstant(Src, 4);
+  }
+
+  return false;
+}
+
 /// Copy implicit register operands from specified instruction to this
 /// instruction that are not part of the instruction definition.
 static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -202,6 +222,44 @@
   }
 }

+static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
+  // cmpk instructions do scc = dst <cc> imm16, so commute the instruction to
+  // get constants on the RHS.
+  if (!MI.getOperand(0).isReg())
+    TII->commuteInstruction(MI, false, 0, 1);
+
+  const MachineOperand &Src1 = MI.getOperand(1);
+  if (!Src1.isImm())
+    return;
+
+  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
+  if (SOPKOpc == -1)
+    return;
+
+  // eq/ne is special because the imm16 can be treated as signed or unsigned,
+  // and is initially selected to the signed versions.
+  if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) {
+    bool HasUImm;
+    if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
+      if (HasUImm) {
+        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ?
+          AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32;
+      }
+
+      MI.setDesc(TII->get(SOPKOpc));
+    }
+
+    return;
+  }
+
+  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
+
+  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
+      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
+    MI.setDesc(NewDesc);
+  }
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(*MF.getFunction()))
     return false;
@@ -310,6 +368,12 @@
         }
       }

+      // Try to use s_cmpk_*
+      if (MI.isCompare() && TII->isSOPC(MI)) {
+        shrinkScalarCompare(TII, MI);
+        continue;
+      }
+
       // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
       if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Src = MI.getOperand(1);
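The payoff of shrinkScalarCompare is encoding size: an s_cmp_* whose immediate is not an inline constant carries a 32-bit literal dword after the instruction word, while s_cmpk_* packs the value into its simm16 field. A rough standalone sketch of that accounting (plain C++, not LLVM code; encodedSize is a made-up helper, the 4/8-byte figures follow the SI encoding, and the -16..64 inline range matches what isInlineConstant(Src, 4) tests):

    #include <cstdint>
    #include <cstdio>

    // Integer inline constants on SI cover -16..64 and cost no extra dword.
    static bool isInlineConstant(int64_t Imm) { return Imm >= -16 && Imm <= 64; }

    // s_cmp_* (SOPC) is a 4-byte word plus a 4-byte literal for any other
    // immediate; s_cmpk_* (SOPK) is always a single 4-byte word.
    static unsigned encodedSize(bool IsSOPK, int64_t Imm) {
      if (IsSOPK || isInlineConstant(Imm))
        return 4;
      return 8;
    }

    int main() {
      printf("s_cmp_lg_i32  s0, 0x41 : %u bytes\n", encodedSize(false, 0x41));
      printf("s_cmpk_lg_i32 s0, 0x41 : %u bytes\n", encodedSize(true, 0x41));
      return 0;
    }

This is also why the isKImmOperand helpers reject inline constants: those compares already encode in 4 bytes as SOPC, so there is nothing to save.
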
Index: lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/SOPInstructions.td
+++ lib/Target/AMDGPU/SOPInstructions.td
@@ -473,6 +473,11 @@
   let Inst{63-32} = imm;
 }

+class SOPKInstTable <bit is_sopk, string cmpOp = ""> {
+  bit IsSOPK = is_sopk;
+  string BaseCmpOp = cmpOp;
+}
+
 class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
   opName,
   (outs SReg_32:$sdst),
@@ -480,12 +485,12 @@
   "$sdst, $simm16",
   pattern>;

-class SOPK_SCC <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+class SOPK_SCC <string opName, string base_op> : SOPK_Pseudo <
   opName,
   (outs),
   (ins SReg_32:$sdst, u16imm:$simm16),
-  "$sdst, $simm16",
-  pattern> {
+  "$sdst, $simm16", []>,
+  SOPKInstTable<1, base_op> {
   let Defs = [SCC];
 }

@@ -521,18 +526,21 @@
 //   [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
 // >;

-def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32">;
-def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32">;
-def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32">;
-def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32">;
-def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32">;
-def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32">;
-def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32">;
-def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32">;
-def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32">;
-def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32">;
-def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32">;
-def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32">;
+def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">;
+def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">;
+def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">;
+def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">;
+def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">;
+def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">;
+
+let SOPKZext = 1 in {
+def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">;
+def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">;
+def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">;
+def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">;
+def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">;
+def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">;
+} // End SOPKZext = 1
 } // End isCompare = 1

 let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
@@ -613,8 +621,14 @@
   [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))]
 > {
 }

-class SOPC_CMP_32 <bits<7> op, string opName, PatLeaf cond = COND_NULL>
-  : SOPC_Helper <op, SSrc_32, i32, opName, cond>;
+class SOPC_CMP_32 <bits<7> op, string opName,
+                   PatLeaf cond = COND_NULL, string revOp = opName>
+  : SOPC_Helper <op, SSrc_32, i32, opName, cond>,
+    Commutable_REV<revOp, !eq(revOp, opName)>,
+    SOPKInstTable<0, opName> {
+  let isCompare = 1;
+  let isCommutable = 1;
+}

 class SOPC_32 <bits<7> op, string opName, list<dag> pattern = []>
   : SOPC_Base <op, SSrc_32, SSrc_32, opName, pattern>;
@@ -622,19 +636,19 @@
 class SOPC_64_32 <bits<7> op, string opName, list<dag> pattern = []>
   : SOPC_Base <op, SSrc_64, SSrc_32, opName, pattern>;

-
 def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>;
 def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>;
 def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
 def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
-def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT>;
-def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE>;
+def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
+def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">;
 def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>;
-def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE >;
+def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>;
 def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>;
 def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>;
-def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT>;
-def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE>;
+def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">;
+def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">;
+
 def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
 def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
 def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
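The revOp strings threaded into S_CMP_LT_*/S_CMP_LE_* above populate Commutable_REV rows, which the getCommuteOrig/getCommuteRev mappings in SIInstrInfo.td consume, so swapping a scalar compare's operands becomes a table lookup. A fragment sketching that lookup, hedged as an assumed usage of the generated functions in the AMDGPU namespace:

    // Within a Commutable_REV row, getCommuteOrig maps the reversed opcode to
    // the original and getCommuteRev goes the other way, reflecting
    //   s_cmp_lt_i32 a, b  <==>  s_cmp_gt_i32 b, a
    int Orig = AMDGPU::getCommuteOrig(AMDGPU::S_CMP_LT_I32); // -> S_CMP_GT_I32
    int Rev  = AMDGPU::getCommuteRev(AMDGPU::S_CMP_GT_I32);  // -> S_CMP_LT_I32
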
Index: test/CodeGen/AMDGPU/si-annotate-cf.ll
===================================================================
--- test/CodeGen/AMDGPU/si-annotate-cf.ll
+++ test/CodeGen/AMDGPU/si-annotate-cf.ll
@@ -92,7 +92,7 @@
 ; SI: s_cmp_gt_i32
 ; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]

-; SI: s_cmp_gt_i32
+; SI: s_cmpk_gt_i32
 ; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]

 ; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]
Index: test/CodeGen/AMDGPU/sopk-compares.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/sopk-compares.ll
@@ -0,0 +1,573 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; Since this intrinsic is exposed as a constant after isel, use it to
+; defeat the DAG's compare-with-constant canonicalizations.
+declare i32 @llvm.amdgcn.groupstaticsize() #1
+
+@lds = addrspace(3) global [512 x i32] undef, align 4
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 4{{$}}
+define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 4
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_eq_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 32767
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max_p1:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32_simm16_max_p1:
+; GCN: s_cmpk_lg_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_ne_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ne i32 %cond, 32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_min(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, -32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, -32769
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm15_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm15_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65535
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65535
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0x10000{{$}}
+define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65536
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ne i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp sgt i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_sgt_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp sgt i32 %cond, 32767
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max_p1:
+; GCN: s_cmp_gt_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp sgt i32 %cond, 32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sge_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sge i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_slt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp slt i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sle_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sle i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ugt_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ugt i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_uge_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp uge i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ult i32 %cond, 65
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16:
+; GCN: s_cmp_lt_u32 s2, 0xffff8000
+define void @br_scc_ult_i32_min_simm16(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ult i32 %cond, -32768
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16_m1:
+; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_ult_i32_min_simm16_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ult i32 %cond, -32769
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ule_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ule i32 %cond, %size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp eq i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ne i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sgt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sgt i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sge_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sge i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_slt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp slt i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sle_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp sle i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ugt_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ugt i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_uge_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp uge i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ult_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ult i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ule_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %cmp0 = icmp ule i32 %size, %cond
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_non_u16:
+; GCN: s_cmp_lt_u32 s2, 0xfffff7ff
+define void @br_scc_ult_i32_non_u16(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %not.size = xor i32 %size, -1
+  %cmp0 = icmp ult i32 %cond, %not.size
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }