Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -72,11 +72,13 @@
   bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
                                         CodeGenCoverage &CoverageInfo) const;
+  bool selectG_ICMP(MachineInstr &I) const;
   bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
   void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
                        SmallVectorImpl<GEPInfo> &AddrInfo) const;
   bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
   bool selectG_LOAD(MachineInstr &I) const;
+  bool selectG_SELECT(MachineInstr &I) const;
   bool selectG_STORE(MachineInstr &I) const;

   InstructionSelector::ComplexRendererFns
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -59,11 +59,52 @@
 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

+static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
+  if (Reg == AMDGPU::SCC)
+    return true;
+
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return false;
+
+  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+  const TargetRegisterClass *RC =
+      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+  if (RC)
+    return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
+           MRI.getType(Reg).getSizeInBits() == 1;
+
+  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+  return RB->getID() == AMDGPU::SCCRegBankID;
+}
+
 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   I.setDesc(TII.get(TargetOpcode::COPY));
+
+  // Special case for COPY from the scc register bank.  The scc register bank
+  // is modeled using 32-bit sgprs.
+  const MachineOperand &Src = I.getOperand(1);
+  unsigned SrcReg = Src.getReg();
+  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
+    unsigned DstReg = I.getOperand(0).getReg();
+    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
+
+    // We have a copy from a 32-bit to 64-bit register.  This happens
+    // when we are selecting scc->vcc copies.
+    if (DstSize == 64) {
+      const DebugLoc &DL = I.getDebugLoc();
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+          .addImm(0)
+          .addReg(SrcReg);
+      if (!MRI.getRegClassOrNull(SrcReg))
+        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+      I.eraseFromParent();
+      return true;
+    }
+  }
+
   for (const MachineOperand &MO : I.operands()) {
     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
       continue;
@@ -262,6 +303,101 @@
   return false;
 }

+static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+  assert(Size == 32 || Size == 64);
+  switch (P) {
+  default:
+    llvm_unreachable("Unknown condition code!");
+  case CmpInst::ICMP_NE:
+    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
+  case CmpInst::ICMP_EQ:
+    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
+  case CmpInst::ICMP_SGT:
+    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
+  case CmpInst::ICMP_SGE:
+    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
+  case CmpInst::ICMP_SLT:
+    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
+  case CmpInst::ICMP_SLE:
+    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
+  case CmpInst::ICMP_UGT:
+    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
+  case CmpInst::ICMP_UGE:
+    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
+  case CmpInst::ICMP_ULT:
+    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
+  case CmpInst::ICMP_ULE:
+    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
+  }
+}
+
+static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+  // FIXME: VI supports 64-bit compares.
+  assert(Size == 32);
+  switch (P) {
+  default:
+    llvm_unreachable("Unknown condition code!");
+  case CmpInst::ICMP_NE:
+    return AMDGPU::S_CMP_LG_U32;
+  case CmpInst::ICMP_EQ:
+    return AMDGPU::S_CMP_EQ_U32;
+  case CmpInst::ICMP_SGT:
+    return AMDGPU::S_CMP_GT_I32;
+  case CmpInst::ICMP_SGE:
+    return AMDGPU::S_CMP_GE_I32;
+  case CmpInst::ICMP_SLT:
+    return AMDGPU::S_CMP_LT_I32;
+  case CmpInst::ICMP_SLE:
+    return AMDGPU::S_CMP_LE_I32;
+  case CmpInst::ICMP_UGT:
+    return AMDGPU::S_CMP_GT_U32;
+  case CmpInst::ICMP_UGE:
+    return AMDGPU::S_CMP_GE_U32;
+  case CmpInst::ICMP_ULT:
+    return AMDGPU::S_CMP_LT_U32;
+  case CmpInst::ICMP_ULE:
+    return AMDGPU::S_CMP_LE_U32;
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  DebugLoc DL = I.getDebugLoc();
+
+  unsigned SrcReg = I.getOperand(2).getReg();
+  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
+  // FIXME: VI supports 64-bit compares.
+  assert(Size == 32);
+
+  unsigned CCReg = I.getOperand(0).getReg();
+  if (isSCC(CCReg, MRI)) {
+    unsigned Opcode =
+        getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
+    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
+                             .add(I.getOperand(2))
+                             .add(I.getOperand(3));
+    MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
+                             .addReg(AMDGPU::SCC);
+    bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) |
+               constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
+    I.eraseFromParent();
+    return Ret;
+  }
+
+  assert(Size == 32 || Size == 64);
+  unsigned Opcode =
+      getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
+  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
+                               I.getOperand(0).getReg())
+                           .add(I.getOperand(2))
+                           .add(I.getOperand(3));
+  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
+                               AMDGPU::SReg_64RegClass, MRI);
+  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
+}
+
 static MachineInstr *
 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
          unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
@@ -325,6 +461,53 @@
   return false;
 }

+bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+  assert(Size == 32 || Size == 64);
+  const MachineOperand &CCOp = I.getOperand(1);
+  unsigned CCReg = CCOp.getReg();
+  if (isSCC(CCReg, MRI)) {
+    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
+                                         AMDGPU::S_CSELECT_B64;
+    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+                                .addReg(CCReg);
+
+    // The generic constrainSelectedInstRegOperands doesn't work for the scc
+    // register bank, because it does not cover the register class we use to
+    // represent the scc bank, so we need to set the register class manually here.
+ if (!MRI.getRegClassOrNull(CCReg)) + MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI)); + MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) + .add(I.getOperand(2)) + .add(I.getOperand(3)); + + bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) | + constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); + I.eraseFromParent(); + return Ret; + } + + assert(Size == 32); + // FIXME: Support 64-bit select + MachineInstr *Select = + BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addImm(0) + .add(I.getOperand(3)) + .addImm(0) + .add(I.getOperand(2)) + .add(I.getOperand(1)); + + bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); + I.eraseFromParent(); + return Ret; +} + bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); @@ -573,10 +756,14 @@ return selectG_INTRINSIC(I, CoverageInfo); case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo); + case TargetOpcode::G_ICMP: + return selectG_ICMP(I); case TargetOpcode::G_LOAD: if (selectImpl(I, CoverageInfo)) return true; return selectG_LOAD(I); + case TargetOpcode::G_SELECT: + return selectG_SELECT(I); case TargetOpcode::G_STORE: return selectG_STORE(I); } Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1688,6 +1688,10 @@ Size = PowerOf2Ceil(Size); switch (Size) { + case 1: + if (RB->getID() == AMDGPU::SCCRegBankID) + return &AMDGPU::SReg_32_XM0RegClass; + break; case 32: return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass : &AMDGPU::SReg_32_XM0RegClass; @@ -1710,8 +1714,9 @@ return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass : &AMDGPU::SReg_512RegClass; default: - llvm_unreachable("not implemented"); + break; } + llvm_unreachable("not implemented"); } unsigned SIRegisterInfo::getVCC() const { Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -1,9 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN ---- | - define amdgpu_kernel void @copy(i32 addrspace(1)* %global0) {ret void} -... --- name: copy @@ -22,6 +19,60 @@ %0:sgpr(p1) = COPY $sgpr2_sgpr3 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_IMPLICIT_DEF - G_STORE %2, %1 :: (store 4 into %ir.global0) + G_STORE %2, %1 :: (store 4, addrspace 1) +... 
+--- + +name: copy_vcc_scc +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc + ; GCN-LABEL: name: copy_vcc_scc + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s32) = COPY $vgpr3 + %3:scc(s1) = COPY $scc + %4:vcc(s1) = COPY %3 + %5:vgpr(s32) = G_SELECT %4, %1, %2 + G_STORE %5, %0 :: (store 4, addrspace 1) +... +--- + +name: copy_vcc_scc_2_uses +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc + ; GCN-LABEL: name: copy_vcc_scc_2_uses + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s32) = COPY $vgpr3 + %3:scc(s1) = COPY $scc + %4:vcc(s1) = COPY %3 + %5:vgpr(s32) = G_SELECT %4, %1, %2 + %6:vcc(s1) = COPY %3 + %7:vgpr(s32) = G_SELECT %6, %1, %5 + G_STORE %7, %0 :: (store 4, addrspace 1) ... 
--- Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir @@ -0,0 +1,309 @@ +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN + +--- +name: icmp_s_mix +legalized: true +regBankSelected: true + +# GCN: name: icmp_s_mix +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = COPY $sgpr2 +# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = COPY $sgpr3 +# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = COPY $sgpr4 +# GCN: [[SGPR5:%[0-9]+]]:sreg_32 = COPY $sgpr5 +# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = COPY $sgpr6 +# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = COPY $sgpr7 +# GCN: S_CMP_LG_U32 [[SGPR0]], [[SGPR1]], implicit-def $scc +# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32_xm0 = COPY $scc +# GCN: S_CMP_LG_U32 [[SGPR4]], [[SGPR5]], implicit-def $scc +# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32_xm0 = COPY $scc +# GCN: $scc = COPY [[COND0]] +# GCN-NEXT: S_CSELECT_B32 [[SGPR6]], [[SGPR7]], implicit $scc +# GCN: $scc = COPY [[COND1]] +# GCN-NEXT: S_CSELECT_B32 [[SGPR2]], [[SGPR3]], implicit $scc + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s32) = COPY $sgpr1 + %3:sgpr(s32) = COPY $sgpr2 + %4:sgpr(s32) = COPY $sgpr3 + %5:sgpr(s32) = COPY $sgpr4 + %6:sgpr(s32) = COPY $sgpr5 + %7:sgpr(s32) = COPY $sgpr6 + %8:sgpr(s32) = COPY $sgpr7 + %9:scc(s1) = G_ICMP intpred(ne), %1, %2 + %10:scc(s1) = G_ICMP intpred(ne), %5, %6 + %11:sgpr(s32) = G_SELECT %9, %7, %8 + %12:sgpr(s32) = G_SELECT %10, %3, %4 + %13:vgpr(s32) = COPY %11 + G_STORE %13, %0 :: (volatile store 4, addrspace 1) + %14:vgpr(s32) = COPY %12 + G_STORE %14, %0 :: (volatile store 4, addrspace 1) + +... 
+--- +name: icmp_salu +legalized: true +regBankSelected: true + +# GCN-LABEL: name: icmp_salu +# GCN: S_CMP_LG_U32 +# GCN: S_CMP_EQ_U32 +# GCN: S_CMP_GT_I32 +# GCN: S_CMP_GE_I32 +# GCN: S_CMP_LT_I32 +# GCN: S_CMP_LE_I32 +# GCN: S_CMP_GT_U32 +# GCN: S_CMP_GE_U32 +# GCN: S_CMP_LT_U32 +# GCN: S_CMP_LE_U32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s32) = COPY $sgpr1 + %3:sgpr(s32) = COPY $sgpr2 + %4:sgpr(s32) = COPY $sgpr3 + %5:scc(s1) = G_ICMP intpred(ne), %1, %2 + %6:scc(s1) = G_ICMP intpred(eq), %1, %2 + %7:scc(s1) = G_ICMP intpred(sgt), %1, %2 + %8:scc(s1) = G_ICMP intpred(sge), %1, %2 + %9:scc(s1) = G_ICMP intpred(slt), %1, %2 + %10:scc(s1) = G_ICMP intpred(sle), %1, %2 + %11:scc(s1) = G_ICMP intpred(ugt), %1, %2 + %12:scc(s1) = G_ICMP intpred(uge), %1, %2 + %13:scc(s1) = G_ICMP intpred(ult), %1, %2 + %14:scc(s1) = G_ICMP intpred(ule), %1, %2 + %15:sgpr(s32) = G_SELECT %5, %3, %4 + %16:sgpr(s32) = G_SELECT %6, %3, %4 + %17:sgpr(s32) = G_SELECT %7, %3, %4 + %18:sgpr(s32) = G_SELECT %8, %3, %4 + %19:sgpr(s32) = G_SELECT %9, %3, %4 + %20:sgpr(s32) = G_SELECT %10, %3, %4 + %21:sgpr(s32) = G_SELECT %11, %3, %4 + %22:sgpr(s32) = G_SELECT %12, %3, %4 + %23:sgpr(s32) = G_SELECT %13, %3, %4 + %24:sgpr(s32) = G_SELECT %14, %3, %4 + %25:vgpr(s32) = COPY %15 + G_STORE %25, %0 :: (volatile store 4, addrspace 1) + %26:vgpr(s32) = COPY %16 + G_STORE %26, %0 :: (volatile store 4, addrspace 1) + %27:vgpr(s32) = COPY %17 + G_STORE %27, %0 :: (volatile store 4, addrspace 1) + %28:vgpr(s32) = COPY %18 + G_STORE %28, %0 :: (volatile store 4, addrspace 1) + %29:vgpr(s32) = COPY %19 + G_STORE %29, %0 :: (volatile store 4, addrspace 1) + %30:vgpr(s32) = COPY %20 + G_STORE %30, %0 :: (volatile store 4, addrspace 1) + %31:vgpr(s32) = COPY %21 + G_STORE %31, %0 :: (volatile store 4, addrspace 1) + %32:vgpr(s32) = COPY %22 + G_STORE %32, %0 :: (volatile store 4, addrspace 1) + %33:vgpr(s32) = COPY %23 + G_STORE %33, %0 :: (volatile store 4, addrspace 1) + %34:vgpr(s32) = COPY %24 + G_STORE %34, %0 :: (volatile store 4, addrspace 1) + +... 
+--- +name: icmp_v_mix +legalized: true +regBankSelected: true + +# GCN-LABEL: name: icmp_v_mix +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 +# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 +# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 +# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 +# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 +# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = COPY $vgpr9 +# GCN: [[COND0:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]] +# GCN: [[COND1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]] +# GCN: V_CNDMASK_B32_e64 0, [[VGPR9]], 0, [[VGPR8]], [[COND0]] +# GCN: V_CNDMASK_B32_e64 0, [[VGPR5]], 0, [[VGPR4]], [[COND1]] + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s32) = COPY $vgpr3 + %3:vgpr(s32) = COPY $vgpr4 + %4:vgpr(s32) = COPY $vgpr5 + %5:vgpr(s32) = COPY $vgpr6 + %6:vgpr(s32) = COPY $vgpr7 + %7:vgpr(s32) = COPY $vgpr8 + %8:vgpr(s32) = COPY $vgpr9 + %9:sgpr(s1) = G_ICMP intpred(ne), %1, %2 + %10:sgpr(s1) = G_ICMP intpred(ne), %5, %6 + %11:vgpr(s32) = G_SELECT %9, %7, %8 + %12:vgpr(s32) = G_SELECT %10, %3, %4 + G_STORE %11, %0 :: (volatile store 4, addrspace 1) + G_STORE %12, %0 :: (volatile store 4, addrspace 1) +... +--- +name: icmp_valu +legalized: true +regBankSelected: true + +# GCN-LABEL: name: icmp_valu +# GCN: V_CMP_NE_U32_e64 +# GCN: V_CMP_EQ_U32_e64 +# GCN: V_CMP_GT_I32_e64 +# GCN: V_CMP_GE_I32_e64 +# GCN: V_CMP_LT_I32_e64 +# GCN: V_CMP_LE_I32_e64 +# GCN: V_CMP_GT_U32_e64 +# GCN: V_CMP_GE_U32_e64 +# GCN: V_CMP_LT_U32_e64 +# GCN: V_CMP_LE_U32_e64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s32) = COPY $vgpr3 + %3:vgpr(s32) = COPY $vgpr4 + %4:vgpr(s32) = COPY $vgpr5 + %5:sgpr(s1) = G_ICMP intpred(ne), %1, %2 + %6:sgpr(s1) = G_ICMP intpred(eq), %1, %2 + %7:sgpr(s1) = G_ICMP intpred(sgt), %1, %2 + %8:sgpr(s1) = G_ICMP intpred(sge), %1, %2 + %9:sgpr(s1) = G_ICMP intpred(slt), %1, %2 + %10:sgpr(s1) = G_ICMP intpred(sle), %1, %2 + %11:sgpr(s1) = G_ICMP intpred(ugt), %1, %2 + %12:sgpr(s1) = G_ICMP intpred(uge), %1, %2 + %13:sgpr(s1) = G_ICMP intpred(ult), %1, %2 + %14:sgpr(s1) = G_ICMP intpred(ule), %1, %2 + %15:vgpr(s32) = G_SELECT %5, %3, %4 + %16:vgpr(s32) = G_SELECT %6, %3, %4 + %17:vgpr(s32) = G_SELECT %7, %3, %4 + %18:vgpr(s32) = G_SELECT %8, %3, %4 + %19:vgpr(s32) = G_SELECT %9, %3, %4 + %20:vgpr(s32) = G_SELECT %10, %3, %4 + %21:vgpr(s32) = G_SELECT %11, %3, %4 + %22:vgpr(s32) = G_SELECT %12, %3, %4 + %23:vgpr(s32) = G_SELECT %13, %3, %4 + %24:vgpr(s32) = G_SELECT %14, %3, %4 + G_STORE %15, %0 :: (volatile store 4, addrspace 1) + G_STORE %16, %0 :: (volatile store 4, addrspace 1) + G_STORE %17, %0 :: (volatile store 4, addrspace 1) + G_STORE %18, %0 :: (volatile store 4, addrspace 1) + G_STORE %19, %0 :: (volatile store 4, addrspace 1) + G_STORE %20, %0 :: (volatile store 4, addrspace 1) + G_STORE %21, %0 :: (volatile store 4, addrspace 1) + G_STORE %22, %0 :: (volatile store 4, addrspace 1) + G_STORE %23, %0 :: (volatile store 4, addrspace 1) + G_STORE %24, %0 :: (volatile store 4, addrspace 1) +... 
+---
+
+name: icmp_vv
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_vv
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:vgpr(s32) = COPY $vgpr5
+    %5:sgpr(s1) = G_ICMP intpred(ne), %1, %2
+    %6:vgpr(s32) = G_SELECT %5, %3, %4
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name: icmp_vs
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_vs
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[SGPR0]]
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:sgpr(s32) = COPY $sgpr0
+    %5:sgpr(s1) = G_ICMP intpred(ne), %1, %4
+    %6:vgpr(s32) = G_SELECT %5, %2, %3
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name: icmp_sv
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_sv
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:sgpr(s32) = COPY $sgpr0
+    %5:sgpr(s1) = G_ICMP intpred(ne), %4, %1
+    %6:vgpr(s32) = G_SELECT %5, %2, %3
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---
+
+name: icmp_or_vcc
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: icmp_or_vcc
+# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
+
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(s32) = COPY $vgpr4
+    %4:sgpr(s32) = COPY $sgpr0
+    %5:vcc(s1) = G_ICMP intpred(ne), %4, %1
+    %6:vgpr(s32) = G_SELECT %5, %2, %3
+    G_STORE %6, %0 :: (store 4, addrspace 1)
+...
+---