Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -171,19 +171,6 @@ if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) return false; - // Don't constrain the source register to a class so the def instruction - // handles it (unless it's undef). - // - // FIXME: This is a hack. When selecting the def, we neeed to know - // specifically know that the result is VCCRegBank, and not just an SGPR - // with size 1. An SReg_32 with size 1 is ambiguous with wave32. - if (Src.isUndef()) { - const TargetRegisterClass *SrcRC = - TRI.getConstrainedRegClassForOperand(Src, *MRI); - if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) - return false; - } - return true; } @@ -287,50 +274,24 @@ } bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { - MachineOperand &Dst = I.getOperand(0); - MachineOperand &Src0 = I.getOperand(1); - MachineOperand &Src1 = I.getOperand(2); - Register DstReg = Dst.getReg(); + Register DstReg = I.getOperand(0).getReg(); unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); - if (DstRB->getID() == AMDGPU::VCCRegBankID) { - const TargetRegisterClass *RC = TRI.getBoolRC(); - unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), - RC == &AMDGPU::SReg_64RegClass); - I.setDesc(TII.get(InstOpc)); - // Dead implicit-def of scc - I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef - true, // isImp - false, // isKill - true)); // isDead - - // FIXME: Hack to avoid turning the register bank into a register class. - // The selector for G_ICMP relies on seeing the register bank for the result - // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will - // be ambiguous whether it's a scalar or vector bool. - if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg())) - MRI->setRegClass(Src0.getReg(), RC); - if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg())) - MRI->setRegClass(Src1.getReg(), RC); - - return RBI.constrainGenericRegister(DstReg, *RC, *MRI); - } - - // TODO: Should this allow an SCC bank result, and produce a copy from SCC for - // the result? - if (DstRB->getID() == AMDGPU::SGPRRegBankID) { - unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); - I.setDesc(TII.get(InstOpc)); - // Dead implicit-def of scc - I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef - true, // isImp - false, // isKill - true)); // isDead - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } + if (DstRB->getID() != AMDGPU::SGPRRegBankID && + DstRB->getID() != AMDGPU::VCCRegBankID) + return false; - return false; + bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID && + STI.isWave64()); + I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64))); + + // Dead implicit-def of scc + I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef + true, // isImp + false, // isKill + true)); // isDead + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { @@ -2374,8 +2335,7 @@ CondPhysReg = AMDGPU::SCC; BrOpcode = AMDGPU::S_CBRANCH_SCC1; - // FIXME: Hack for isSCC tests - ConstrainRC = &AMDGPU::SGPR_32RegClass; + ConstrainRC = &AMDGPU::SReg_32RegClass; } else { // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? // We sort of know that a VCC producer based on the register bank, that ands Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1206,6 +1206,10 @@ return getWavefrontSize() == 32; } + bool isWave64() const { + return getWavefrontSize() == 64; + } + const TargetRegisterClass *getBoolRC() const { return getRegisterInfo()->getBoolRC(); } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir @@ -20,7 +20,7 @@ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY2]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: bb.1: @@ -46,7 +46,7 @@ ; GCN-LABEL: name: brcond_scc_impdef ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) - ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: $scc = COPY [[DEF]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: bb.1: @@ -73,7 +73,7 @@ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY2]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir @@ -17,7 +17,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -66,7 +66,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -116,7 +116,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -165,7 +165,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -215,7 +215,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -263,7 +263,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -314,7 +314,7 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -363,7 +363,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -412,7 +412,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2