Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2996,15 +2996,18 @@
     unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
+    unsigned SrcCondCopy = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+    BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
+      .addReg(SrcCond);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
       .addReg(Src0, 0, AMDGPU::sub0)
       .addReg(Src1, 0, AMDGPU::sub0)
-      .addReg(SrcCond);
+      .addReg(SrcCondCopy);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
       .addReg(Src0, 0, AMDGPU::sub1)
       .addReg(Src1, 0, AMDGPU::sub1)
-      .addReg(SrcCond);
+      .addReg(SrcCondCopy);
 
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
       .addReg(DstLo)
       .addImm(AMDGPU::sub0)
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -607,15 +607,18 @@
          "Not a VGPR32 reg");
 
   if (Cond.size() == 1) {
+    unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+    BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
+      .add(Cond[0]);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addReg(FalseReg)
       .addReg(TrueReg)
-      .add(Cond[0]);
+      .addReg(SReg);
   } else if (Cond.size() == 2) {
     assert(Cond[0].isImm() && "Cond[0] is not an immediate");
     switch (Cond[0].getImm()) {
     case SIInstrInfo::SCC_TRUE: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
       BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
         .addImm(-1)
         .addImm(0);
@@ -626,7 +629,7 @@
       break;
     }
     case SIInstrInfo::SCC_FALSE: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
       BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
         .addImm(0)
         .addImm(-1);
@@ -639,23 +642,29 @@
     case SIInstrInfo::VCCNZ: {
       MachineOperand RegOp = Cond[1];
       RegOp.setImplicit(false);
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
+        .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
         .addReg(FalseReg)
         .addReg(TrueReg)
-        .add(RegOp);
+        .addReg(SReg);
       break;
     }
     case SIInstrInfo::VCCZ: {
       MachineOperand RegOp = Cond[1];
       RegOp.setImplicit(false);
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
+        .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
         .addReg(TrueReg)
         .addReg(FalseReg)
-        .add(RegOp);
+        .addReg(SReg);
       break;
     }
     case SIInstrInfo::EXECNZ: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
       unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
       BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
         .addImm(0);
@@ -669,7 +678,7 @@
       break;
     }
     case SIInstrInfo::EXECZ: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
       unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
       BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
         .addImm(0);
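
All of the C++ hunks above apply one and the same transform, so it is worth
spelling out once. V_CNDMASK_B32_e64 reads its condition from src2, and the
hardware does not accept EXEC there (see the note added to
llvm.amdgcn.ps.live.ll below). Rather than handing the condition operand to the
instruction directly, the condition is routed through a COPY into a virtual
register of the SReg_64_XEXEC class, i.e. the 64-bit SGPR class with EXEC
carved out, so the register allocator can never assign EXEC to src2. As a
standalone sketch, with placeholder names (CondReg, DstReg, etc. are
illustrative, not identifiers from the patch):

    // Sketch of the recurring pattern; CondReg/DstReg/FalseReg/TrueReg are
    // placeholders. The condition may currently live anywhere, including
    // EXEC; a plain COPY from EXEC is always legal.
    unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
    BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
      .addReg(CondReg);
    // src2 now draws from SReg_64_XEXEC, so it can never be EXEC.
    BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addReg(FalseReg)
      .addReg(TrueReg)
      .addReg(SReg);

When the condition already lives in an ordinary SGPR pair, the register
coalescer should be able to fold the COPY away; the extra s_mov_b64 only
survives when the source really was EXEC, which is exactly what the updated
llvm.amdgcn.ps.live.ll test checks.
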
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -963,7 +963,7 @@
            VCSrc_f64,
            VCSrc_b64),
        !if(!eq(VT.Value, i1.Value),
-         SCSrc_b64,
+         SCSrc_i1,
          !if(isFP,
            !if(!eq(VT.Value, f16.Value),
              VCSrc_f16,
Index: lib/Target/AMDGPU/SILowerI1Copies.cpp
===================================================================
--- lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -121,11 +121,14 @@
       }
     }
 
+    unsigned TmpSrc = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc)
+      .add(Src);
     BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
       .add(Dst)
       .addImm(0)
       .addImm(-1)
-      .add(Src);
+      .addReg(TmpSrc);
     MI.eraseFromParent();
   } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
              SrcRC == &AMDGPU::VReg_1RegClass) {
Index: lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.td
+++ lib/Target/AMDGPU/SIRegisterInfo.td
@@ -483,6 +483,8 @@
 
 defm SCSrc : RegInlineOperand<"SReg", "SCSrc">;
 
+def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;
+
 //===----------------------------------------------------------------------===//
 // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
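
The two TableGen changes are the pattern-selection half of the same fix: the i1
branch of the VOP3 source-operand !if chain in SIInstrInfo.td now yields the
new SCSrc_i1 operand instead of SCSrc_b64, and SCSrc_i1 is defined over
SReg_64_XEXEC, so V_CNDMASK_B32_e64 instructions created by instruction
selection carry the same no-EXEC constraint as the hand-built ones above. The
remaining hunks update MIR tests whose carry-out and condition vregs must move
from sreg_64 to sreg_64_xexec so that their register classes keep matching the
tightened operand definitions.
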
Index: test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
@@ -1,7 +1,10 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
 
 ; CHECK-LABEL: {{^}}test1:
-; CHECK: v_cndmask_b32_e64 v0, 0, 1, exec
+; CHECK: s_mov_b64 s[0:1], exec
+; CHECK: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+;
+; Note: The hardware doesn't implement EXEC as src2 for v_cndmask.
 ;
 ; Note: We could generate better code here if we recognized earlier that
 ; there is no WQM use and therefore llvm.amdgcn.ps.live is constant. However,
Index: test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
===================================================================
--- test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
+++ test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
@@ -9,9 +9,9 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
 
@@ -42,13 +42,13 @@
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
   - { id: 8, class: vgpr_32 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: vgpr_32 }
-  - { id: 11, class: sreg_64 }
+  - { id: 11, class: sreg_64_xexec }
   - { id: 12, class: vgpr_32 }
-  - { id: 13, class: sreg_64 }
+  - { id: 13, class: sreg_64_xexec }
   - { id: 14, class: vgpr_32 }
-  - { id: 15, class: sreg_64 }
+  - { id: 15, class: sreg_64_xexec }
 
 body: |
   bb.0:
@@ -77,9 +77,9 @@
 registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
-  - { id: 2, class: sreg_64 }
+  - { id: 2, class: sreg_64_xexec }
   - { id: 3, class: vgpr_32 }
-  - { id: 4, class: sreg_64 }
+  - { id: 4, class: sreg_64_xexec }
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
 
@@ -104,12 +104,12 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
-  - { id: 8, class: sreg_64 }
+  - { id: 8, class: sreg_64_xexec }
 body: |
   bb.0:
     %0 = V_MOV_B32_e32 0, implicit %exec
@@ -130,9 +130,9 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
 
@@ -156,9 +156,9 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
 
@@ -181,7 +181,7 @@
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
   - { id: 3, class: vgpr_32 }
-  - { id: 4, class: sreg_64 }
+  - { id: 4, class: sreg_64_xexec }
   - { id: 5, class: vgpr_32 }
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
@@ -210,7 +210,7 @@
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
   - { id: 3, class: vgpr_32 }
-  - { id: 4, class: sreg_64 }
+  - { id: 4, class: sreg_64_xexec }
   - { id: 5, class: vgpr_32 }
   - { id: 6, class: vgpr_32 }
   - { id: 7, class: vgpr_32 }
Index: test/CodeGen/AMDGPU/shrink-carry.mir
===================================================================
--- test/CodeGen/AMDGPU/shrink-carry.mir
+++ test/CodeGen/AMDGPU/shrink-carry.mir
@@ -10,9 +10,9 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
@@ -34,9 +34,9 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
@@ -58,9 +58,9 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
@@ -82,9 +82,9 @@
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
Index: test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
===================================================================
--- test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
+++ test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
@@ -27,7 +27,7 @@
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }
@@ -111,7 +111,7 @@
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }
@@ -195,7 +195,7 @@
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }
@@ -278,7 +278,7 @@
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }