Index: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -762,14 +762,23 @@
       Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
     const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
     const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
-    if (Src1->isIdenticalTo(*Src0)) {
+    int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+    int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+    if (Src1->isIdenticalTo(*Src0) &&
+        (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
+        (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
       LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
+      auto &NewDesc =
+          TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
       int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
       if (Src2Idx != -1)
         MI->RemoveOperand(Src2Idx);
       MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
-      mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
-                                               : getMovOpc(false)));
+      if (Src1ModIdx != -1)
+        MI->RemoveOperand(Src1ModIdx);
+      if (Src0ModIdx != -1)
+        MI->RemoveOperand(Src0ModIdx);
+      mutateCopyOp(*MI, NewDesc);
       LLVM_DEBUG(dbgs() << *MI << '\n');
       return true;
     }
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3437,11 +3437,15 @@
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
       .addReg(SrcCond);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+      .addImm(0)
       .addReg(Src0, 0, AMDGPU::sub0)
+      .addImm(0)
       .addReg(Src1, 0, AMDGPU::sub0)
       .addReg(SrcCondCopy);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+      .addImm(0)
       .addReg(Src0, 0, AMDGPU::sub1)
+      .addImm(0)
       .addReg(Src1, 0, AMDGPU::sub1)
       .addReg(SrcCondCopy);
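For readers following the operand change: V_CNDMASK_B32_e64 now carries explicit src0_modifiers and src1_modifiers immediate operands, so every place that builds the VOP3 form by hand has to emit two extra zero immediates, exactly as the hunks above and below do. A minimal sketch of the new operand order (the register names here are placeholders, not code from this patch):

    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)          // src0_modifiers: no neg/abs
        .addReg(FalseReg)   // src0, selected where the condition bit is 0
        .addImm(0)          // src1_modifiers: no neg/abs
        .addReg(TrueReg)    // src1, selected where the condition bit is 1
        .addReg(CondReg);   // src2, the condition mask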
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -693,7 +693,9 @@
     BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
       .add(Cond[0]);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+      .addImm(0)
       .addReg(FalseReg)
+      .addImm(0)
       .addReg(TrueReg)
       .addReg(SReg);
   } else if (Cond.size() == 2) {
@@ -705,7 +707,9 @@
         .addImm(-1)
         .addImm(0);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -716,7 +720,9 @@
         .addImm(0)
         .addImm(-1);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -728,7 +734,9 @@
       BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
         .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -740,7 +748,9 @@
       BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
         .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
        .addReg(TrueReg)
+        .addImm(0)
        .addReg(FalseReg)
        .addReg(SReg);
       break;
@@ -754,7 +764,9 @@
         .addImm(-1)
         .addImm(0);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -768,7 +780,9 @@
         .addImm(0)
         .addImm(-1);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       llvm_unreachable("Unhandled branch predicate EXECZ");
@@ -2579,7 +2593,8 @@
   // Can't shrink instruction with three operands.
   // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
   // a special case for it. It can only be shrunk if the third operand
-  // is vcc. We should handle this the same way we handle vopc, by addding
+  // is vcc, and src0_modifiers and src1_modifiers are not set.
+  // We should handle this the same way we handle vopc, by adding
   // a register allocation hint pre-regalloc and then do the shrinking
   // post-regalloc.
   if (Src2) {
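The FIXME above spells out when the VOP3 form could still be shrunk to the e32 encoding. Purely as an illustration of that condition (the helper name is hypothetical and not part of this patch; getNamedOperand and hasModifiersSet are existing SIInstrInfo methods), a sketch might look like:

    // Hypothetical sketch, not from this patch: the VOP3 v_cndmask_b32 can only
    // be shrunk when the condition already lives in VCC and neither source
    // uses neg/abs modifiers.
    static bool canShrinkCndMask(const MachineInstr &MI, const SIInstrInfo *TII) {
      const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (!Src2 || !Src2->isReg() || Src2->getReg() != AMDGPU::VCC)
        return false;
      return !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
             !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers);
    }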
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1283,7 +1283,7 @@
 }
 
 // Return type of input modifiers operand for specified input operand
-class getSrcMod {
+class getSrcMod {
   bit isFP = !if(!eq(VT.Value, f16.Value), 1,
              !if(!eq(VT.Value, f32.Value), 1,
              !if(!eq(VT.Value, f64.Value), 1,
@@ -1296,7 +1296,7 @@
               FP16InputMods,
               FP32InputMods
             ),
-            Int32InputMods)
+            !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
   );
 }
 
@@ -1331,7 +1331,7 @@
 // Returns the input arguments for VOP3 instructions for the given SrcVT.
 class getIns64 {
   dag ret =
@@ -1369,16 +1369,33 @@
                 /* endif */ )
               /* NumSrcArgs == 3 */,
       !if (!eq(HasModifiers, 1),
-        // VOP3 with modifiers
-        !if (!eq(HasOMod, 1),
-          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-               Src1Mod:$src1_modifiers, Src1RC:$src1,
-               Src2Mod:$src2_modifiers, Src2RC:$src2,
-               clampmod:$clamp, omod:$omod),
-          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-               Src1Mod:$src1_modifiers, Src1RC:$src1,
-               Src2Mod:$src2_modifiers, Src2RC:$src2,
-               clampmod:$clamp))
+        !if (!eq(HasSrc2Mods, 1),
+          // VOP3 with modifiers
+          !if (!eq(HasOMod, 1),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 Src2Mod:$src2_modifiers, Src2RC:$src2,
+                 clampmod:$clamp, omod:$omod),
+            !if (!eq(HasIntClamp, 1),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2Mod:$src2_modifiers, Src2RC:$src2,
+                   clampmod:$clamp),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2Mod:$src2_modifiers, Src2RC:$src2))),
+          // VOP3 with modifiers except src2
+          !if (!eq(HasOMod, 1),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 Src2RC:$src2, clampmod:$clamp, omod:$omod),
+            !if (!eq(HasIntClamp, 1),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2RC:$src2, clampmod:$clamp),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2RC:$src2))))
       /* else */,
         // VOP3 without modifiers
         !if (!eq(HasIntClamp, 1),
@@ -1743,9 +1760,10 @@
   int Pattern = 1;
 }
 
-class VOPProfile _ArgVT> {
+class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0> {
   field list ArgVT = _ArgVT;
+  field bit EnableF32SrcMods = _EnableF32SrcMods;
 
   field ValueType DstVT = ArgVT[0];
   field ValueType Src0VT = ArgVT[1];
@@ -1763,9 +1781,9 @@
   field RegisterClass Src1DPP = getVregSrcForVT.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT.ret;
-  field Operand Src0Mod = getSrcMod.ret;
-  field Operand Src1Mod = getSrcMod.ret;
-  field Operand Src2Mod = getSrcMod.ret;
+  field Operand Src0Mod = getSrcMod.ret;
+  field Operand Src1Mod = getSrcMod.ret;
+  field Operand Src2Mod = getSrcMod.ret;
   field Operand Src0ModDPP = getSrcModExt.ret;
   field Operand Src1ModDPP = getSrcModExt.ret;
   field Operand Src0ModSDWA = getSrcModSDWA.ret;
@@ -1781,12 +1799,16 @@
   field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
 
   // TODO: Modifiers logic is somewhat adhoc here, to be refined later
-  field bit HasModifiers = isModifierType.ret;
+  // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
+  // enables modifiers for i32 type.
+  field bit HasModifiers = BitOr.ret, EnableF32SrcMods>.ret;
 
+  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0FloatMods = isFloatType.ret;
   field bit HasSrc1FloatMods = isFloatType.ret;
   field bit HasSrc2FloatMods = isFloatType.ret;
 
+  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0IntMods = isIntType.ret;
   field bit HasSrc1IntMods = isIntType.ret;
   field bit HasSrc2IntMods = isIntType.ret;
@@ -1795,7 +1817,7 @@
   field bit HasSrc1Mods = !if(HasModifiers, BitOr.ret, 0);
   field bit HasSrc2Mods = !if(HasModifiers, BitOr.ret, 0);
 
-  field bit HasClamp = HasModifiers;
+  field bit HasClamp = isModifierType.ret;
   field bit HasSDWAClamp = EmitDst;
   field bit HasFPClamp = BitAnd.ret, HasClamp>.ret;
   field bit HasIntClamp = !if(isFloatType.ret, 0, HasClamp);
@@ -1829,8 +1851,8 @@
   field dag Ins32 = getIns32.ret;
   field dag Ins64 = getIns64.ret;
+                     HasIntClamp, HasModifiers, HasSrc2Mods,
+                     HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
   field dag InsVOP3P = getInsVOP3P.ret;
Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
@@ -698,7 +698,7 @@
 multiclass SelectPat {
   def : GCNPat <
     (vt (select i1:$src0, vt:$src1, vt:$src2)),
-    (inst $src2, $src1, $src0)
+    (inst (i32 0), $src2, (i32 0), $src1, $src0)
   >;
 }
 
@@ -1104,12 +1104,14 @@
 def : GCNPat <
   (i32 (sext i1:$src0)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src0)
 >;
 
 class Ext32Pat : GCNPat <
   (i32 (ext i1:$src0)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 1), $src0)
 >;
 
 def : Ext32Pat ;
@@ -1240,8 +1242,9 @@
 class ZExt_i64_i1_Pat : GCNPat <
   (i64 (ext i1:$src)),
   (REG_SEQUENCE VReg_64,
-    (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
-    (S_MOV_B32 (i32 0)), sub1)
+    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                       /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+    sub0, (S_MOV_B32 (i32 0)), sub1)
 >;
 
@@ -1259,8 +1262,10 @@
 def : GCNPat <
   (i64 (sext i1:$src)),
   (REG_SEQUENCE VReg_64,
-    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
-    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
+    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                       /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub0,
+    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                       /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub1)
 >;
 
 class FPToI1Pat : GCNPat <
@@ -1318,32 +1323,46 @@
 def : GCNPat <
   (f16 (sint_to_fp i1:$src)),
-  (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+  (V_CVT_F16_F32_e32 (
+      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+                        $src))
 >;
 
 def : GCNPat <
   (f16 (uint_to_fp i1:$src)),
-  (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+  (V_CVT_F16_F32_e32 (
+      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+                        $src))
 >;
 
 def : GCNPat <
   (f32 (sint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+                     $src)
 >;
 
 def : GCNPat <
   (f32 (uint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+                     $src)
 >;
 
 def : GCNPat <
   (f64 (sint_to_fp i1:$src)),
-  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                                        /*src1mod*/(i32 0), /*src1*/(i32 -1),
+                                        $src))
 >;
 
 def : GCNPat <
   (f64 (uint_to_fp i1:$src)),
-  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                                        /*src1mod*/(i32 0), /*src1*/(i32 1),
+                                        $src))
 >;
 
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -483,6 +483,8 @@
       ConstrainRegs.insert(SrcReg);
     BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
         .addImm(0)
+        .addImm(0)
+        .addImm(0)
         .addImm(-1)
         .addReg(SrcReg);
     DeadCopies.push_back(&MI);
Index: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -171,6 +171,10 @@
   if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
     return AMDGPU::NoRegister;
 
+  if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
+      TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
+    return AMDGPU::NoRegister;
+
   Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
   Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
   MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
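Both SIFoldOperands and SIOptimizeExecMaskingPreRA now bail out when a source modifier is set. The same guard can also be written against the raw operands; a hypothetical stand-alone version of that check (the helper name is not from the patch; getNamedOperandIdx is the generated operand-table lookup) would be:

    // Hypothetical guard mirroring the checks above: true if either source of
    // a VOP3 v_cndmask_b32 carries neg/abs bits in its *_modifiers immediate.
    static bool cndMaskHasSrcMods(const MachineInstr &MI) {
      int Src0Mods = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                AMDGPU::OpName::src0_modifiers);
      int Src1Mods = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                AMDGPU::OpName::src1_modifiers);
      return (Src0Mods != -1 && MI.getOperand(Src0Mods).getImm() != 0) ||
             (Src1Mods != -1 && MI.getOperand(Src1Mods).getImm() != 0);
    }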
Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
+++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
@@ -245,7 +245,8 @@
 class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> {
   let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
   let Ins64 = getIns64, 3,
-              0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+              0, HasModifiers, HasModifiers, HasOMod,
+              Src0Mod, Src1Mod, Src2Mod>.ret;
   let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     VGPR_32:$src2, // stub argument
@@ -324,11 +325,12 @@
   let HasExtSDWA9 = 1;
 }
 
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+// Read in from vcc or arbitrary SGPR.
+// Enable f32 source modifiers on i32 input type.
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
   let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
   let Asm32 = "$vdst, $src0, $src1, vcc";
-  let Asm64 = "$vdst, $src0, $src1, $src2";
+  let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
   let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
@@ -347,8 +349,8 @@
                        src0_sel:$src0_sel, src1_sel:$src1_sel);
 
   let InsDPP = (ins DstRCDPP:$old,
-                    Src0DPP:$src0,
-                    Src1DPP:$src1,
+                    Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
   let HasExt = 1;
@@ -644,7 +646,9 @@
 class ZExt_i16_i1_Pat : GCNPat <
   (i16 (ext i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
+  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
+                     (i32 0/*src1mod*/), (i32 1/*src1*/),
+                     $src)
 >;
 
 let Predicates = [Has16BitInsts] in {
@@ -681,7 +685,8 @@
 def : GCNPat <
   (i16 (sext i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
 >;
 
 // Undo sub x, c -> add x, -c canonicalization since c is more likely
Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
+++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
@@ -190,6 +190,7 @@
 class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> {
   // v_div_scale_{f32|f64} do not support input modifiers.
   let HasModifiers = 0;
+  let HasClamp = 0;
   let HasOMod = 0;
   let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
   let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
@@ -29,7 +29,7 @@
     %13:vgpr_32 = V_OR_B32_e32 %11, %12.sub2, implicit $exec
     %14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
     %15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
-    %16:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %15, implicit $exec
+    %16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
     BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
     S_ENDPGM 0
 
@@ -80,7 +80,7 @@
     successors: %bb.10
     %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
     %34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
-    %35:vgpr_32 = V_CNDMASK_B32_e64 0, -1, %34, implicit $exec
+    %35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
     %28:vreg_1 = COPY %35
     S_BRANCH %bb.10
 
@@ -93,7 +93,7 @@
     $exec = S_OR_B64 $exec, %29, implicit-def $scc
     %36:vreg_1 = COPY %28
     %37:sreg_64_xexec = V_CMP_NE_U32_e64 0, %36, implicit $exec
-    %38:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %37, implicit $exec
+    %38:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %37, implicit $exec
     %39:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     undef %40.sub0:vreg_128 = COPY %39
     %40.sub1:vreg_128 = COPY %39
Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
=================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir @@ -74,7 +74,7 @@ %29.sub0:vreg_128 = COPY %1 %30:sreg_64 = V_CMP_NE_U32_e64 0, %28, implicit $exec %31:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %28, implicit $exec - dead %32:vgpr_32 = V_CNDMASK_B32_e64 0, -1, killed %31, implicit $exec + dead %32:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %31, implicit $exec %33:vreg_128 = COPY %29 %33.sub1:vreg_128 = COPY undef %32 %34:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir @@ -69,7 +69,7 @@ %18:vgpr_32 = V_MIN_F32_e32 1065353216, killed %17, implicit $exec %19:sreg_64_xexec = V_CMP_NEQ_F32_e64 0, 1065353216, 0, killed %18, 0, implicit $exec %20:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec - %21:vgpr_32 = V_CNDMASK_B32_e64 0, killed %20, killed %19, implicit $exec + %21:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, killed %20, killed %19, implicit $exec %22:sreg_64 = V_CMP_LT_F32_e64 0, 0, 0, killed %21, 0, implicit $exec %23:sreg_64 = COPY $exec, implicit-def $exec %24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc Index: llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir +++ llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir @@ -22,12 +22,12 @@ body: | bb.0.entry: %0 = IMPLICIT_DEF - %1 = V_CNDMASK_B32_e64 0, 0, %0, implicit $exec - %2 = V_CNDMASK_B32_e64 %1, %1, %0, implicit $exec + %1 = V_CNDMASK_B32_e64 0, 0, 0, 0, %0, implicit $exec + %2 = V_CNDMASK_B32_e64 0, %1, 0, %1, %0, implicit $exec %3 = IMPLICIT_DEF - %4 = V_CNDMASK_B32_e64 %3, %3, %0, implicit $exec + %4 = V_CNDMASK_B32_e64 0, %3, 0, %3, %0, implicit $exec %5 = COPY %1 - %6 = V_CNDMASK_B32_e64 %5, 0, %0, implicit $exec + %6 = V_CNDMASK_B32_e64 0, %5, 0, 0, %0, implicit $exec $vcc = IMPLICIT_DEF %7 = V_CNDMASK_B32_e32 %3, %3, implicit $exec, implicit $vcc Index: llvm/trunk/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir +++ llvm/trunk/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir @@ -150,7 +150,7 @@ # GCN-LABEL: name: cluster_cmp_cndmask # GCN: S_NOP 0, implicit-def $vcc # GCN-NEXT: %3:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec -# GCN-NEXT: dead %4:vgpr_32 = V_CNDMASK_B32_e64 %0, %1, %3, implicit $exec +# GCN-NEXT: dead %4:vgpr_32 = V_CNDMASK_B32_e64 0, %0, 0, %1, %3, implicit $exec name: cluster_cmp_cndmask registers: - { id: 0, class: vgpr_32 } @@ -168,13 +168,13 @@ %1 = V_MOV_B32_e32 0, implicit $exec %3 = V_CMP_EQ_I32_e64 %0, %1, implicit $exec S_NOP 0, implicit def $vcc - %4 = V_CNDMASK_B32_e64 %0, %1, %3, implicit $exec + %4 = V_CNDMASK_B32_e64 0, %0, 0, %1, %3, implicit $exec ... 
# GCN-LABEL: name: cluster_multi_use_cmp_cndmask # GCN: %4:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec -# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec -# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec +# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec +# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec name: cluster_multi_use_cmp_cndmask registers: - { id: 0, class: vgpr_32 } @@ -195,15 +195,15 @@ %4 = V_CMP_EQ_I32_e64 %0, %1, implicit $exec S_NOP 0, implicit def $vcc - %5 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec - %6 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec + %5 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec + %6 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec ... # GCN-LABEL: name: cluster_multi_use_cmp_cndmask2 # GCN: %4:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec -# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec +# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec # GCN-NEXT: %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec +# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec name: cluster_multi_use_cmp_cndmask2 registers: - { id: 0, class: vgpr_32 } @@ -221,7 +221,7 @@ %1 = V_MOV_B32_e32 0, implicit $exec %4 = V_CMP_EQ_I32_e64 %0, %1, implicit $exec %2 = V_MOV_B32_e32 0, implicit $exec - %5 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec + %5 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec %3 = V_MOV_B32_e32 0, implicit $exec - %6 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec + %6 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir +++ llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir @@ -77,7 +77,7 @@ bb.2: %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec - %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec + %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -112,7 +112,7 @@ bb.2: %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec - %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec + %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec @@ -143,7 +143,7 @@ bb.2: %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec - %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec + %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir +++ llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir @@ -9,7 +9,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc @@ -31,7 +31,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc @@ -46,7 +46,7 @@ # GCN: name: negated_cond_vop2_redef_vcc1 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec # GCN-NEXT: $vcc_lo = COPY $sgpr0 # GCN-NEXT: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc @@ -56,7 +56,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec $vcc_lo = COPY $sgpr0 $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc @@ -72,7 +72,7 @@ # GCN: name: negated_cond_vop2_redef_vcc2 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec # GCN-NEXT: $vcc_hi = COPY $sgpr0 # 
GCN-NEXT: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc @@ -82,7 +82,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec $vcc_hi = COPY $sgpr0 $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc @@ -98,7 +98,7 @@ # GCN: name: negated_cond_vop3_redef_cmp # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec # GCN-NEXT: %2.sub1:sreg_64_xexec = COPY $sgpr0 # GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc @@ -108,7 +108,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec %2.sub1 = COPY $sgpr0 $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc @@ -149,7 +149,7 @@ body: | bb.0: $vcc = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, $vcc, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $vcc, implicit $exec %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc @@ -171,7 +171,7 @@ body: | bb.0: $vcc = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, $vcc, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $vcc, implicit $exec V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 killed $vcc, $exec, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc @@ -186,7 +186,7 @@ # GCN: name: negated_cond_vop3_redef_sel # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: %1:vgpr_32 = COPY $vgpr0 # GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec # GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc @@ -196,7 +196,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %1:vgpr_32 = COPY $vgpr0 %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc @@ -212,7 +212,7 @@ # GCN: name: negated_cond_vop2_used_sel # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- @@ -220,7 +220,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc @@ -236,7 +236,7 @@ # GCN: name: negated_cond_vop2_used_vcc # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: V_CMP_NE_U32_e32 1, %1, 
implicit-def $vcc, implicit $exec # GCN-NEXT: $sgpr0_sgpr1 = COPY $vcc # GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc @@ -246,7 +246,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec $sgpr0_sgpr1 = COPY $vcc $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc @@ -263,7 +263,7 @@ # GCN: name: negated_cond_vop3_sel_wrong_subreg1 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF -# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec # GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc @@ -273,7 +273,7 @@ bb.0: %0:sreg_64_xexec = IMPLICIT_DEF %1.sub1 = IMPLICIT_DEF - %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc @@ -288,7 +288,7 @@ # GCN: name: negated_cond_vop3_sel_wrong_subreg2 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF # GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec # GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc @@ -298,7 +298,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %1.sub1 = IMPLICIT_DEF %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc @@ -323,7 +323,7 @@ bb.0: %0:sreg_64_xexec = IMPLICIT_DEF %1.sub1 = IMPLICIT_DEF - %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub0, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc @@ -346,7 +346,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %1.sub1 = IMPLICIT_DEF %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub0, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc @@ -362,7 +362,7 @@ # GCN: name: negated_cond_vop3_sel_subreg_overlap # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN-NEXT: %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: %1.sub2_sub3:vreg_128 = IMPLICIT_DEF # GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub2, 1, implicit $exec # GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc @@ -372,7 +372,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %1.sub2_sub3 = IMPLICIT_DEF %2:sreg_64_xexec = V_CMP_NE_U32_e64 
%1.sub2, 1, implicit $exec $vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc @@ -395,7 +395,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec bb.1: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec @@ -411,7 +411,7 @@ ... # GCN: name: negated_cond_vop2_different_blocks_cmp_and -# GCN: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec +# GCN: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN: $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc --- @@ -419,7 +419,7 @@ body: | bb.0: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec bb.1: @@ -435,7 +435,7 @@ ... # GCN: name: negated_cond_vop2_not_dominated_blocks -# GCN: V_CNDMASK_B32_e64 0, 1, +# GCN: V_CNDMASK_B32_e64 0, 0, 0, 1, # GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc --- @@ -449,7 +449,7 @@ bb.1: %0:sreg_64_xexec = IMPLICIT_DEF - %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec bb.2: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir +++ llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir @@ -189,7 +189,7 @@ %46 = V_AND_B32_e32 1, killed %45, implicit $exec %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load 4) %25 = V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $exec - %26 = V_CNDMASK_B32_e64 0, -1, killed %25, implicit $exec + %26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec %62 = IMPLICIT_DEF bb.29: Index: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir +++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir @@ -9,7 +9,7 @@ ... 
# GCN-LABEL: name: shrink_add_vop3{{$}} # GCN: %29:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %19, %17, implicit $exec -# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec +# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_add_vop3 alignment: 0 exposesReturnsTwice: false @@ -84,7 +84,7 @@ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec %29, %9 = V_ADD_I32_e64 %19, %17, implicit $exec - %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec + %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -92,7 +92,7 @@ --- # GCN-LABEL: name: shrink_sub_vop3{{$}} # GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUB_I32_e64 %19, %17, implicit $exec -# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec +# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_sub_vop3 alignment: 0 @@ -168,7 +168,7 @@ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec %29, %9 = V_SUB_I32_e64 %19, %17, implicit $exec - %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec + %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -176,7 +176,7 @@ --- # GCN-LABEL: name: shrink_subrev_vop3{{$}} # GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUBREV_I32_e64 %19, %17, implicit $exec -# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec +# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_subrev_vop3 alignment: 0 @@ -252,7 +252,7 @@ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit $exec - %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec + %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -260,7 +260,7 @@ --- # GCN-LABEL: name: check_addc_src2_vop3{{$}} # GCN: %29:vgpr_32, $vcc = V_ADDC_U32_e64 %19, %17, %9, implicit $exec -# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec +# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec name: check_addc_src2_vop3 alignment: 0 exposesReturnsTwice: false @@ -336,7 +336,7 @@ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec %9 = S_MOV_B64 0 %29, $vcc = V_ADDC_U32_e64 %19, %17, %9, implicit $exec - %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec + %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -344,7 +344,7 @@ --- # GCN-LABEL: name: shrink_addc_vop3{{$}} # GCN: %29:vgpr_32 = V_ADDC_U32_e32 %19, %17, implicit-def $vcc, implicit $vcc, implicit $exec -# GCN %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec +# GCN %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec name: shrink_addc_vop3 alignment: 0 @@ -421,7 +421,7 @@ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec $vcc = S_MOV_B64 0 %29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, implicit $exec - %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit 
$exec + %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -430,7 +430,7 @@ --- # GCN-LABEL: name: shrink_addc_undef_vcc{{$}} # GCN: %29:vgpr_32 = V_ADDC_U32_e32 %19, %17, implicit-def $vcc, implicit undef $vcc, implicit $exec -# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec +# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec name: shrink_addc_undef_vcc alignment: 0 exposesReturnsTwice: false @@ -505,7 +505,7 @@ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec %29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, implicit $exec - %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec + %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -184,7 +184,7 @@ %36:vgpr_32 = V_MAC_F32_e32 0, %33, %36, implicit $exec %37:vgpr_32 = V_MAD_F32 0, %35, 0, 0, 0, 0, 0, 0, implicit $exec %38:sreg_64_xexec = V_CMP_NE_U32_e64 0, %5, implicit $exec - %39:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %38, implicit $exec + %39:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %38, implicit $exec V_CMP_NE_U32_e32 1, %39, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc %40:vgpr_32 = V_ADD_F32_e32 %36, %37, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir +++ llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -47,7 +47,7 @@ $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec - $vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec + $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec $sgpr0_sgpr1 = COPY $exec, implicit-def $exec SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5) $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc @@ -111,7 +111,7 @@ $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec - $vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec + $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec $sgpr0_sgpr1 = COPY $exec, implicit-def $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5) Index: 
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir +++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir @@ -16,7 +16,7 @@ $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec - $vgpr3 = V_CNDMASK_B32_e64 -1082130432, 1065353216, killed $sgpr0_sgpr1, implicit $exec + $vgpr3 = V_CNDMASK_B32_e64 0, -1082130432, 0, 1065353216, killed $sgpr0_sgpr1, implicit $exec $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec S_BRANCH %bb.1 Index: llvm/trunk/test/MC/AMDGPU/vop3.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop3.s +++ llvm/trunk/test/MC/AMDGPU/vop3.s @@ -199,7 +199,7 @@ // VI: v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x02,0x00] -// TODO: Modifier tests +// TODO: Modifier tests (v_cndmask done) v_cndmask_b32 v1, v3, v5, s[4:5] // SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00] @@ -213,6 +213,18 @@ // SICI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0xaa,0x01] // VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01] +v_cndmask_b32 v1, -v3, v5, s[4:5] +// SICI: v_cndmask_b32_e64 v1, -v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x20] +// VI: v_cndmask_b32_e64 v1, -v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x20] + +v_cndmask_b32_e64 v1, v3, |v5|, s[4:5] +// SICI: v_cndmask_b32_e64 v1, v3, |v5|, s[4:5] ; encoding: [0x01,0x02,0x00,0xd2,0x03,0x0b,0x12,0x00] +// VI: v_cndmask_b32_e64 v1, v3, |v5|, s[4:5] ; encoding: [0x01,0x02,0x00,0xd1,0x03,0x0b,0x12,0x00] + +v_cndmask_b32_e64 v1, -abs(v3), v5, vcc +// SICI: v_cndmask_b32_e64 v1, -|v3|, v5, vcc ; encoding: [0x01,0x01,0x00,0xd2,0x03,0x0b,0xaa,0x21] +// VI: v_cndmask_b32_e64 v1, -|v3|, v5, vcc ; encoding: [0x01,0x01,0x00,0xd1,0x03,0x0b,0xaa,0x21] + //TODO: readlane, writelane v_add_f32 v1, v3, s5 Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt @@ -132,6 +132,15 @@ # VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01] 0x01 0x00 0x00 0xd1 0x03 0x0b 0xaa 0x01 +# VI: v_cndmask_b32_e64 v1, -v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x20] +0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x20 + +# VI: v_cndmask_b32_e64 v1, v3, |v5|, s[4:5] ; encoding: [0x01,0x02,0x00,0xd1,0x03,0x0b,0x12,0x00] +0x01,0x02,0x00,0xd1,0x03,0x0b,0x12,0x00 + +# VI: v_cndmask_b32_e64 v1, -|v3|, v5, vcc ; encoding: [0x01,0x01,0x00,0xd1,0x03,0x0b,0xaa,0x21] +0x01,0x01,0x00,0xd1,0x03,0x0b,0xaa,0x21 + # VI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00] 0x01 0x00 0x01 0xd1 0x03 0x0b 0x00 0x00
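A note for readers decoding the new MC tests: per the VOP3a encoding in the GCN ISA documentation (not restated in the patch itself), the abs bits for src0/src1/src2 occupy bits 8-10 and the neg bits occupy bits 61-63, which is why |v5| appears as 0x02 in the second encoding byte and -v3 as 0x20 in the last one. On the MachineInstr side the same information lives in the new src*_modifiers immediates, built from the SISrcMods bit values in SIDefines.h; a hypothetical request for -|v3| on src0 would look like:

    // Hypothetical example, not from the patch: fneg+fabs on src0 of a
    // v_cndmask_b32_e64 being assembled through a MachineInstrBuilder (MIB).
    unsigned Src0Mods = SISrcMods::NEG | SISrcMods::ABS;
    MIB.addImm(Src0Mods); // src0_modifiers operand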