Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -932,7 +932,8 @@ AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; CurDAG->SelectNodeTo(N, Opc, N->getVTList(), - { N->getOperand(0), N->getOperand(1) }); + {N->getOperand(0), N->getOperand(1), + CurDAG->getConstant(0, {}, MVT::i1)/*clamp bit*/}); } void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { @@ -1032,13 +1033,19 @@ Zero, Addr.getOperand(1)); if (isDSOffsetLegal(Sub, ByteOffset, 16)) { + SmallVector<SDValue, 3> Opnds; + Opnds.push_back(Zero); + Opnds.push_back(Addr.getOperand(1)); + // FIXME: Select to VOP3 version for with-carry. - unsigned SubOp = Subtarget->hasAddNoCarry() ? - AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + unsigned SubOp = AMDGPU::V_SUB_I32_e32; + if (Subtarget->hasAddNoCarry()) { + SubOp = AMDGPU::V_SUB_U32_e64; + Opnds.push_back(Zero); // clamp bit + } - MachineSDNode *MachineSub - = CurDAG->getMachineNode(SubOp, DL, MVT::i32, - Zero, Addr.getOperand(1)); + MachineSDNode *MachineSub = + CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds); Base = SDValue(MachineSub, 0); Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); @@ -1106,12 +1113,17 @@ Zero, Addr.getOperand(1)); if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { - unsigned SubOp = Subtarget->hasAddNoCarry() ? - AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + SmallVector<SDValue, 3> Opnds; + Opnds.push_back(Zero); + Opnds.push_back(Addr.getOperand(1)); + unsigned SubOp = AMDGPU::V_SUB_I32_e32; + if (Subtarget->hasAddNoCarry()) { + SubOp = AMDGPU::V_SUB_U32_e64; + Opnds.push_back(Zero); // clamp bit + } MachineSDNode *MachineSub - = CurDAG->getMachineNode(SubOp, DL, MVT::i32, - Zero, Addr.getOperand(1)); + = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds); Base = SDValue(MachineSub, 0); Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1092,7 +1092,8 @@ // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z getAddNoCarry(Entry, Insert, DL, TIDReg) .addReg(TIDReg) - .addReg(TIDIGZReg); + .addReg(TIDIGZReg) + .addImm(0); // clamp bit } else { // Get the wave id BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), @@ -1117,7 +1118,8 @@ unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize); getAddNoCarry(MBB, MI, DL, TmpReg) .addImm(LDSOffset) - .addReg(TIDReg); + .addReg(TIDReg) + .addImm(0); // clamp bit return TmpReg; } @@ -4443,6 +4445,7 @@ Inst.RemoveOperand(3); Inst.setDesc(get(NewOpc)); + Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit Inst.addImplicitDefUseOperands(*MBB.getParent()); MRI.replaceRegWith(OldDstReg, ResultReg); legalizeOperands(Inst, MDT); @@ -4703,7 +4706,8 @@ BuildMI(MBB, MII, DL, get(LoOpc), DestSub0) .addReg(CarryReg, RegState::Define) .add(SrcReg0Sub0) - .add(SrcReg1Sub0); + .add(SrcReg1Sub0) + .addImm(0); // clamp bit unsigned HiOpc = IsAdd ? 
AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; MachineInstr *HiHalf = @@ -4711,7 +4715,8 @@ .addReg(DeadCarryReg, RegState::Define | RegState::Dead) .add(SrcReg0Sub1) .add(SrcReg1Sub1) - .addReg(CarryReg, RegState::Kill); + .addReg(CarryReg, RegState::Kill) + .addImm(0); // clamp bit BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -1760,10 +1760,12 @@ int Pattern = 1; } -class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0> { +class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, + bit _EnableClamp = 0> { field list<ValueType> ArgVT = _ArgVT; field bit EnableF32SrcMods = _EnableF32SrcMods; + field bit EnableClamp = _EnableClamp; field ValueType DstVT = ArgVT[0]; field ValueType Src0VT = ArgVT[1]; @@ -1817,7 +1819,7 @@ field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0); field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0); - field bit HasClamp = isModifierType<Src0VT>.ret; + field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret; field bit HasSDWAClamp = EmitDst; field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret; field bit HasIntClamp = !if(isFloatType<Src0VT>.ret, 0, HasClamp); @@ -1943,6 +1945,7 @@ def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; +def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>; def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; Index: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -723,7 +723,8 @@ TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg) .addReg(ImmReg) - .addReg(AddrReg->getReg(), 0, BaseSubReg); + .addReg(AddrReg->getReg(), 0, BaseSubReg) + .addImm(0); // clamp bit BaseSubReg = 0; } @@ -816,7 +817,8 @@ TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg) .addReg(ImmReg) - .addReg(AddrReg->getReg(), 0, BaseSubReg); + .addReg(AddrReg->getReg(), 0, BaseSubReg) + .addImm(0); // clamp bit BaseSubReg = 0; } @@ -1144,7 +1146,8 @@ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0) .addReg(CarryReg, RegState::Define) .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg) - .add(OffsetLo); + .add(OffsetLo) + .addImm(0); // clamp bit (void)LoHalf; LLVM_DEBUG(dbgs() << " "; LoHalf->dump();); @@ -1153,7 +1156,8 @@ .addReg(DeadCarryReg, RegState::Define | RegState::Dead) .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg) .add(OffsetHi) - .addReg(CarryReg, RegState::Kill); + .addReg(CarryReg, RegState::Kill) + .addImm(0); // clamp bit (void)HiHalf; LLVM_DEBUG(dbgs() << " "; HiHalf->dump();); Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -334,7 +334,8 @@ TII->getAddNoCarry(*MBB, Ins, DL, BaseReg) .addReg(OffsetReg, RegState::Kill) - .addReg(FIReg); + .addReg(FIReg) + .addImm(0); // clamp bit } void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, @@ -1108,7 +1109,8 @@ if (AMDGPU::isInlinableLiteral32(Offset, 
ST.hasInv2PiInlineImm())) { TII->getAddNoCarry(*MBB, MI, DL, ResultReg) .addImm(Offset) - .addReg(ScaledReg, RegState::Kill); + .addReg(ScaledReg, RegState::Kill) + .addImm(0); // clamp bit } else { unsigned ConstOffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); @@ -1117,7 +1119,8 @@ .addImm(Offset); TII->getAddNoCarry(*MBB, MI, DL, ResultReg) .addReg(ConstOffsetReg, RegState::Kill) - .addReg(ScaledReg, RegState::Kill); + .addReg(ScaledReg, RegState::Kill) + .addImm(0); // clamp bit } } Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td @@ -277,9 +277,9 @@ def VOP_MAC_F32 : VOP_MAC ; // Write out to vcc or arbitrary SGPR. -def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { +def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> { let Asm32 = "$vdst, vcc, $src0, $src1"; - let Asm64 = "$vdst, $sdst, $src0, $src1"; + let Asm64 = "$vdst, $sdst, $src0, $src1$clamp"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; @@ -289,7 +289,7 @@ // Write out to vcc or arbitrary SGPR and read in from vcc or // arbitrary SGPR. -def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { +def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> { // We use VCSrc_b32 to exclude literal constants, even though the // encoding normally allows them since the implicit VCC use means // using one would always violate the constant bus @@ -297,7 +297,7 @@ // technically be possible to use VCC again as src0. 
let Src0RC32 = VCSrc_b32; let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; - let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; + let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; @@ -440,9 +440,9 @@ let SubtargetPredicate = HasAddNoCarryInsts in { -defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32, null_frag, "v_add_u32", 1>; -defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>; -defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>; +defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>; +defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; +defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; } } // End isCommutable = 1 @@ -473,12 +473,12 @@ def : GCNPat< (AMDGPUadde i32:$src0, i32:$src1, i1:$src2), - (V_ADDC_U32_e64 $src0, $src1, $src2) + (V_ADDC_U32_e64 $src0, $src1, $src2, 0) >; def : GCNPat< (AMDGPUsube i32:$src0, i32:$src1, i1:$src2), - (V_SUBB_U32_e64 $src0, $src1, $src2) + (V_SUBB_U32_e64 $src0, $src1, $src2, 0) >; // These instructions only exist on SI and CI @@ -505,6 +505,15 @@ ) >; +class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : + GCNPat< + (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), + !if(!cast<Commutable_REV>(Inst).IsOrig, + (Inst $src0, $src1, 0), + (Inst $src1, $src0, 0) + ) + >; + let AddedComplexity = 1 in { def : DivergentBinOp; def : DivergentBinOp; @@ -520,7 +529,7 @@ def : DivergentBinOp; -def : DivergentBinOp; +def : DivergentClampingBinOp; def : DivergentBinOp; def : DivergentBinOp; Index: llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir +++ llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir @@ -308,7 +308,7 @@ bb.0: dead %0:vgpr_32 = V_ADD_I32_e32 12345, undef %1:vgpr_32, implicit-def $vcc, implicit $exec %2:sreg_64_xexec = COPY $vcc - %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 undef %5:vgpr_32, undef %6:vgpr_32, %2, implicit $exec + %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 undef %5:vgpr_32, undef %6:vgpr_32, %2, 0, implicit $exec S_ENDPGM 0, implicit %3 ... Index: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir +++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir @@ -21,7 +21,7 @@ %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF - %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %5 ... 
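For readers scanning the test updates that follow: every e64 (VOP3-encoded) form of these carry ops gains exactly one new explicit operand, the clamp bit, placed after the last data source and before the implicit operands. A minimal MIR sketch, old form first, new form second (register numbers are illustrative, not taken from any particular test):

    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec

All of the MIR changes below follow this pattern, with the clamp operand always written as 0.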
@@ -36,8 +36,8 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec - ; GCN: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF1]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec + ; GCN: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF1]], 0, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_1]], implicit [[V_ADD_I32_e64_2]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF @@ -45,8 +45,8 @@ %3:vgpr_32 = IMPLICIT_DEF %4:vgpr_32 = IMPLICIT_DEF - %5:vgpr_32, %6:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec - %7:vgpr_32, %8:sreg_64_xexec = V_ADD_I32_e64 %0, %2, implicit $exec + %5:vgpr_32, %6:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %7:vgpr_32, %8:sreg_64_xexec = V_ADD_I32_e64 %0, %2, 0, implicit $exec S_ENDPGM 0, implicit %6, implicit %7 ... @@ -70,7 +70,7 @@ %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF - %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec DBG_VALUE %5, $noreg S_ENDPGM 0, implicit %4 @@ -92,15 +92,15 @@ ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc - ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], implicit $exec + ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], 0, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF - %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec - %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec + %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, 0, implicit $exec S_ENDPGM 0, implicit %6 ... Index: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir +++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -15,7 +15,7 @@ ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -34,7 +34,7 @@ ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
@@ -52,7 +52,7 @@ ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -70,11 +70,11 @@ ; GCN-LABEL: name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] %0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec %1:sreg_32_xm0 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -89,11 +89,11 @@ ; GCN-LABEL: name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], 0, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] %0:sreg_32_xm0 = IMPLICIT_DEF %1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -109,12 +109,12 @@ ; GCN: $vcc = S_MOV_B64 -1 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc $vcc = S_MOV_B64 -1 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2, implicit $vcc ... 
@@ -131,7 +131,7 @@ ; GCN: $vcc = S_MOV_B64 -1 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: bb.1: ; GCN: liveins: $vcc ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc @@ -140,7 +140,7 @@ $vcc = S_MOV_B64 -1 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec bb.1: liveins: $vcc @@ -158,7 +158,7 @@ ; GCN: successors: %bb.1(0x80000000) ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: bb.1: ; GCN: liveins: $vcc_lo ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_lo @@ -167,7 +167,7 @@ $vcc = S_MOV_B64 -1 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec bb.1: liveins: $vcc_lo @@ -190,7 +190,7 @@ ; GCN: liveins: $vcc ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_lo bb.0: successors: %bb.1 @@ -200,7 +200,7 @@ liveins: $vcc %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2, implicit $vcc_lo ... @@ -219,7 +219,7 @@ ; GCN: liveins: $vcc_hi ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], 0, implicit $exec ; GCN: bb.2: ; GCN: liveins: $vcc_hi ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_hi @@ -231,7 +231,7 @@ liveins: $vcc_hi %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec bb.2: liveins: $vcc_hi @@ -254,7 +254,7 @@ ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... 
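Note that only the e64 forms change in these shrink tests; the V_ADD_I32_e32, V_SUB_I32_e32, and V_SUBREV_I32_e32 forms in the GCN check lines keep their old operand lists, since the 32-bit VOP2 encoding has no clamp field. A sketch of the pair these fold/shrink tests exercise, assuming the clamp operand is 0 so the e32 form can still be produced (registers illustrative):

    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
    %2:vgpr_32 = V_ADD_I32_e32 %0, %1, implicit-def $vcc, implicit $exec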
@@ -273,7 +273,7 @@ ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 - %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -292,7 +292,7 @@ ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -311,7 +311,7 @@ ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 - %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -367,7 +367,7 @@ S_NOP 0 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_NOP 0 S_NOP 0 @@ -399,7 +399,7 @@ S_NOP 0, implicit-def $vcc %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_NOP 0 S_NOP 0 @@ -480,7 +480,7 @@ DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0, implicit %2 ... @@ -557,7 +557,7 @@ S_NOP 0 S_NOP 0 S_NOP 0 - %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 Index: llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir +++ llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir @@ -103,9 +103,9 @@ %22:vgpr_32 = COPY %14.sub0 %23:sgpr_32 = COPY %4.sub1 %24:vgpr_32 = COPY %14.sub1 - %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21, %22, implicit $exec + %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21, %22, 0, implicit $exec %25:vgpr_32 = COPY %23 - %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 %25, %24, killed %19, implicit $exec + %18:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 %25, %24, killed %19, 0, implicit $exec %16:vreg_64 = REG_SEQUENCE %17, %subreg.sub0, %18, %subreg.sub1 %11:vreg_64 = COPY %16 Index: llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -302,12 +302,12 @@ body: | bb.0: - $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec $sgpr4 = V_READLANE_B32 $vgpr4, $sgpr0 S_BRANCH %bb.1 bb.1: - $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec $vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0, $vgpr4 S_BRANCH %bb.2 Index: llvm/trunk/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir =================================================================== --- 
llvm/trunk/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir +++ llvm/trunk/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir @@ -2,8 +2,8 @@ # GCN-LABEL: name: cluster_add_addc # GCN: S_NOP 0, implicit-def $vcc -# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec -# GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %3, implicit $exec +# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec +# GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %3, 0, implicit $exec name: cluster_add_addc registers: - { id: 0, class: vgpr_32 } @@ -19,18 +19,18 @@ bb.0: %0 = V_MOV_B32_e32 0, implicit $exec %1 = V_MOV_B32_e32 0, implicit $exec - %2, %3 = V_ADD_I32_e64 %0, %1, implicit $exec + %2, %3 = V_ADD_I32_e64 %0, %1, 0, implicit $exec %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec S_NOP 0, implicit def $vcc - %4, %5 = V_ADDC_U32_e64 %6, %7, %3, implicit $exec + %4, %5 = V_ADDC_U32_e64 %6, %7, %3, 0, implicit $exec ... # GCN-LABEL: name: interleave_add64s -# GCN: dead %8:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec -# GCN-NEXT: dead %12:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %4, %5, %9, implicit $exec -# GCN-NEXT: dead %10:vgpr_32, %11:sreg_64_xexec = V_ADD_I32_e64 %2, %3, implicit $exec -# GCN-NEXT: dead %14:vgpr_32, dead %15:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %11, implicit $exec +# GCN: dead %8:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec +# GCN-NEXT: dead %12:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %4, %5, %9, 0, implicit $exec +# GCN-NEXT: dead %10:vgpr_32, %11:sreg_64_xexec = V_ADD_I32_e64 %2, %3, 0, implicit $exec +# GCN-NEXT: dead %14:vgpr_32, dead %15:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %11, 0, implicit $exec name: interleave_add64s registers: - { id: 0, class: vgpr_32 } @@ -61,18 +61,18 @@ %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec - %8, %9 = V_ADD_I32_e64 %0, %1, implicit $exec - %10, %11 = V_ADD_I32_e64 %2, %3, implicit $exec + %8, %9 = V_ADD_I32_e64 %0, %1, 0, implicit $exec + %10, %11 = V_ADD_I32_e64 %2, %3, 0, implicit $exec - %12, %13 = V_ADDC_U32_e64 %4, %5, %9, implicit $exec - %14, %15 = V_ADDC_U32_e64 %6, %7, %11, implicit $exec + %12, %13 = V_ADDC_U32_e64 %4, %5, %9, 0, implicit $exec + %14, %15 = V_ADDC_U32_e64 %6, %7, %11, 0, implicit $exec ... # GCN-LABEL: name: cluster_mov_addc # GCN: S_NOP 0, implicit-def $vcc # GCN-NEXT: %2:sreg_64_xexec = S_MOV_B64 0 -# GCN-NEXT: dead %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 %0, %1, %2, implicit $exec +# GCN-NEXT: dead %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 %0, %1, %2, 0, implicit $exec name: cluster_mov_addc registers: - { id: 0, class: vgpr_32 } @@ -89,16 +89,16 @@ %1 = V_MOV_B32_e32 0, implicit $exec %2 = S_MOV_B64 0 S_NOP 0, implicit def $vcc - %3, %4 = V_ADDC_U32_e64 %0, %1, %2, implicit $exec + %3, %4 = V_ADDC_U32_e64 %0, %1, %2, 0, implicit $exec ... 
# GCN-LABEL: name: no_cluster_add_addc_diff_sgpr -# GCN: dead %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec +# GCN: dead %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, 0, implicit $exec # GCN-NEXT: %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: %7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: S_NOP 0, implicit-def $vcc # GCN-NEXT: %8:sreg_64_xexec = S_MOV_B64 0 -# GCN-NEXT: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %8, implicit $exec +# GCN-NEXT: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %8, 0, implicit $exec name: no_cluster_add_addc_diff_sgpr registers: - { id: 0, class: vgpr_32 } @@ -115,16 +115,16 @@ %0 = V_MOV_B32_e32 0, implicit $exec %1 = V_MOV_B32_e32 0, implicit $exec %8 = S_MOV_B64 0 - %2, %3 = V_ADD_I32_e64 %0, %1, implicit $exec + %2, %3 = V_ADD_I32_e64 %0, %1, 0, implicit $exec %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec S_NOP 0, implicit def $vcc - %4, %5 = V_ADDC_U32_e64 %6, %7, %8, implicit $exec + %4, %5 = V_ADDC_U32_e64 %6, %7, %8, 0, implicit $exec ... # GCN-LABEL: name: cluster_sub_subb # GCN: S_NOP 0, implicit-def $vcc -# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_SUB_I32_e64 %0, %1, implicit $exec -# GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_SUBB_U32_e64 %6, %7, %3, implicit $exec +# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_SUB_I32_e64 %0, %1, 0, implicit $exec +# GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_SUBB_U32_e64 %6, %7, %3, 0, implicit $exec name: cluster_sub_subb registers: - { id: 0, class: vgpr_32 } @@ -140,11 +140,11 @@ bb.0: %0 = V_MOV_B32_e32 0, implicit $exec %1 = V_MOV_B32_e32 0, implicit $exec - %2, %3 = V_SUB_I32_e64 %0, %1, implicit $exec + %2, %3 = V_SUB_I32_e64 %0, %1, 0, implicit $exec %6 = V_MOV_B32_e32 0, implicit $exec %7 = V_MOV_B32_e32 0, implicit $exec S_NOP 0, implicit def $vcc - %4, %5 = V_SUBB_U32_e64 %6, %7, %3, implicit $exec + %4, %5 = V_SUBB_U32_e64 %6, %7, %3, 0, implicit $exec ... 
# GCN-LABEL: name: cluster_cmp_cndmask Index: llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir +++ llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir @@ -23,21 +23,21 @@ %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 - %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, implicit $exec + %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, implicit $exec - %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 4096 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %25, %21, implicit $exec - %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6144 - %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, implicit $exec - %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, implicit $exec + %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec ... 
@@ -76,26 +76,26 @@ %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 - %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, implicit $exec + %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, implicit $exec - %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 8000 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, implicit $exec - %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6400 - %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, implicit $exec - %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, implicit $exec + %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 11200 - %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, implicit $exec - %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, implicit $exec + %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec ... 
@@ -129,26 +129,26 @@ %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 - %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, implicit $exec + %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, implicit $exec - %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 6144 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, implicit $exec - %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 8192 - %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, implicit $exec - %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, implicit $exec + %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 10240 - %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, implicit $exec - %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, implicit $exec + %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec ... 
@@ -174,17 +174,17 @@ %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec %12:sgpr_32 = COPY %1.sub1 %13:vgpr_32 = COPY %5 - %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, implicit $exec + %14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec %16:vgpr_32 = COPY %12 - %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, implicit $exec + %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec - %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, implicit $exec - %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, implicit $exec + %21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec %25:sgpr_32 = S_MOV_B32 6144 - %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, implicit $exec - %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, implicit $exec + %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec ... Index: llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -236,23 +236,23 @@ undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit $exec %38.sub0:vreg_64 = COPY %37.sub0 %39:vreg_64 = V_LSHLREV_B64 3, %38, implicit $exec - undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_I32_e64 0, %39.sub0, implicit $exec + undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_I32_e64 0, %39.sub0, 0, implicit $exec %42:vgpr_32 = COPY %33 - %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, implicit $exec + %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34) undef %45.sub1:vreg_64 = IMPLICIT_DEF %45.sub0:vreg_64 = COPY %37.sub1 %46:vreg_64 = V_LSHLREV_B64 3, %45, implicit $exec - undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_I32_e64 %32, %46.sub0, implicit $exec + undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_I32_e64 %32, %46.sub0, 0, implicit $exec %49:vgpr_32 = COPY %33 - %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, implicit $exec + %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec %51:vreg_64 = IMPLICIT_DEF undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8) %52.sub1:vreg_64 = IMPLICIT_DEF %53:vreg_64 = V_LSHLREV_B64 3, %52, implicit $exec - undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_I32_e64 0, %53.sub0, implicit $exec + undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_I32_e64 0, %53.sub0, 0, implicit $exec %56:vgpr_32 = COPY %33 - %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1, %55, implicit $exec + %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1, %55, 0, implicit $exec %58:vreg_64 = IMPLICIT_DEF 
%30.sub1:sreg_64_xexec = IMPLICIT_DEF %59:sreg_64 = IMPLICIT_DEF @@ -262,16 +262,16 @@ undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec %63.sub0:vreg_64 = COPY %62.sub0 %64:vreg_64 = IMPLICIT_DEF - undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_I32_e64 %60, %64.sub0, implicit $exec + undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_I32_e64 %60, %64.sub0, 0, implicit $exec %67:vgpr_32 = COPY %61 - %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, implicit $exec + %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58) undef %70.sub1:vreg_64 = IMPLICIT_DEF %70.sub0:vreg_64 = IMPLICIT_DEF %71:vreg_64 = IMPLICIT_DEF - undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_I32_e64 %60, %71.sub0, implicit $exec + undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_I32_e64 %60, %71.sub0, 0, implicit $exec %74:vgpr_32 = COPY %61 - %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, implicit $exec + %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, 0, implicit $exec %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, implicit $exec %77:vgpr_32 = IMPLICIT_DEF %78:vgpr_32 = IMPLICIT_DEF Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir @@ -26,20 +26,20 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec - %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, implicit $exec - %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, implicit $exec + %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec + %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %173:vgpr_32, %175:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %171, implicit $exec - %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, implicit $exec + %173:vgpr_32, %175:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %171, 0, implicit $exec + %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -68,20 +68,20 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 
implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, implicit $exec - %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, implicit $exec + %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec + %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 - %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, implicit $exec + %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, implicit $exec - %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, implicit $exec + %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec + %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -110,8 +110,8 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -140,8 +140,8 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -169,8 +169,8 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -199,7 +199,7 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - 
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -230,7 +230,7 @@ %24:sreg_64_xexec = COPY $sgpr0_sgpr1 %30:vreg_64 = COPY $sgpr0_sgpr1 - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -258,9 +258,9 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %30:vreg_64 = COPY $sgpr0_sgpr1 - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec $vcc = COPY %30 - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -290,8 +290,8 @@ %30:vreg_64 = COPY $sgpr0_sgpr1 $vcc = COPY %30 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -320,10 +320,10 @@ %30:vreg_64 = COPY $sgpr0_sgpr1 $vcc_lo = COPY %30.sub0 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %31:vgpr_32 = COPY $vcc_lo %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -351,10 +351,10 @@ %30:vreg_64 = COPY $sgpr0_sgpr1 $vcc_hi = COPY %30.sub0 %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %31:vgpr_32 = COPY $vcc_hi %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) @@ -381,10 +381,10 @@ %22:sreg_32_xm0 = S_MOV_B32 255 %30:vreg_64 = COPY $sgpr0_sgpr1 
%23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec - %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, implicit $exec + %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec $vcc = COPY %30 %31:vreg_64 = COPY killed $vcc - %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, implicit $exec + %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) Index: llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir +++ llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir @@ -20,7 +20,7 @@ %1 = IMPLICIT_DEF %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec - %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, implicit $exec + %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec ... @@ -44,7 +44,7 @@ %1 = IMPLICIT_DEF %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec - %4, %5 = V_SUBB_U32_e64 %0, 0, %3, implicit $exec + %4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec ... @@ -68,7 +68,7 @@ %1 = IMPLICIT_DEF %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec - %4, %5 = V_ADDC_U32_e64 0, %0, %3, implicit $exec + %4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec ... @@ -92,6 +92,6 @@ %1 = IMPLICIT_DEF %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec - %4, %5 = V_ADDC_U32_e64 %0, 0, %3, implicit $exec + %4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec ... Index: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir +++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir @@ -8,7 +8,7 @@ ... # GCN-LABEL: name: shrink_add_vop3{{$}} -# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %19, %17, implicit $exec +# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %19, %17, 0, implicit $exec # GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_add_vop3 alignment: 0 @@ -83,7 +83,7 @@ %16 = REG_SEQUENCE killed %4, 17, %12, 18 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec - %29, %9 = V_ADD_I32_e64 %19, %17, implicit $exec + %29, %9 = V_ADD_I32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -91,7 +91,7 @@ ... --- # GCN-LABEL: name: shrink_sub_vop3{{$}} -# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUB_I32_e64 %19, %17, implicit $exec +# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUB_I32_e64 %19, %17, 0, implicit $exec # GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_sub_vop3 @@ -167,7 +167,7 @@ %16 = REG_SEQUENCE killed %4, 17, %12, 18 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec - %29, %9 = V_SUB_I32_e64 %19, %17, implicit $exec + %29, %9 = V_SUB_I32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -175,7 +175,7 @@ ... 
--- # GCN-LABEL: name: shrink_subrev_vop3{{$}} -# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUBREV_I32_e64 %19, %17, implicit $exec +# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec # GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec name: shrink_subrev_vop3 @@ -251,7 +251,7 @@ %16 = REG_SEQUENCE killed %4, 17, %12, 18 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec - %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit $exec + %29, %9 = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -259,7 +259,7 @@ ... --- # GCN-LABEL: name: check_addc_src2_vop3{{$}} -# GCN: %29:vgpr_32, $vcc = V_ADDC_U32_e64 %19, %17, %9, implicit $exec +# GCN: %29:vgpr_32, $vcc = V_ADDC_U32_e64 %19, %17, %9, 0, implicit $exec # GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec name: check_addc_src2_vop3 alignment: 0 @@ -335,7 +335,7 @@ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec %9 = S_MOV_B64 0 - %29, $vcc = V_ADDC_U32_e64 %19, %17, %9, implicit $exec + %29, $vcc = V_ADDC_U32_e64 %19, %17, %9, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -420,7 +420,7 @@ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec $vcc = S_MOV_B64 0 - %29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, implicit $exec + %29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -504,7 +504,7 @@ %16 = REG_SEQUENCE killed %4, 17, %12, 18 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec - %29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, implicit $exec + %29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir +++ llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir @@ -50,7 +50,7 @@ bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec + %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... @@ -71,7 +71,7 @@ bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %1, %0, implicit $exec + %2, $vcc = V_ADD_I32_e64 %1, %0, 0, implicit $exec S_ENDPGM 0 ... @@ -93,7 +93,7 @@ bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %1, %0, implicit $exec + %2, $vcc = V_ADD_I32_e64 %1, %0, 0, implicit $exec S_ENDPGM 0 ... 
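For the carry-in variants (V_ADDC_U32_e64, V_SUBB_U32_e64, V_SUBBREV_U32_e64) the clamp immediate goes after the carry-in source, so the explicit operand order is src0, src1, carry-in, clamp, as in the shrink-carry.mir and check_addc updates above. An illustrative line (registers made up):

    %4:vgpr_32, %5:sreg_64_xexec = V_ADDC_U32_e64 %0, %1, %3, 0, implicit $exec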
@@ -115,7 +115,7 @@ bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF - %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec + %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... # TODO: Should probably prefer folding immediate first @@ -136,7 +136,7 @@ bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = V_MOV_B32_e32 999, implicit $exec - %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec + %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... @@ -157,5 +157,5 @@ bb.0: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = V_MOV_B32_e32 999, implicit $exec - %2, $vcc = V_ADD_I32_e64 %1, %0, implicit $exec + %2, $vcc = V_ADD_I32_e64 %1, %0, 0, implicit $exec S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir +++ llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir @@ -16,7 +16,7 @@ %0 = COPY undef %0 %0 = V_MOV_B32_e32 123, implicit $exec %1 = V_MOV_B32_e32 456, implicit $exec - %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec + %2, $vcc = V_ADD_I32_e64 %0, %1, 0, implicit $exec S_ENDPGM 0 ... @@ -34,7 +34,7 @@ bb.0: undef %3.sub0 = V_MOV_B32_e32 123, implicit $exec, implicit-def %3 %1 = V_MOV_B32_e32 456, implicit $exec - %2, $vcc = V_ADD_I32_e64 %3.sub0, %1, implicit $exec + %2, $vcc = V_ADD_I32_e64 %3.sub0, %1, 0, implicit $exec S_ENDPGM 0 ... Index: llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s +++ llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s @@ -455,6 +455,42 @@ // GXF9: [0x05,0x00,0x77,0xd1,0xc1,0x00,0x00,0x00] // NOVI: error: instruction not supported on this GPU +v_add_u32 v84, v13, s31 clamp +// GFX9: v_add_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x34,0xd1,0x0d,0x3f,0x00,0x00] +// NOVI: error: + +v_sub_u32 v84, v13, s31 clamp +// GFX9: v_sub_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x35,0xd1,0x0d,0x3f,0x00,0x00] +// NOVI: error: + +v_subrev_u32 v84, v13, s31 clamp +// GFX9: v_subrev_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x36,0xd1,0x0d,0x3f,0x00,0x00] +// NOVI: error: + +v_addc_co_u32 v84, s[4:5], v13, v31, vcc clamp +// GFX9: v_addc_co_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] +// NOVI: error: + +v_subb_co_u32 v84, s[2:3], v13, v31, vcc clamp +// GFX9: v_subb_co_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: [0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] +// NOVI: error: + +v_subbrev_co_u32 v84, vcc, v13, v31, s[6:7] clamp +// GFX9: v_subbrev_co_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] +// NOVI: error: + +v_add_co_u32 v84, s[4:5], v13, v31 clamp +// GFX9: v_add_co_u32_e64 v84, s[4:5], v13, v31 clamp ; encoding: [0x54,0x84,0x19,0xd1,0x0d,0x3f,0x02,0x00] +// NOVI: error: + +v_sub_co_u32 v84, s[2:3], v13, v31 clamp +// GFX9: v_sub_co_u32_e64 v84, s[2:3], v13, v31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x02,0x00] +// NOVI: error: + +v_subrev_co_u32 v84, vcc, v13, v31 clamp +// GFX9: v_subrev_co_u32_e64 v84, vcc, v13, v31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x02,0x00] +// NOVI: error: + //===----------------------------------------------------------------------===// // Validate register size checks (bug 37943) //===----------------------------------------------------------------------===// @@ -534,3 +570,4 @@ // NOVI: error: invalid 
operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, v0, v[0:1] + Index: llvm/trunk/test/MC/AMDGPU/vop3.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop3.s +++ llvm/trunk/test/MC/AMDGPU/vop3.s @@ -283,6 +283,30 @@ // NOSICI: error: // VI: v_mac_f16_e64 v0, flat_scratch_lo, -4.0 ; encoding: [0x00,0x00,0x23,0xd1,0x66,0xee,0x01,0x00] +v_add_u32 v84, vcc, v13, s31 clamp +// NOSICI: error: +// VI: v_add_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x19,0xd1,0x0d,0x3f,0x00,0x00] + +v_sub_u32 v84, s[2:3], v13, s31 clamp +// NOSICI: error: +// VI: v_sub_u32_e64 v84, s[2:3], v13, s31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x00,0x00] + +v_subrev_u32 v84, vcc, v13, s31 clamp +// NOSICI: error: +// VI: v_subrev_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x00,0x00] + +v_addc_u32 v84, s[4:5], v13, v31, vcc clamp +// NOSICI: error: +// VI: v_addc_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] + +v_subb_u32 v84, s[2:3], v13, v31, vcc clamp +// NOSICI: error: +// VI: v_subb_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: [0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] + +v_subbrev_u32 v84, vcc, v13, v31, s[6:7] clamp +// NOSICI: error: +// VI: v_subbrev_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] + ///===---------------------------------------------------------------------===// // VOP3 Instructions ///===---------------------------------------------------------------------===// Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -695,3 +695,31 @@ # GXF9: v_screen_partition_4se_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] 0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00 + +# GFX9: v_add_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x34,0xd1,0x0d,0x3f,0x00,0x00] +0x54,0x80,0x34,0xd1,0x0d,0x3f,0x00,0x00 + +# GFX9: v_sub_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x35,0xd1,0x0d,0x3f,0x00,0x00] +0x54,0x80,0x35,0xd1,0x0d,0x3f,0x00,0x00 + +# GFX9: v_subrev_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x36,0xd1,0x0d,0x3f,0x00,0x00] +0x54,0x80,0x36,0xd1,0x0d,0x3f,0x00,0x00 + +# GFX9: v_addc_co_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] +0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01 + +# GFX9: v_subb_co_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: [0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] +0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01 + +# GFX9: v_subbrev_co_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] +0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00 + +# GFX9: v_add_co_u32_e64 v84, s[4:5], v13, v31 clamp ; encoding: [0x54,0x84,0x19,0xd1,0x0d,0x3f,0x02,0x00] +0x54,0x84,0x19,0xd1,0x0d,0x3f,0x02,0x00 + +# GFX9: v_sub_co_u32_e64 v84, s[2:3], v13, v31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x02,0x00] +0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x02,0x00 + +# GFX9: v_subrev_co_u32_e64 v84, vcc, v13, v31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x02,0x00] +0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x02,0x00 + Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt =================================================================== --- 
llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt @@ -491,3 +491,22 @@ # VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_add_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x19,0xd1,0x0d,0x3f,0x00,0x00] +0x54,0xea,0x19,0xd1,0x0d,0x3f,0x00,0x00 + +# VI: v_sub_u32_e64 v84, s[2:3], v13, s31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x00,0x00] +0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x00,0x00 + +# VI: v_subrev_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x00,0x00] +0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x00,0x00 + +# VI: v_addc_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] +0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01 + +# VI: v_subb_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: [0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] +0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01 + +# VI: v_subbrev_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] +0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00 +