Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8804,6 +8804,7 @@
   }
 
   int Fi = 0;
+  int Clamp = 0;
   for (unsigned E = Operands.size(); I != E; ++I) {
     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                             MCOI::TIED_TO);
@@ -8820,6 +8821,10 @@
       continue;
     }
 
+    if (Op.isClampSI()) {
+      Clamp = Op.getImm();
+      continue;
+    }
     if (IsDPP8) {
       if (Op.isDPP8()) {
         Op.addImmOperands(Inst, 1);
@@ -8850,8 +8855,15 @@
   if (IsDPP8) {
     using namespace llvm::AMDGPU::DPP;
+    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) {
+      Inst.addOperand(MCOperand::createImm(Clamp));
+    }
     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
   } else {
+    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) {
+      addOptionalImmOperand(Inst, Operands, OptionalIdx,
+                            AMDGPUOperand::ImmTyClampSI, Clamp);
+    }
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2310,8 +2310,12 @@
       MovDPP.addDef(Tmp);
     }
 
+    MachineOperand *Src0Mods =
+        getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
     for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
       const MachineOperand &SrcOp = MI.getOperand(I);
+      if (I == 2)
+        MovDPP.addImm(Src0Mods ? Src0Mods->getImm() : 0);
       assert(!SrcOp.isFPImm());
       if (SrcOp.isImm()) {
         APInt Imm(64, SrcOp.getImm());
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -211,11 +211,14 @@
 defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
 }
 
-def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
+def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped], 1> {
   let InsVOPDX = (ins Src0RC32:$src0X);
   let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
   let InsVOPDY = (ins Src0RC32:$src0Y);
   let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
+  let HasModifiers = 1;
+  let Src0Mod = FP32InputMods;
+  let Src0ModVOP3DPP = FPVRegInputMods;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1188,7 +1191,7 @@
 def : GCNPat <
   (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                            timm:$bank_mask, timm:$bound_ctrl)),
-  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+  (V_MOV_B32_dpp VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                  (as_i32timm $row_mask), (as_i32timm $bank_mask),
                  (as_i1timm $bound_ctrl))
 >;
@@ -1197,7 +1200,7 @@
   (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
                               timm:$row_mask, timm:$bank_mask,
                               timm:$bound_ctrl)),
-  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+  (V_MOV_B32_dpp VGPR_32:$old, SRCMODS.NONE, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                  (as_i32timm $row_mask), (as_i32timm $bank_mask),
                  (as_i1timm $bound_ctrl))
 >;
@@ -1287,7 +1290,7 @@
 let OtherPredicates = [isGFX10Only] in {
 def : GCNPat <
   (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
-  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
+  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
 } // End OtherPredicates = [isGFX10Only]
@@ -1299,7 +1302,7 @@
 let OtherPredicates = [isGFX11Only] in {
 def : GCNPat <
   (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
-  (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
+  (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
 } // End OtherPredicates = [isGFX11Only]
Index: llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -39,8 +39,8 @@
 # DPP64 does not support all control values and must be split to become legal
 # GCN-LABEL: name: dpp64_illegal_ctrl
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
-# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, 0, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, 0, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
 # GCN: %0:vreg_64_align2 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
 # GCN: %3:vreg_64_align2 = V_CEIL_F64_e32 %0, implicit $mode, implicit $exec
 name: dpp64_illegal_ctrl
Index: llvm/test/CodeGen/AMDGPU/dpp_combine.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -25,29 +25,29 @@
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; VOP2
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 
-    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
 
-    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
 
     ; VOP1
-    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
 
-    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
 
-    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
 
-    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
 ...
@@ -109,29 +109,29 @@
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
     ; VOP2
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 
-    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
 
-    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
 
     ; VOP1
-    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
 
-    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
 
-    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
 
-    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
 ...
@@ -158,19 +158,19 @@
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 15, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 15, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
 ...
@@ -196,19 +196,19 @@
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
 ...
@@ -234,19 +234,19 @@
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 1, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 1, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 1, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
 ...
@@ -268,28 +268,28 @@
     %1:vgpr_32 = COPY $vgpr1
 
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec
 
     %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
     %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec
 
     ; this cannot be combined because immediate as src0 isn't commutable
     %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
+    %18:vgpr_32 = V_MOV_B32_dpp %17, 0, %0, 1, 14, 15, 0, implicit $exec
     %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
 ...
@@ -297,7 +297,7 @@
 # check for floating point modifiers
 # GCN-LABEL: name: add_f32_e64
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
 # GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
 # GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
 # GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -314,19 +314,19 @@
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this shouldn't be combined as omod is set
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
 
     ; this should be combined as all modifiers are default
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this should be combined as modifiers other than abs|neg are default
-    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this shouldn't be combined as modifiers aren't abs|neg
-    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
 ...
@@ -346,11 +346,11 @@
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this should be combined as all modifiers are default
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
 
     ; this shouldn't be combined as clamp is set
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
 ...
@@ -368,7 +368,7 @@
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this shouldn't be combined as the carry-out is used
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec
     S_NOP 0, implicit %5
@@ -380,7 +380,7 @@
 # GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
 # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
 # broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
 
 name: dpp_seq
 tracksRegLiveness: true
@@ -391,12 +391,12 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
     %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
     ; this breaks the sequence
     %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
@@ -418,7 +418,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
     %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -450,7 +450,7 @@
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
 ...
@@ -471,13 +471,13 @@
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 ...
 
 # EXEC mask changed between def and use - cannot combine
 # GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
 
 name: exec_changed
 tracksRegLiveness: true
@@ -488,7 +488,7 @@
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
     %5:sreg_64 = COPY $exec, implicit-def $exec
     %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -511,7 +511,7 @@
     %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
-    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
     %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
 ...
@@ -528,7 +528,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
@@ -545,7 +545,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
@@ -557,7 +557,7 @@
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
@@ -569,7 +569,7 @@
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
 ...
@@ -580,43 +580,43 @@
 tracksRegLiveness: true
 body: |
   bb.0:
-    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which writes a physreg.
 # GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 name: phys_dpp_mov_dst
 tracksRegLiveness: true
 body: |
   bb.0:
-    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_old_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
@@ -637,8 +637,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -649,7 +649,7 @@
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,7 +673,7 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
@@ -687,8 +687,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -698,8 +698,8 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -712,8 +712,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -723,8 +723,8 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: S_BRANCH %bb.1
 # GCN: bb.1:
@@ -739,8 +739,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     S_BRANCH %bb.1
@@ -753,8 +753,8 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
 # GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
@@ -768,8 +768,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
@@ -803,7 +803,7 @@
 ...
 
 # GCN-LABEL: name: dpp64_add64_first_combined
-# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
+# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, 0, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
 # GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
 # GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
 # GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
@@ -827,7 +827,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:sreg_64_xexec = IMPLICIT_DEF
     %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
 ...
@@ -847,7 +847,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
     S_ENDPGM 0, implicit %4
@@ -862,7 +862,7 @@
     liveins: $vgpr0, $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
-    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
 ...
@@ -876,8 +876,8 @@
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
     %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -903,7 +903,7 @@
   ; GCN-NEXT: successors: %bb.2(0x80000000)
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2
-  ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec
+  ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[PHI]], 323, 15, 15, 0, implicit $exec
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -925,7 +925,7 @@
 
   bb.1:
     %4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2
-    %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %1, 0, %4, 323, 15, 15, 0, implicit $exec
 
   bb.2:
     %6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec
Index: llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -17,7 +17,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
     %3:vgpr_32 = IMPLICIT_DEF
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
     %5:sreg_32_xm0_xexec = IMPLICIT_DEF
     %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
 
@@ -25,11 +25,11 @@
     %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
 
     ; should not be combined because src2 literal is illegal
-    %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
 
     ; should not be combined because src1 imm is illegal
-    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
 ...
@@ -47,9 +47,9 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
     %3:vgpr_32 = IMPLICIT_DEF
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %5, 1, 15, 15, 1, implicit $exec
     %7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
 ...
@@ -65,9 +65,9 @@
     ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
     ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
     ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
     ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
     ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -79,23 +79,23 @@
     %3:vgpr_32 = IMPLICIT_DEF
 
     ; this should not be combined because op_sel is not zero
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
 
     ; this should not be combined because op_sel_hi is not all set
-    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
 
-    %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
 
-    %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
 
-    %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
 
-    %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %14:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
 ...
@@ -112,7 +112,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
     %3:vgpr_32 = IMPLICIT_DEF
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
 ...
@@ -133,23 +133,23 @@
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
 
     %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
     %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
 ...
@@ -169,13 +169,13 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
 
-    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
 ...
@@ -197,24 +197,24 @@
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
 
     %5:vgpr_32 = IMPLICIT_DEF
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 1, implicit $exec
     %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 14, 15, 0, implicit $exec
     %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
 
     ; this cannot be combined because immediate as src0 isn't commutable
     %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
+    %16:vgpr_32 = V_MOV_B32_dpp %15, 0, %0, 1, 14, 15, 0, implicit $exec
     %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
 ...
@@ -238,19 +238,19 @@
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this should be combined as e64
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
 
     ; this should be combined and shrunk as all modifiers are default
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this should be combined and shrunk as modifiers other than abs|neg are default
-    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this should be combined as e64
-    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
 ...
@@ -270,11 +270,11 @@
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this should be combined and shrunk as all modifiers are default
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
 
     ; this should be combined as _e64
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
 ...
@@ -284,7 +284,7 @@
 # GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
 # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
 # broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
 
 name: dpp_seq
 tracksRegLiveness: true
@@ -295,12 +295,12 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
     %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
     ; this breaks the sequence
     %9:vgpr_32 = V_SUB_U32_e32 5, %7, implicit $exec
@@ -322,7 +322,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
     %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -354,7 +354,7 @@
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
 ...
@@ -375,13 +375,13 @@
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 ...
 
 # EXEC mask changed between def and use - cannot combine
 # GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
 
 name: exec_changed
 tracksRegLiveness: true
@@ -392,7 +392,7 @@
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
     %5:sreg_64 = COPY $exec, implicit-def $exec
     %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -415,7 +415,7 @@
     %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
-    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
 ...
@@ -432,7 +432,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
@@ -449,7 +449,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
@@ -461,7 +461,7 @@
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
@@ -473,7 +473,7 @@
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
 ...
@@ -484,43 +484,43 @@
 tracksRegLiveness: true
 body: |
   bb.0:
-    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which writes a physreg.
 # GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 name: phys_dpp_mov_dst
 tracksRegLiveness: true
 body: |
   bb.0:
-    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_old_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
@@ -541,8 +541,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -553,7 +553,7 @@
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -566,8 +566,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -577,7 +577,7 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
@@ -591,8 +591,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -602,8 +602,8 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -616,8 +616,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -627,8 +627,8 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: S_BRANCH %bb.1
 # GCN: bb.1:
@@ -643,8 +643,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     S_BRANCH %bb.1
@@ -657,8 +657,8 @@
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
 # GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
@@ -672,8 +672,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
@@ -681,8 +681,8 @@
 ...
 
 # GCN-LABEL: name: dpp_reg_sequence_src2_reject
-#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 #GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
 #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 #GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
@@ -695,8 +695,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
 
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
     ; use of dpp arg as src2, reject
@@ -706,7 +706,7 @@
 ...
 
 # GCN-LABEL: name: dpp_reg_sequence_src2
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 #GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
 #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 #GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -718,8 +718,8 @@
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
 
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
     %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
@@ -764,12 +764,12 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:sreg_32_xm0_xexec = IMPLICIT_DEF
     %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
 
     ; src2 is legal for _e64
-    %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %7:sreg_32_xm0_xexec = IMPLICIT_DEF
     %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
 ...
@@ -789,7 +789,7 @@
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
     S_ENDPGM 0, implicit %4
@@ -804,7 +804,7 @@
     liveins: $vgpr0, $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
-    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
 ...
@@ -818,8 +818,8 @@
 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 %0:vreg_64 = COPY $vgpr0_vgpr1
 %1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
 %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
 %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
Index: llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -419,11 +419,11 @@
 body: |
 bb.0:
 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr1 = V_MOV_B32_dpp $vgpr1, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
 S_BRANCH %bb.1
 bb.1:
 implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr3 = V_MOV_B32_dpp $vgpr3, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
 S_ENDPGM 0
 ...
Index: llvm/test/CodeGen/AMDGPU/remat-vop.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -82,16 +82,16 @@
 body: |
 bb.0:
 ; GCN-LABEL: name: test_remat_v_mov_b32_e64
- ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 0, 2, 0, implicit $exec
 ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
 ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 0, 3, 0, implicit $exec
 ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
 ; GCN-NEXT: S_ENDPGM 0
- %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
+ %0:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e64 0, 2, 0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e64 0, 3, 0, implicit $exec
 S_NOP 0, implicit %0
 S_NOP 0, implicit %1
 S_NOP 0, implicit %2
@@ -105,10 +105,10 @@
 body: |
 bb.0:
 ; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
- ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
 ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
 ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
 ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
 ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
@@ -116,9 +116,9 @@
 ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
 ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
 ; GCN-NEXT: S_ENDPGM 0
- %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
 S_NOP 0, implicit %1
 S_NOP 0, implicit %2
 S_NOP 0, implicit %3
Index: llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
+++ llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
@@ -100,7 +100,7 @@
 %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
 %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 %25, implicit $exec
+ %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
 %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
 %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
 %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
Index: llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -109,7 +109,7 @@
 %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
 %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 %25, implicit $exec
+ %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
 %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
 %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
 %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
Index: llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -54,7 +54,7 @@
 %18 = V_LSHRREV_B32_e64 16, %17, implicit $exec
 %19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
- %20 = V_MOV_B32_e64 %19, implicit $exec
+ %20 = V_MOV_B32_e64 0, %19, 0, implicit $exec
 FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
 $sgpr30_sgpr31 = COPY %2
Index: llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -17,15 +17,15 @@
 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
 ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
 ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
 ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
 ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
 ; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
 ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
 ; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
 ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
 ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
@@ -35,41 +35,41 @@
 %2:vgpr_32 = COPY $vgpr2
 %3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 V_CMP_LT_F32_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
 ; unsafe to combine cmpx
- %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
 ; unsafe to combine cmpx
- %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
 ; shrink
- %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 %14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
 ; do not shrink True16 instructions
- %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
 ; do not shrink, sdst used
- %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 %18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec
 %19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc
 ; commute
- %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %20:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
 V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec
 ...
@@ -88,9 +88,9 @@
 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 14, 1, implicit $exec
 ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[COPY1]], 1, 13, 15, 1, implicit $exec
 ; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
 %0:vgpr_32 = COPY $vgpr0
 %1:vgpr_32 = COPY $vgpr1
@@ -99,10 +99,10 @@
 ; Do not combine VOPC when row_mask or bank_mask is not 0xf
 ; All cases are covered by generic rules for creating DPP instructions
- %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 14, 1, implicit $exec
 %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 13, 15, 1, implicit $exec
 %6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
 ...
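The remat and SDWA test updates earlier in this patch show the same scheme applied to the VOP3 form: judging from the rewritten lines, V_MOV_B32_e64 now carries a leading src0_modifiers immediate and a trailing clamp immediate around src0, as in this before/after sketch taken from the sdwa-peephole tests:

    %26 = V_MOV_B32_e64 %25, implicit $exec          (old operand list)
    %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec    (new: src0_modifiers, src0, clamp)

Both zeros are the neutral values, so the pre-existing tests change shape but not behavior.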
Index: llvm/test/CodeGen/AMDGPU/wqm.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/wqm.mir
+++ llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -178,7 +178,7 @@
 %11:vgpr_32 = COPY %16
 %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
 %14:vgpr_32 = COPY %7
- %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %14, 0, killed %10, 323, 12, 15, 0, implicit $exec
 early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
 BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
 S_ENDPGM 0
@@ -210,7 +210,7 @@
 %8:sreg_64 = COPY $exec
 %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, 0, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
 %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
 early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
Index: llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
+++ llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
@@ -265,6 +265,18 @@
 v_mov_b32_dpp v5, v1 row_xmask:15 row_mask:0x0 bank_mask:0x0
 // GFX10: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x6f,0x01,0x00]
+v_mov_b32_e64 v5, -v2
+// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0x81,0xd5,0x02,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -v2 clamp
+// GFX10: encoding: [0x05,0x80,0x81,0xd5,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2| clamp
+// GFX10: encoding: [0x05,0x81,0x81,0xd5,0x02,0x01,0x00,0x00]
+
 v_readfirstlane_b32 s5, v1
 // GFX10: encoding: [0x01,0x05,0x0a,0x7e]
Index: llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
+++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
@@ -2142,6 +2142,18 @@
 v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
+v_mov_b32_e64_dpp v5, -v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x02,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v2| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x02,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, -v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x80,0x81,0xd5,0xfa,0x00,0x00,0x20,0x02,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v2| clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x81,0x81,0xd5,0xfa,0x00,0x00,0x00,0x02,0x1b,0x00,0xff]
+
 v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
Index: llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
+++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
@@ -534,6 +534,18 @@
 v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_mov_b32_e64_dpp v5, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x02,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v2| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, -v2 clamp dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x80,0x81,0xd5,0xe9,0x00,0x00,0x20,0x02,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v2| clamp dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x81,0x81,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
+
 v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
Index: llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
+++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
@@ -2601,6 +2601,18 @@
 v_movrelsd_b32_e64 v255, v255
 // GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
+v_mov_b32_e64 v5, -v2
+// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0x81,0xd5,0x02,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -v2 clamp
+// GFX11: encoding: [0x05,0x80,0x81,0xd5,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2| clamp
+// GFX11: encoding: [0x05,0x81,0x81,0xd5,0x02,0x01,0x00,0x00]
+
 v_nop_e64
 // GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
Index: llvm/test/MC/AMDGPU/gfx7_asm_vop3.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx7_asm_vop3.s
+++ llvm/test/MC/AMDGPU/gfx7_asm_vop3.s
@@ -78,6 +78,12 @@
 v_mov_b32_e64 v5, src_lds_direct
 // CHECK: [0x05,0x00,0x02,0xd3,0xfe,0x00,0x00,0x00]
+v_mov_b32_e64 v5, -v2
+// CHECK: [0x05,0x00,0x02,0xd3,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2|
+// CHECK: [0x05,0x01,0x02,0xd3,0x02,0x01,0x00,0x00]
+
 v_cvt_i32_f64_e64 v5, v[1:2]
 // CHECK: [0x05,0x00,0x06,0xd3,0x01,0x01,0x00,0x00]
Index: llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
+++ llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
@@ -210,6 +210,18 @@
 v_mov_b32_e64 v5, src_lds_direct
 // CHECK: [0x05,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00]
+v_mov_b32_e64 v5, -v2
+// CHECK: [0x05,0x00,0x41,0xd1,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2|
+// CHECK: [0x05,0x01,0x41,0xd1,0x02,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -v2 clamp
+// CHECK: [0x05,0x80,0x41,0xd1,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2| clamp
+// CHECK: [0x05,0x81,0x41,0xd1,0x02,0x01,0x00,0x00]
+
 v_cvt_i32_f64_e64 v5, v[1:2]
 // CHECK: [0x05,0x00,0x43,0xd1,0x01,0x01,0x00,0x00]
Index: llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
+++ llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
@@ -198,6 +198,18 @@
 v_mov_b32_e64 v5, src_lds_direct
 // CHECK: [0x05,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00]
+v_mov_b32_e64 v5, -v2
+// CHECK: [0x05,0x00,0x41,0xd1,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2|
+// CHECK: [0x05,0x01,0x41,0xd1,0x02,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -v2 clamp
+// CHECK: [0x05,0x80,0x41,0xd1,0x02,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, |v2| clamp
+// CHECK: [0x05,0x81,0x41,0xd1,0x02,0x01,0x00,0x00]
+
 v_cvt_i32_f64_e64 v5, v[1:2]
 // CHECK: [0x05,0x00,0x43,0xd1,0x01,0x01,0x00,0x00]
Index: llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -14055,6 +14055,18 @@
 # GFX10: v_movrelsd_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_mov_b32_e64 v5, -v2 ; encoding: [0x05,0x00,0x81,0xd5,0x02,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x02,0x01,0x00,0x20
+
+# GFX10: v_mov_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0x81,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x02,0x01,0x00,0x00
+
+# GFX10: v_mov_b32_e64 v5, -v2 clamp ; encoding: [0x05,0x80,0x81,0xd5,0x02,0x01,0x00,0x20]
+0x05,0x80,0x81,0xd5,0x02,0x01,0x00,0x20
+
+# GFX10: v_mov_b32_e64 v5, |v2| clamp ; encoding: [0x05,0x81,0x81,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x81,0x81,0xd5,0x02,0x01,0x00,0x00
+
 # GFX10: v_mqsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04]
 0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04
Index: llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -1932,6 +1932,18 @@
 # GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_mov_b32_e64_dpp v5, -v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x02,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x02,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v2| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x02,0x1b,0x00,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x02,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, -v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x81,0xd5,0xfa,0x00,0x00,0x20,0x02,0x1b,0x00,0xff]
+0x05,0x80,0x81,0xd5,0xfa,0x00,0x00,0x20,0x02,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v2| clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x81,0x81,0xd5,0xfa,0x00,0x00,0x00,0x02,0x1b,0x00,0xff]
+0x05,0x81,0x81,0xd5,0xfa,0x00,0x00,0x00,0x02,0x1b,0x00,0xff
+
 # GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
Index: llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -426,6 +426,18 @@
 # GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_mov_b32_e64_dpp v5, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x02,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x02,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, -v2 clamp dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x80,0x81,0xd5,0xe9,0x00,0x00,0x20,0x02,0x77,0x39,0x05]
+0x05,0x80,0x81,0xd5,0xe9,0x00,0x00,0x20,0x02,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v2| clamp dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x81,0x81,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05]
+0x05,0x81,0x81,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05
+
 # GFX11: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
Index: llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -2376,6 +2376,18 @@
 # GFX11: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX11: v_mov_b32_e64 v5, -v2 ; encoding: [0x05,0x00,0x81,0xd5,0x02,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x02,0x01,0x00,0x20
+
+# GFX11: v_mov_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0x81,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x02,0x01,0x00,0x00
+
+# GFX11: v_mov_b32_e64 v5, -v2 clamp ; encoding: [0x05,0x80,0x81,0xd5,0x02,0x01,0x00,0x20]
+0x05,0x80,0x81,0xd5,0x02,0x01,0x00,0x20
+
+# GFX11: v_mov_b32_e64 v5, |v2| clamp ; encoding: [0x05,0x81,0x81,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x81,0x81,0xd5,0x02,0x01,0x00,0x00
+
 # GFX11: v_nop ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
 0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00
Index: llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
@@ -198,6 +198,18 @@
 # CHECK: v_mov_b32_e64 v5, -4.0 ; encoding: [0x05,0x00,0x41,0xd1,0xf7,0x00,0x00,0x00]
 0x05,0x00,0x41,0xd1,0xf7,0x00,0x00,0x00
+# CHECK: v_mov_b32_e64 v5, -v2 ; encoding: [0x05,0x00,0x41,0xd1,0x02,0x01,0x00,0x20]
+0x05,0x00,0x41,0xd1,0x02,0x01,0x00,0x20
+
+# CHECK: v_mov_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0x41,0xd1,0x02,0x01,0x00,0x00]
+0x05,0x01,0x41,0xd1,0x02,0x01,0x00,0x00
+
+# CHECK: v_mov_b32_e64 v5, -v2 clamp ; encoding: [0x05,0x80,0x41,0xd1,0x02,0x01,0x00,0x20]
+0x05,0x80,0x41,0xd1,0x02,0x01,0x00,0x20
+
+# CHECK: v_mov_b32_e64 v5, |v2| clamp ; encoding: [0x05,0x81,0x41,0xd1,0x02,0x01,0x00,0x00]
+0x05,0x81,0x41,0xd1,0x02,0x01,0x00,0x00
+
 # CHECK: v_cvt_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x43,0xd1,0x01,0x01,0x00,0x00]
 0x05,0x00,0x43,0xd1,0x01,0x01,0x00,0x00
Index: llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
@@ -183,6 +183,18 @@
 # CHECK: v_mov_b32_e64 v5, -4.0 ; encoding: [0x05,0x00,0x41,0xd1,0xf7,0x00,0x00,0x00]
 0x05,0x00,0x41,0xd1,0xf7,0x00,0x00,0x00
+# CHECK: v_mov_b32_e64 v5, -v2 ; encoding: [0x05,0x00,0x41,0xd1,0x02,0x01,0x00,0x20]
+0x05,0x00,0x41,0xd1,0x02,0x01,0x00,0x20
+
+# CHECK: v_mov_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0x41,0xd1,0x02,0x01,0x00,0x00]
+0x05,0x01,0x41,0xd1,0x02,0x01,0x00,0x00
+
+# CHECK: v_mov_b32_e64 v5, -v2 clamp ; encoding: [0x05,0x80,0x41,0xd1,0x02,0x01,0x00,0x20]
+0x05,0x80,0x41,0xd1,0x02,0x01,0x00,0x20
+
+# CHECK: v_mov_b32_e64 v5, |v2| clamp ; encoding: [0x05,0x81,0x41,0xd1,0x02,0x01,0x00,0x00]
+0x05,0x81,0x41,0xd1,0x02,0x01,0x00,0x00
+
 # CHECK: v_cvt_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x43,0xd1,0x01,0x01,0x00,0x00]
 0x05,0x00,0x43,0xd1,0x01,0x01,0x00,0x00
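A key for reading the new VOP3 encodings in the assembler and disassembler tests, with bit positions inferred from the byte patterns shown above (bytes listed little-endian, byte 0 first): bit 0 of byte 1 carries abs(src0), bit 7 of byte 1 (0x80) carries clamp, and bit 5 of byte 7 (0x20) carries neg(src0). For example, in the GFX10 cases:

    v_mov_b32_e64 v5, -v2         ; [0x05,0x00,0x81,0xd5,0x02,0x01,0x00,0x20]  neg set in byte 7
    v_mov_b32_e64 v5, |v2|        ; [0x05,0x01,0x81,0xd5,0x02,0x01,0x00,0x00]  abs set in byte 1
    v_mov_b32_e64 v5, |v2| clamp  ; [0x05,0x81,0x81,0xd5,0x02,0x01,0x00,0x00]  clamp and abs set in byte 1

The same three bits account for the modifier state in every new encoding above; between generations only the opcode bytes differ, and the DPP forms append their dpp16/dpp8 control dword after the base VOP3 word.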