Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -1129,36 +1129,41 @@ ); } -class getInsDPP { +class getInsDPP { dag ret = !if (!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) - (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + (ins i8imm:$wqm_ctrl, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl), !if (!eq(NumSrcArgs, 1), !if (!eq(HasModifiers, 1), // VOP1_DPP with modifiers - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + (ins i8imm:$wqm_ctrl, DstRC:$old, Src0Mod:$src0_modifiers, + Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* else */, // VOP1_DPP without modifiers - (ins Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, - bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) + (ins i8imm:$wqm_ctrl, DstRC:$old, Src0RC:$src0, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* endif */) /* NumSrcArgs == 2 */, !if (!eq(HasModifiers, 1), // VOP2_DPP with modifiers - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + (ins i8imm:$wqm_ctrl, DstRC:$old, + Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* else */, // VOP2_DPP without modifiers - (ins Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl, - row_mask:$row_mask, bank_mask:$bank_mask, - bound_ctrl:$bound_ctrl) + (ins i8imm:$wqm_ctrl, DstRC:$old, + Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl, + row_mask:$row_mask, bank_mask:$bank_mask, + bound_ctrl:$bound_ctrl) /* endif */))); } @@ -1459,7 +1464,7 @@ NumSrcArgs, HasClamp, Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; - field dag InsDPP = getInsDPP.ret; field dag InsSDWA = getInsSDWA; - def : Pat< (i32 (anyext i16:$src)), (COPY $src) Index: lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP2Instructions.td +++ lib/Target/AMDGPU/VOP2Instructions.td @@ -201,7 +201,8 @@ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64, 3, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; - let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + let InsDPP = (ins i8imm:$wqm_ctrl, DstRCDPP:$old, + Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, VGPR_32:$src2, // stub argument dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, @@ -274,7 +275,8 @@ dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0, + let InsDPP = (ins i8imm:$wqm_ctrl, DstRCDPP:$old, + Src0Mod:$src0_modifiers, Src0DPP:$src0, Src1Mod:$src1_modifiers, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); Index: lib/Target/AMDGPU/VOPInstructions.td =================================================================== --- lib/Target/AMDGPU/VOPInstructions.td +++ lib/Target/AMDGPU/VOPInstructions.td @@ -465,6 +465,8 @@ let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst); let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP, AMDGPUAsmVariants.Disable); + let Constraints = !if(P.NumSrcArgs, "$old = $vdst", ""); + let DisableEncoding = !if(P.NumSrcArgs, "$old, $wqm_ctrl", "$wqm_ctrl"); let DecoderNamespace = "DPP"; } Index: test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll @@ -5,8 +5,10 @@ ; VI-LABEL: {{^}}dpp_test: ; VI: v_mov_b32_e32 v0, s{{[0-9]+}} +; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}} ; VI: s_nop 1 -; VI: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] +; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] +; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11] define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 store i32 %tmp0, i32 addrspace(1)* %out @@ -14,11 +16,14 @@ } ; VI-LABEL: {{^}}dpp_wait_states: +; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}} ; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}} ; VI: s_nop 1 -; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; VI: s_nop 1 -; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) { %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0