# Patch summary: wraps V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8 and V_MQSAD_U32_U8 in
# `let Constraints = "@earlyclobber $vdst"` and adds GCN-NOT lines to the three intrinsic
# tests that reject an instruction whose vdst tuple is reused as src1 and src2.
# The GCN-NOT src1 operand is written `v[[VLO]]` (no literal `[`), because the GCN lines
# match that operand as a single register, `v{{[0-9]+}}`.
# NOTE(review): each GCN-NOT follows a GCN line that matches the same instruction; confirm
# against the FileCheck CHECK-NOT scanning rules that the negative check really covers it.
Index: lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP3Instructions.td +++ lib/Target/AMDGPU/VOP3Instructions.td @@ -209,7 +209,10 @@ } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; + +let Constraints = "@earlyclobber $vdst" in { def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile, int_amdgcn_mqsad_pk_u16_u8>; +} // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { let SchedRW = [WriteDouble]; @@ -232,8 +235,10 @@ let SubtargetPredicate = isCIVI in { +let Constraints = "@earlyclobber $vdst" in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile, int_amdgcn_qsad_pk_u16_u8>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile, int_amdgcn_mqsad_u32_u8>; +} // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -5,6 +5,7 @@ ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8: ; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_mqsad_pk_u16_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 store i64 %result, i64 addrspace(1)* %out, align 4 @@ -13,6 +14,7 @@ ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate: ; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_mqsad_pk_u16_u8 
v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 store i64 %result, i64 addrspace(1)* %out, align 4 Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -5,6 +5,7 @@ ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant: ; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_mqsad_u32_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) { %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> ) #0 store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 @@ -13,6 +14,7 @@ ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate: ; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_mqsad_u32_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) { %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0 store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 @@ -21,6 +23,7 @@ ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: ; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_mqsad_u32_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 
v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 @@ -29,6 +32,7 @@ ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate: ; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_mqsad_u32_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 @@ -37,6 +41,7 @@ ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr: ; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_mqsad_u32_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) { %in = load <4 x i32>, <4 x i32> addrspace(1) * %input Index: test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -5,6 +5,7 @@ ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8: ; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_qsad_pk_u16_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { %result= call i64 
@llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 store i64 %result, i64 addrspace(1)* %out, align 4 @@ -13,6 +14,7 @@ ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate: ; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-NOT: v_qsad_pk_u16_u8 v{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], v[[VLO]], v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 store i64 %result, i64 addrspace(1)* %out, align 4