Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -407,6 +407,41 @@ GCCBuiltin<"__builtin_amdgcn_lerp">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// llvm.amdgcn.sad.u8 +def int_amdgcn_sad_u8 : + GCCBuiltin<"__builtin_amdgcn_sad_u8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// llvm.amdgcn.msad.u8 +def int_amdgcn_msad_u8 : + GCCBuiltin<"__builtin_amdgcn_msad_u8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// llvm.amdgcn.sad.hi.u8 +def int_amdgcn_sad_hi_u8 : + GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// llvm.amdgcn.sad.u16 +def int_amdgcn_sad_u16 : + GCCBuiltin<"__builtin_amdgcn_sad_u16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// llvm.amdgcn.qsad.pk.u16.u8 +def int_amdgcn_qsad_pk_u16_u8 : + GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// llvm.amdgcn.mqsad.pk.u16.u8 +def int_amdgcn_mqsad_pk_u16_u8 : + GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// llvm.amdgcn.mqsad.u32.u8 +def int_amdgcn_mqsad_u32_u8 : + GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_amdgcn_icmp : Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; Index: lib/Target/AMDGPU/CIInstructions.td =================================================================== --- lib/Target/AMDGPU/CIInstructions.td +++ lib/Target/AMDGPU/CIInstructions.td @@ -59,15 +59,15 @@ // VOP3 Instructions 
//===----------------------------------------------------------------------===// -defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", - VOP_I32_I32_I32 ->; defm V_MQSAD_U16_U8 : VOP3Inst , "v_mqsad_u16_u8", VOP_I32_I32_I32 >; -defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", - VOP_I32_I32_I32 ->; + +defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", + VOP_I32_I32_I32_I32, int_amdgcn_qsad_pk_u16_u8>; + +defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", + VOP_I32_I32_I32_I32, int_amdgcn_mqsad_u32_u8>; let isCommutable = 1 in { defm V_MAD_U64_U32 : VOP3Inst , "v_mad_u64_u32", Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -1760,9 +1760,15 @@ VOP_I32_I32_I32_I32, AMDGPUumed3 >; -//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>; -//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>; -//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>; +defm V_SAD_U8 : VOP3Inst , "v_sad_u8", + VOP_I32_I32_I32_I32, int_amdgcn_sad_u8>; + +defm V_SAD_HI_U8 : VOP3Inst , "v_sad_hi_u8", + VOP_I32_I32_I32_I32, int_amdgcn_sad_hi_u8>; + +defm V_SAD_U16 : VOP3Inst , "v_sad_u16", + VOP_I32_I32_I32_I32, int_amdgcn_sad_u16>; + defm V_SAD_U32 : VOP3Inst , "v_sad_u32", VOP_I32_I32_I32_I32 >; @@ -1864,8 +1870,12 @@ } // End SchedRW = [WriteDouble] } // End isCommutable = 1, Uses = [VCC, EXEC] -//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>; -//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>; +defm V_MSAD_U8 : VOP3Inst , "v_msad_u8", + VOP_I32_I32_I32_I32, int_amdgcn_msad_u8>; + +defm V_MQSAD_PK_U16_U8 : VOP3Inst , "v_mqsad_pk_u16_u8", + VOP_I32_I32_I32_I32, int_amdgcn_mqsad_pk_u16_u8>; + //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>; let SchedRW = [WriteDouble] in { Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll =================================================================== --- /dev/null 
+++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8: +; GCN: v_mqsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_mqsad_pk_u16_u8(i32 addrspace(1)* %out, i32 %src) nounwind { + %result= call i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32 %src, i32 100, i32 100) #0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.mqsad.u32.u8(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_mqsad_u32_u8: +; GCN: v_mqsad_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_mqsad_u32_u8(i32 addrspace(1)* %out, i32 %src) nounwind { + %result= call i32 @llvm.amdgcn.mqsad.u32.u8(i32 %src, i32 100, i32 100) #0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.msad.u8(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_msad_u8: +; GCN: v_msad_u8 
v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_msad_u8(i32 addrspace(1)* %out, i32 %src) nounwind { + %result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 100, i32 100) #0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.qsad.pk.u16.u8(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_qsad_pk_u16_u8: +; GCN: v_qsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_qsad_pk_u16_u8(i32 addrspace(1)* %out, i32 %src) nounwind { + %result= call i32 @llvm.amdgcn.qsad.pk.u16.u8(i32 %src, i32 100, i32 100) #0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.sad.hi.u8(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_sad_hi_u8: +; GCN: v_sad_hi_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_sad_hi_u8(i32 addrspace(1)* %out, i32 %src) nounwind { + %result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 100, i32 100) #0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll 
=================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.sad.u16(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_sad_u16: +; GCN: v_sad_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_sad_u16(i32 addrspace(1)* %out, i32 %src) nounwind { + %result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 100, i32 100) #0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.sad.u8(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_sad_u8: +; GCN: v_sad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_sad_u8(i32 addrspace(1)* %out, i32 %src) nounwind { + %result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 100, i32 100) #0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone }