Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -3404,6 +3404,28 @@
 defm : SI_INDIRECT_Pattern ;
 
 //===----------------------------------------------------------------------===//
+// SAD Patterns
+//===----------------------------------------------------------------------===//
+
+// (umax(src0, src1) - umin(src0, src1)) + src2 is selected as V_SAD_U32.
+def : Pat <
+  (i32 (add (i32 (sub (i32 (umax i32:$src0, i32:$src1)),
+                      (i32 (umin i32:$src0, i32:$src1)))),
+            i32:$src2)),
+  (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+// ((src0 >u src1) ? (src0 - src1) : (src1 - src0)) + src2 is selected as
+// V_SAD_U32 too.
+def : Pat <
+  (i32 (add (i32 (select (i1 (setugt i32:$src0, i32:$src1)),
+                         (i32 (sub i32:$src0, i32:$src1)),
+                         (i32 (sub i32:$src1, i32:$src0)))),
+            i32:$src2)),
+  (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+//===----------------------------------------------------------------------===//
 // Conversion Patterns
 //===----------------------------------------------------------------------===//
 
Index: test/CodeGen/AMDGPU/sad.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/sad.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+
+; Larger-minus-smaller of (a, b) plus c; both picks are written as icmp + select.
+; GCN-LABEL: {{^}}v_sad_u32_pat1:
+; SI: v_sad_u32
+define void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+  %a.ugt.b = icmp ugt i32 %a, %b
+  %hi = select i1 %a.ugt.b, i32 %a, i32 %b
+
+  %a.ule.b = icmp ule i32 %a, %b
+  %lo = select i1 %a.ule.b, i32 %a, i32 %b
+
+  %diff = sub i32 %hi, %lo
+  %sum = add i32 %diff, %c
+
+  store i32 %sum, i32 addrspace(1)* %out
+  ret void
+}
+
+; Select between (a - b) and (b - a) on an unsigned a > b compare, then add c.
+; GCN-LABEL: {{^}}v_sad_u32_pat2:
+; SI: v_sad_u32
+define void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+  %a.ugt.b = icmp ugt i32 %a, %b
+  %a.minus.b = sub i32 %a, %b
+  %b.minus.a = sub i32 %b, %a
+  %diff = select i1 %a.ugt.b, i32 %a.minus.b, i32 %b.minus.a
+
+  %sum = add i32 %diff, %c
+
+  store i32 %sum, i32 addrspace(1)* %out
+  ret void
+}
+
+