Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -620,4 +620,8 @@
 def umin_oneuse : HasOneUseBinOp<umin>;
 } // Properties = [SDNPCommutative, SDNPAssociative]
+
+// sub is neither commutative nor associative, so it must stay outside the
+// block above; otherwise operand-swapped matches could be accepted.
+def sub_oneuse : HasOneUseBinOp<sub>;
 
 
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -3404,6 +3404,28 @@
 defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
 
 //===----------------------------------------------------------------------===//
+// SAD Patterns
+//===----------------------------------------------------------------------===//
+
+// umax(a, b) - umin(a, b) + c  ==>  V_SAD_U32 a, b, c
+// sub_oneuse limits the fold to a sub that has no other user.
+def : Pat <
+  (add (sub_oneuse (umax i32:$src0, i32:$src1),
+                   (umin i32:$src0, i32:$src1)),
+       i32:$src2),
+  (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+// (a >u b ? a - b : b - a) + c  ==>  V_SAD_U32 a, b, c
+def : Pat <
+  (add (select (i1 (setugt i32:$src0, i32:$src1)),
+               (sub_oneuse i32:$src0, i32:$src1),
+               (sub_oneuse i32:$src1, i32:$src0)),
+       i32:$src2),
+  (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+//===----------------------------------------------------------------------===//
 // Conversion Patterns
 //===----------------------------------------------------------------------===//
 
Index: test/CodeGen/AMDGPU/sad.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/sad.ll
@@ -0,0 +1,330 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}v_sad_u32_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %sub0 = sub i32 %a, %b
+  %sub1 = sub i32 %b, %a
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; The sub has a second user (the volatile store), so sub_oneuse must not match.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat1:
+; GCN: s_max_u32
+; GCN: s_min_u32
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  store volatile i32 %ret0, i32 *undef
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; Extra users of the add do not block the fold; only the sub must be one-use.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+  store volatile i32 %ret, i32 *undef
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; Extra users of the umax do not block the fold.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+  store volatile i32 %t0, i32 *undef
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; Extra users of the umin do not block the fold.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+  store volatile i32 %t1, i32 *undef
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; Extra users of the add do not block the fold.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_add_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %sub0 = sub i32 %a, %b
+  %sub1 = sub i32 %b, %a
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c
+  store volatile i32 %ret, i32 *undef
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; One of the subs has a second user, so sub_oneuse must not match.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %sub0 = sub i32 %a, %b
+  store volatile i32 %sub0, i32 *undef
+  %sub1 = sub i32 %b, %a
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; The vector sub has a second user, so sub_oneuse must not match any lane.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_vector_pat1:
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_multi_use_sub_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  store volatile <4 x i32> %ret0, <4 x i32> *undef
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Extra users of the vector add do not block the fold.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_add_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+  store volatile <4 x i32> %ret, <4 x i32> *undef
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Extra users of the vector umin do not block the fold.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_min_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+  store volatile <4 x i32> %t1, <4 x i32> *undef
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Extra users of the vector umax do not block the fold.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_max_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+  store volatile <4 x i32> %t0, <4 x i32> *undef
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_vector_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %sub0 = sub <4 x i32> %a, %b
+  %sub1 = sub <4 x i32> %b, %a
+  %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1
+
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i16_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i16_pat1(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %c.ext = zext i16 %c to i32
+
+  %icmp0 = icmp ugt i32 %a.ext, %b.ext
+  %t0 = select i1 %icmp0, i32 %a.ext, i32 %b.ext
+
+  %icmp1 = icmp ule i32 %a.ext, %b.ext
+  %t1 = select i1 %icmp1, i32 %a.ext, i32 %b.ext
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c.ext
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i16_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i16_pat2(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %c.ext = zext i16 %c to i32
+
+  %icmp0 = icmp ugt i32 %a.ext, %b.ext
+  %sub0 = sub i32 %a.ext, %b.ext
+  %sub1 = sub i32 %b.ext, %a.ext
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c.ext
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %icmp0 = icmp ugt i8 %a, %b
+  %t0 = select i1 %icmp0, i8 %a, i8 %b
+
+  %icmp1 = icmp ule i8 %a, %b
+  %t1 = select i1 %icmp1, i8 %a, i8 %b
+
+  %ret0 = sub i8 %t0, %t1
+  %ret = add i8 %ret0, %c
+
+  store i8 %ret, i8 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i8_pat2(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %a.ext = zext i8 %a to i32
+  %b.ext = zext i8 %b to i32
+  %c.ext = zext i8 %c to i32
+
+  %icmp0 = icmp ugt i32 %a.ext, %b.ext
+  %sub0 = sub i32 %a.ext, %b.ext
+  %sub1 = sub i32 %b.ext, %a.ext
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c.ext
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_no_sad:
+; GCN: s_max_u32
+; GCN: s_min_u32
+; GCN-NOT: v_sad_u32
+define void @v_sad_no_sad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  store i32 %ret0, i32 *undef
+  %ret = add i32 undef, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}