Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -618,6 +618,9 @@
 def smin_oneuse : HasOneUseBinOp<smin>;
 def umax_oneuse : HasOneUseBinOp<umax>;
 def umin_oneuse : HasOneUseBinOp<umin>;
 } // Properties = [SDNPCommutative, SDNPAssociative]
+
+// sub is neither commutative nor associative, so keep it out of the block above.
+def sub_oneuse : HasOneUseBinOp<sub>;
 
 
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -3404,6 +3404,25 @@
 defm : SI_INDIRECT_Pattern ;
 
 //===----------------------------------------------------------------------===//
+// SAD Patterns
+//===----------------------------------------------------------------------===//
+
+// (umax(a, b) - umin(a, b)) + c, where the sub has no other use.
+def : Pat <
+  (add (sub_oneuse (umax i32:$src0, i32:$src1),
+                   (umin i32:$src0, i32:$src1)), i32:$src2),
+  (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+// (a >u b ? a - b : b - a) + c, where neither sub has another use.
+def : Pat <
+  (add (select (i1 (setugt i32:$src0, i32:$src1)),
+               (sub_oneuse i32:$src0, i32:$src1),
+               (sub_oneuse i32:$src1, i32:$src0)), i32:$src2),
+  (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+//===----------------------------------------------------------------------===//
 // Conversion Patterns
 //===----------------------------------------------------------------------===//
 
Index: test/CodeGen/AMDGPU/sad.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/sad.ll
@@ -0,0 +1,435 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Check that v_sad_u32 is selected for both IR forms of |a - b| + c with
+; unsigned a and b:
+;   pat1: (umax(a, b) - umin(a, b)) + c
+;   pat2: (a >u b ? a - b : b - a) + c
+; and that it is not selected when the intermediate sub has another use.
+
+; GCN-LABEL: {{^}}v_sad_u32_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %sub0 = sub i32 %a, %b
+  %sub1 = sub i32 %b, %a
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat1:
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  store volatile i32 %ret0, i32 *undef
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+  store volatile i32 %ret, i32 *undef
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+  store volatile i32 %t0, i32 *undef
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %t0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp ule i32 %a, %b
+  %t1 = select i1 %icmp1, i32 %a, i32 %b
+
+  store volatile i32 %t1, i32 *undef
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_add_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %sub0 = sub i32 %a, %b
+  %sub1 = sub i32 %b, %a
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c
+  store volatile i32 %ret, i32 *undef
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %sub0 = sub i32 %a, %b
+  store volatile i32 %sub0, i32 *undef
+  %sub1 = sub i32 %b, %a
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; The select has the extra use here. The pattern does not require the select
+; to be single-use, so v_sad_u32 is still expected.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_select_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+  %icmp0 = icmp ugt i32 %a, %b
+  %sub0 = sub i32 %a, %b
+  %sub1 = sub i32 %b, %a
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+  store volatile i32 %ret0, i32 *undef
+
+  %ret = add i32 %ret0, %c
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_vector_pat1:
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_multi_use_sub_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  store volatile <4 x i32> %ret0, <4 x i32> *undef
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_add_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+  store volatile <4 x i32> %ret, <4 x i32> *undef
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_min_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+  store volatile <4 x i32> %t1, <4 x i32> *undef
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_vector_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_max_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
+  store volatile <4 x i32> %t0, <4 x i32> *undef
+
+  %icmp1 = icmp ule <4 x i32> %a, %b
+  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
+
+  %ret0 = sub <4 x i32> %t0, %t1
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_vector_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %sub0 = sub <4 x i32> %a, %b
+  %sub1 = sub <4 x i32> %b, %a
+  %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1
+
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_vector_pat2:
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_multi_use_sub_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %sub0 = sub <4 x i32> %a, %b
+  store volatile <4 x i32> %sub0, <4 x i32> *undef
+  %sub1 = sub <4 x i32> %b, %a
+  %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1
+
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_vector_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_add_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %sub0 = sub <4 x i32> %a, %b
+  %sub1 = sub <4 x i32> %b, %a
+  %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1
+
+  %ret = add <4 x i32> %ret0, %c
+  store volatile <4 x i32> %ret, <4 x i32> *undef
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; The select has the extra use here. The pattern does not require the select
+; to be single-use, so v_sad_u32 is still expected.
+; GCN-LABEL: {{^}}v_sad_u32_multi_use_select_vector_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_multi_use_select_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %icmp0 = icmp ugt <4 x i32> %a, %b
+  %sub0 = sub <4 x i32> %a, %b
+  %sub1 = sub <4 x i32> %b, %a
+  %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1
+  store volatile <4 x i32> %ret0, <4 x i32> *undef
+  %ret = add <4 x i32> %ret0, %c
+
+  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i16_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i16_pat1(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %c.ext = zext i16 %c to i32
+
+  %icmp0 = icmp ugt i32 %a.ext, %b.ext
+  %t0 = select i1 %icmp0, i32 %a.ext, i32 %b.ext
+
+  %icmp1 = icmp ule i32 %a.ext, %b.ext
+  %t1 = select i1 %icmp1, i32 %a.ext, i32 %b.ext
+
+  %ret0 = sub i32 %t0, %t1
+  %ret = add i32 %ret0, %c.ext
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i16_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i16_pat2(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
+  %a.ext = zext i16 %a to i32
+  %b.ext = zext i16 %b to i32
+  %c.ext = zext i16 %c to i32
+
+  %icmp0 = icmp ugt i32 %a.ext, %b.ext
+  %sub0 = sub i32 %a.ext, %b.ext
+  %sub1 = sub i32 %b.ext, %a.ext
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c.ext
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %icmp0 = icmp ugt i8 %a, %b
+  %t0 = select i1 %icmp0, i8 %a, i8 %b
+
+  %icmp1 = icmp ule i8 %a, %b
+  %t1 = select i1 %icmp1, i8 %a, i8 %b
+
+  %ret0 = sub i8 %t0, %t1
+  %ret = add i8 %ret0, %c
+
+  store i8 %ret, i8 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_multi_use_min_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i8_multi_use_min_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %icmp0 = icmp ugt i8 %a, %b
+  %t0 = select i1 %icmp0, i8 %a, i8 %b
+
+  %icmp1 = icmp ule i8 %a, %b
+  %t1 = select i1 %icmp1, i8 %a, i8 %b
+  store volatile i8 %t1, i8 *undef
+
+  %ret0 = sub i8 %t0, %t1
+  %ret = add i8 %ret0, %c
+
+  store i8 %ret, i8 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_multi_use_max_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i8_multi_use_max_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %icmp0 = icmp ugt i8 %a, %b
+  %t0 = select i1 %icmp0, i8 %a, i8 %b
+  store volatile i8 %t0, i8 *undef
+
+  %icmp1 = icmp ule i8 %a, %b
+  %t1 = select i1 %icmp1, i8 %a, i8 %b
+
+  %ret0 = sub i8 %t0, %t1
+  %ret = add i8 %ret0, %c
+
+  store i8 %ret, i8 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_multi_use_add_pat1:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i8_multi_use_add_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %icmp0 = icmp ugt i8 %a, %b
+  %t0 = select i1 %icmp0, i8 %a, i8 %b
+
+  %icmp1 = icmp ule i8 %a, %b
+  %t1 = select i1 %icmp1, i8 %a, i8 %b
+
+  %ret0 = sub i8 %t0, %t1
+  %ret = add i8 %ret0, %c
+  store volatile i8 %ret, i8 *undef
+
+  store i8 %ret, i8 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_multi_use_sub_pat1:
+; GCN-NOT: v_sad_u32
+define void @v_sad_u32_i8_multi_use_sub_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %icmp0 = icmp ugt i8 %a, %b
+  %t0 = select i1 %icmp0, i8 %a, i8 %b
+
+  %icmp1 = icmp ule i8 %a, %b
+  %t1 = select i1 %icmp1, i8 %a, i8 %b
+
+  %ret0 = sub i8 %t0, %t1
+  store volatile i8 %ret0, i8 *undef
+  %ret = add i8 %ret0, %c
+
+  store i8 %ret, i8 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_sad_u32_i8_pat2:
+; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_sad_u32_i8_pat2(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+  %a.ext = zext i8 %a to i32
+  %b.ext = zext i8 %b to i32
+  %c.ext = zext i8 %c to i32
+
+  %icmp0 = icmp ugt i32 %a.ext, %b.ext
+  %sub0 = sub i32 %a.ext, %b.ext
+  %sub1 = sub i32 %b.ext, %a.ext
+  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
+
+  %ret = add i32 %ret0, %c.ext
+
+  store i32 %ret, i32 addrspace(1)* %out
+  ret void
+}