Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -796,18 +796,30 @@ (BIT_ALIGN $src0, $src0, $src1) >; -// This matches 16 permutations of -// max(min(x, y), min(max(x, y), z)) -class IntMed3Pat : AMDGPUPat< + SDPatternOperator max_oneuse, + ValueType vt = i32> { + + // This matches 16 permutations of + // min(max(a, b), max(min(a, b), c)) + def : AMDGPUPat < + (min (max_oneuse vt:$src0, vt:$src1), + (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)), + (med3Inst vt:$src0, vt:$src1, vt:$src2) +>; + + // This matches 16 permutations of + // max(min(x, y), min(max(x, y), z)) + def : AMDGPUPat < (max (min_oneuse vt:$src0, vt:$src1), (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), (med3Inst $src0, $src1, $src2) >; - +} + // Special conversion patterns def cvt_rpi_i32_f32 : PatFrag < Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -579,7 +579,8 @@ (int_amdgcn_kill (i1 (setcc f32:$src, InlineFPImm:$imm, cond:$cond))), (SI_KILL_F32_COND_IMM_PSEUDO $src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond)) >; -// TODO: we could add more variants for other types of conditionals + + // TODO: we could add more variants for other types of conditionals //===----------------------------------------------------------------------===// // VOP1 Patterns @@ -1621,8 +1622,8 @@ defm : BFEPattern ; defm : SHA256MaPattern ; -def : IntMed3Pat; -def : IntMed3Pat; +defm : IntMed3Pat; +defm : IntMed3Pat; } Index: test/CodeGen/AMDGPU/umed3.ll =================================================================== --- test/CodeGen/AMDGPU/umed3.ll +++ test/CodeGen/AMDGPU/umed3.ll @@ -363,6 +363,35 @@ ret void } +; GCN-LABEL: {{^}}v_med3_i32_test: +; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define amdgpu_kernel void @v_med3_i32_test(i32 addrspace(1)* %arg, i32, i32, i32) { + %4 = icmp slt i32 %0, %1 + %5 = select i1 %4, i32 %0, i32 %1 + %6 = icmp sgt i32 %0, %1 + %7 = select i1 %6, i32 %0, i32 %1 + %8 = icmp sgt i32 %5, %2 + %9 = select i1 %8, i32 %5, i32 %2 + %10 = icmp slt i32 %7, %9 + %11 = select i1 %10, i32 %7, i32 %9 + store i32 %11, i32 addrspace(1)* %arg + ret void +} + +; GCN-LABEL: {{^}}v_med3_u32_test: +; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define amdgpu_kernel void @v_med3_u32_test(i32 addrspace(1)* %arg, i32, i32, i32) { + %4 = icmp ult i32 %0, %1 + %5 = select i1 %4, i32 %0, i32 %1 + %6 = icmp ugt i32 %0, %1 + %7 = select i1 %6, i32 %0, i32 %1 + %8 = icmp ugt i32 %5, %2 + %9 = select i1 %8, i32 %5, i32 %2 + %10 = icmp ult i32 %7, %9 + %11 = select i1 %10, i32 %7, i32 %9 + store i32 %11, i32 addrspace(1)* %arg ret void +} + ; GCN-LABEL: {{^}}s_test_umed3_i16_pat_0: ; GCN: s_and_b32 ; GCN: s_and_b32