Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1650,20 +1650,35 @@
   (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
 >;
 
+// Selects med3Inst for both min/max spellings of the median of three
+// vt operands. The inner min/max nodes must each have a single use.
-class Int16Med3Pat<Instruction med3Inst,
+multiclass Int16Med3Pat<Instruction med3Inst,
+                   SDPatternOperator min,
                    SDPatternOperator max,
                    SDPatternOperator max_oneuse,
                    SDPatternOperator min_oneuse,
-                   ValueType vt = i32> : GCNPat<
+                   ValueType vt = i16> {
+  // This matches 16 permutations of
+  // max(min(x, y), min(max(x, y), z))
+  def : GCNPat <
   (max (min_oneuse vt:$src0, vt:$src1),
        (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
   (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
 >;
 
+  // This matches 16 permutations of
+  // min(max(x, y), max(min(x, y), z))
+  def : GCNPat <
+  (min (max_oneuse vt:$src0, vt:$src1),
+      (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+  (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+}
+
 def : FPMed3Pat<f32, V_MED3_F32>;
 
 let OtherPredicates = [isGFX9] in {
 def : FP16Med3Pat<f16, V_MED3_F16>;
-def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
-def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
+defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
 } // End Predicates = [isGFX9]
Index: test/CodeGen/AMDGPU/smed3.ll
===================================================================
--- test/CodeGen/AMDGPU/smed3.ll
+++ test/CodeGen/AMDGPU/smed3.ll
@@ -681,6 +681,29 @@
   ret void
 }
 
+; smin(smax(x, y), smax(smin(x, y), z)) should select v_med3_i16 on GFX9.
+; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
+; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+
+define amdgpu_kernel void @v_test_smed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
+bb:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
+  %gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3
+  %gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
+  %x = load i16, i16 addrspace(1)* %gep0
+  %y = load i16, i16 addrspace(1)* %gep1
+  %z = load i16, i16 addrspace(1)* %gep2
+
+  %tmp0 = call i16 @smin16(i16 %x, i16 %y)
+  %tmp1 = call i16 @smax16(i16 %x, i16 %y)
+  %tmp2 = call i16 @smax16(i16 %tmp0, i16 %z)
+  %tmp3 = call i16 @smin16(i16 %tmp1, i16 %tmp2)
+  store i16 %tmp3, i16 addrspace(1)* %out.gep
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone alwaysinline }
Index: test/CodeGen/AMDGPU/umed3.ll
===================================================================
--- test/CodeGen/AMDGPU/umed3.ll
+++ test/CodeGen/AMDGPU/umed3.ll
@@ -716,6 +716,28 @@
   ret void
 }
 
+; umin(umax(x, y), umax(umin(x, y), z)) should select v_med3_u16 on GFX9.
+; GCN-LABEL: {{^}}v_test_umed3_i16_pat_1:
+; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @v_test_umed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
+bb:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
+  %gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3
+  %gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
+  %x = load i16, i16 addrspace(1)* %gep0
+  %y = load i16, i16 addrspace(1)* %gep1
+  %z = load i16, i16 addrspace(1)* %gep2
+
+  %tmp0 = call i16 @umin16(i16 %x, i16 %y)
+  %tmp1 = call i16 @umax16(i16 %x, i16 %y)
+  %tmp2 = call i16 @umax16(i16 %tmp0, i16 %z)
+  %tmp3 = call i16 @umin16(i16 %tmp1, i16 %tmp2)
+  store i16 %tmp3, i16 addrspace(1)* %out.gep
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone alwaysinline }