Skip to content

Commit 1a60116

Browse files
committed Nov 14, 2018
AMDGPU: Additional pattern for i16 median3 matching
min(max(a, b), max(min(a, b), c))

Differential Revision: https://reviews.llvm.org/D54494

llvm-svn: 346886
1 parent 808e157 commit 1a60116

File tree

3 files changed

+60
-4
lines changed

3 files changed

+60
-4
lines changed
 

‎llvm/lib/Target/AMDGPU/SIInstructions.td

+17-4
Original file line number | Diff line number | Diff line change
@@ -1650,20 +1650,33 @@ class FP16Med3Pat<ValueType vt,
16501650
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
16511651
>;
16521652

1653-
class Int16Med3Pat<Instruction med3Inst,
1653+
multiclass Int16Med3Pat<Instruction med3Inst,
1654+
SDPatternOperator min,
16541655
SDPatternOperator max,
16551656
SDPatternOperator max_oneuse,
16561657
SDPatternOperator min_oneuse,
1657-
ValueType vt = i32> : GCNPat<
1658+
ValueType vt = i16> {
1659+
// This matches 16 permutations of
1660+
// max(min(x, y), min(max(x, y), z))
1661+
def : GCNPat <
16581662
(max (min_oneuse vt:$src0, vt:$src1),
16591663
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
16601664
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
16611665
>;
16621666

1667+
// This matches 16 permutations of
1668+
// min(max(a, b), max(min(a, b), c))
1669+
def : GCNPat <
1670+
(min (max_oneuse vt:$src0, vt:$src1),
1671+
(max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
1672+
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
1673+
>;
1674+
}
1675+
16631676
def : FPMed3Pat<f32, V_MED3_F32>;
16641677

16651678
let OtherPredicates = [isGFX9] in {
16661679
def : FP16Med3Pat<f16, V_MED3_F16>;
1667-
def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
1668-
def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
1680+
defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
1681+
defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
16691682
} // End OtherPredicates = [isGFX9]

‎llvm/test/CodeGen/AMDGPU/smed3.ll

+22
Original file line number | Diff line number | Diff line change
@@ -681,6 +681,28 @@ bb:
681681
ret void
682682
}
683683

684+
; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
685+
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
686+
687+
define amdgpu_kernel void @v_test_smed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
688+
bb:
689+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
690+
%gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
691+
%gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3
692+
%gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8
693+
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
694+
%x = load i16, i16 addrspace(1)* %gep0
695+
%y = load i16, i16 addrspace(1)* %gep1
696+
%z = load i16, i16 addrspace(1)* %gep2
697+
698+
%tmp0 = call i16 @smin16(i16 %x, i16 %y)
699+
%tmp1 = call i16 @smax16(i16 %x, i16 %y)
700+
%tmp2 = call i16 @smax16(i16 %tmp0, i16 %z)
701+
%tmp3 = call i16 @smin16(i16 %tmp1, i16 %tmp2)
702+
store i16 %tmp3, i16 addrspace(1)* %out.gep
703+
ret void
704+
}
705+
684706
attributes #0 = { nounwind readnone }
685707
attributes #1 = { nounwind }
686708
attributes #2 = { nounwind readnone alwaysinline }

‎llvm/test/CodeGen/AMDGPU/umed3.ll

+21
Original file line number | Diff line number | Diff line change
@@ -716,6 +716,27 @@ bb:
716716
ret void
717717
}
718718

719+
; GCN-LABEL: {{^}}v_test_umed3_i16_pat_1:
720+
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
721+
define amdgpu_kernel void @v_test_umed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
722+
bb:
723+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
724+
%gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
725+
%gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3
726+
%gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8
727+
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
728+
%x = load i16, i16 addrspace(1)* %gep0
729+
%y = load i16, i16 addrspace(1)* %gep1
730+
%z = load i16, i16 addrspace(1)* %gep2
731+
732+
%tmp0 = call i16 @umin16(i16 %x, i16 %y)
733+
%tmp1 = call i16 @umax16(i16 %x, i16 %y)
734+
%tmp2 = call i16 @umax16(i16 %tmp0, i16 %z)
735+
%tmp3 = call i16 @umin16(i16 %tmp1, i16 %tmp2)
736+
store i16 %tmp3, i16 addrspace(1)* %out.gep
737+
ret void
738+
}
739+
719740
attributes #0 = { nounwind readnone }
720741
attributes #1 = { nounwind }
721742
attributes #2 = { nounwind readnone alwaysinline }

0 commit comments

Comments
 (0)
Please sign in to comment.