Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -289,6 +289,7 @@ setOperationAction(ISD::FSIN, MVT::f16, Promote); // F16 - VOP2 Actions. + setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); setOperationAction(ISD::FMINNUM, MVT::f16, Legal); setOperationAction(ISD::FDIV, MVT::f16, Promote); Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -468,16 +468,23 @@ // VOP2 Patterns //===----------------------------------------------------------------------===// +multiclass SelectPat <ValueType vt, Instruction inst> { + def : Pat < + (vt (select i1:$src0, vt:$src1, vt:$src2)), + (inst $src2, $src1, $src0) + >; +} + +defm : SelectPat <i16, V_CNDMASK_B32_e64>; +defm : SelectPat <i32, V_CNDMASK_B32_e64>; +defm : SelectPat <f16, V_CNDMASK_B32_e64>; +defm : SelectPat <f32, V_CNDMASK_B32_e64>; + def : Pat < (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; -def : Pat < - (i32 (select i1:$src0, i32:$src1, i32:$src2)), - (V_CNDMASK_B32_e64 $src2, $src1, $src0) ->; - // Pattern for V_MAC_F16 def : Pat < (f16 (fmad (VOP3NoMods0 f16:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), @@ -990,11 +997,6 @@ (V_ALIGNBIT_B32 $a, $a, (i32 8))) >; -def : Pat < - (f32 (select i1:$src2, f32:$src1, f32:$src0)), - (V_CNDMASK_B32_e64 $src0, $src1, $src2) ->; - multiclass BFMPatterns { def : Pat < (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -232,11 +232,6 @@ } // End SubtargetPredicate = isVI -def : Pat < - (i16 (select i1:$src0, i16:$src1, 
i16:$src2)), - (V_CNDMASK_B32_e64 $src2, $src1, $src0) ->; - let Predicates = [isVI] in { multiclass Tenary_i16_Pats addrspace(1)* %r, + <2 x half> addrspace(1)* %a, + <2 x half> addrspace(1)* %b, + <2 x half> addrspace(1)* %c, + <2 x half> addrspace(1)* %d) { +entry: + %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + %d.val = load <2 x half>, <2 x half> addrspace(1)* %d + %fcmp = fcmp olt <2 x half> %a.val, %b.val + %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val + store <2 x half> %r.val, <2 x half> addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}select_v2f16_imm_a +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cmp_gt_f32_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cmp_gt_f32_e64 +; VI: v_cmp_lt_f16_e32 +; VI: v_cmp_lt_f16_e64 +; GCN: v_cndmask_b32_e32 +; SI: v_cvt_f16_f32_e32 +; GCN: v_cndmask_b32_e64 +; SI: v_cvt_f16_f32_e32 +; GCN: s_endpgm +define void @select_v2f16_imm_a( + <2 x half> addrspace(1)* %r, + <2 x half> addrspace(1)* %b, + <2 x half> addrspace(1)* %c, + <2 x half> addrspace(1)* %d) { +entry: + %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + %d.val = load <2 x half>, <2 x half> addrspace(1)* %d + %fcmp = fcmp olt <2 x half> , %b.val + %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val + store <2 x half> %r.val, <2 x half> addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}select_v2f16_imm_b +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cmp_lt_f32_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cmp_lt_f32_e64 +; VI: v_cmp_gt_f16_e32 +; VI: v_cmp_gt_f16_e64 +; GCN: 
v_cndmask_b32_e32 +; SI: v_cvt_f16_f32_e32 +; GCN: v_cndmask_b32_e64 +; SI: v_cvt_f16_f32_e32 +; GCN: s_endpgm +define void @select_v2f16_imm_b( + <2 x half> addrspace(1)* %r, + <2 x half> addrspace(1)* %a, + <2 x half> addrspace(1)* %c, + <2 x half> addrspace(1)* %d) { +entry: + %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + %d.val = load <2 x half>, <2 x half> addrspace(1)* %d + %fcmp = fcmp olt <2 x half> %a.val, + %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val + store <2 x half> %r.val, <2 x half> addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}select_v2f16_imm_c +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cmp_lt_f32_e32 +; SI: v_cmp_lt_f32_e64 +; VI: v_cmp_lt_f16_e32 +; VI: v_cmp_lt_f16_e64 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e64 +; SI: v_cvt_f16_f32_e32 +; SI: v_cvt_f16_f32_e32 +; GCN: s_endpgm +define void @select_v2f16_imm_c( + <2 x half> addrspace(1)* %r, + <2 x half> addrspace(1)* %a, + <2 x half> addrspace(1)* %b, + <2 x half> addrspace(1)* %d) { +entry: + %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %d.val = load <2 x half>, <2 x half> addrspace(1)* %d + %fcmp = fcmp olt <2 x half> %a.val, %b.val + %r.val = select <2 x i1> %fcmp, <2 x half> , <2 x half> %d.val + store <2 x half> %r.val, <2 x half> addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}select_v2f16_imm_d +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI: v_cmp_lt_f32_e32 +; SI: v_cmp_lt_f32_e64 +; VI: v_cmp_lt_f16_e32 +; VI: v_cmp_lt_f16_e64 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e64 +; SI: 
v_cvt_f16_f32_e32 +; SI: v_cvt_f16_f32_e32 +; GCN: s_endpgm +define void @select_v2f16_imm_d( + <2 x half> addrspace(1)* %r, + <2 x half> addrspace(1)* %a, + <2 x half> addrspace(1)* %b, + <2 x half> addrspace(1)* %c) { +entry: + %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + %c.val = load <2 x half>, <2 x half> addrspace(1)* %c + %fcmp = fcmp olt <2 x half> %a.val, %b.val + %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> + store <2 x half> %r.val, <2 x half> addrspace(1)* %r + ret void +}