Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1519,6 +1519,10 @@ ); } +class getSOPSrcForVT<ValueType VT> { + RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32); +} + // Returns the vreg register class to use for source operand given VT class getVregSrcForVT<ValueType VT> { RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, Index: llvm/lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SOPInstructions.td +++ llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -536,6 +536,7 @@ >; } // End isCommutable = 1 +// There are also separate patterns for types other than i32 def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32", [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))] >; @@ -1325,6 +1326,24 @@ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src) >; +// FIXME: ValueType should have isVector field +class ScalarNot2Pat<Instruction inst, SDPatternOperator op, ValueType vt, bit isVector = 1> : GCNPat< + (UniformBinFrag<op> vt:$src0, (UniformUnaryFrag<!if(isVector, vnot, not)> vt:$src1)), + (inst getSOPSrcForVT<vt>.ret:$src0, getSOPSrcForVT<vt>.ret:$src1) +>; + +// Match these for some more types +// TODO: i1 +def : ScalarNot2Pat<S_ANDN2_B32, and, i16, 0>; +def : ScalarNot2Pat<S_ANDN2_B32, and, v2i16>; +def : ScalarNot2Pat<S_ANDN2_B64, and, v4i16>; +def : ScalarNot2Pat<S_ANDN2_B64, and, v2i32>; + +def : ScalarNot2Pat<S_ORN2_B32, or, i16, 0>; +def : ScalarNot2Pat<S_ORN2_B32, or, v2i16>; +def : ScalarNot2Pat<S_ORN2_B64, or, v4i16>; +def : ScalarNot2Pat<S_ORN2_B64, or, v2i32>; //===----------------------------------------------------------------------===// // Target-specific instruction encodings.
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll @@ -204,8 +204,7 @@ define amdgpu_ps i16 @s_andn2_i16(i16 inreg %src0, i16 inreg %src1) { ; GFX6-LABEL: s_andn2_i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_xor_b32 s0, s3, -1 -; GFX6-NEXT: s_and_b32 s0, s2, s0 +; GFX6-NEXT: s_andn2_b32 s0, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_andn2_i16: @@ -225,8 +224,7 @@ define amdgpu_ps i16 @s_andn2_i16_commute(i16 inreg %src0, i16 inreg %src1) { ; GFX6-LABEL: s_andn2_i16_commute: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_xor_b32 s0, s3, -1 -; GFX6-NEXT: s_and_b32 s0, s0, s2 +; GFX6-NEXT: s_andn2_b32 s0, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_andn2_i16_commute: @@ -247,7 +245,7 @@ ; GFX6-LABEL: s_andn2_i16_multi_use: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_xor_b32 s1, s3, -1 -; GFX6-NEXT: s_and_b32 s0, s2, s1 +; GFX6-NEXT: s_andn2_b32 s0, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_andn2_i16_multi_use: @@ -269,9 +267,8 @@ define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) { ; GFX6-LABEL: s_andn2_i16_multi_foldable_use: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_xor_b32 s1, s4, -1 -; GFX6-NEXT: s_and_b32 s0, s2, s1 -; GFX6-NEXT: s_and_b32 s1, s3, s1 +; GFX6-NEXT: s_andn2_b32 s0, s2, s4 +; GFX6-NEXT: s_andn2_b32 s1, s3, s4 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_andn2_i16_multi_foldable_use: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -204,8 +204,7 @@ define amdgpu_ps i16 @s_orn2_i16(i16 inreg %src0, i16 inreg %src1) { ; GFX6-LABEL: s_orn2_i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_xor_b32 s0, s3, -1 -; GFX6-NEXT: 
s_or_b32 s0, s2, s0 +; GFX6-NEXT: s_orn2_b32 s0, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_i16: @@ -225,8 +224,7 @@ define amdgpu_ps i16 @s_orn2_i16_commute(i16 inreg %src0, i16 inreg %src1) { ; GFX6-LABEL: s_orn2_i16_commute: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_xor_b32 s0, s3, -1 -; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_orn2_b32 s0, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_i16_commute: @@ -247,7 +245,7 @@ ; GFX6-LABEL: s_orn2_i16_multi_use: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_xor_b32 s1, s3, -1 -; GFX6-NEXT: s_or_b32 s0, s2, s1 +; GFX6-NEXT: s_orn2_b32 s0, s2, s3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_i16_multi_use: @@ -269,9 +267,8 @@ define amdgpu_ps { i16, i16 } @s_orn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) { ; GFX6-LABEL: s_orn2_i16_multi_foldable_use: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_xor_b32 s1, s4, -1 -; GFX6-NEXT: s_or_b32 s0, s2, s1 -; GFX6-NEXT: s_or_b32 s1, s3, s1 +; GFX6-NEXT: s_orn2_b32 s0, s2, s4 +; GFX6-NEXT: s_orn2_b32 s1, s3, s4 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_orn2_i16_multi_foldable_use: Index: llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -968,10 +968,10 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b32 s4, s4, 3 ; VI-NEXT: v_lshlrev_b16_e64 v0, s4, -1 -; VI-NEXT: v_and_b32_e32 v1, 0x505, v0 -; VI-NEXT: v_xor_b32_e32 v0, -1, v0 -; VI-NEXT: v_and_b32_e32 v0, s6, v0 -; VI-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-NEXT: v_not_b32_e32 v1, v0 +; VI-NEXT: v_and_b32_e32 v1, s6, v1 +; VI-NEXT: v_and_b32_e32 v0, 0x505, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <2 x i8> %a, i8 5, i32 %b