diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -212,10 +212,9 @@
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_mov_b32 s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s1, s3, s0
-; GFX9-NEXT:    s_xor_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s2, s2, s0
-; GFX9-NEXT:    s_and_b32 s0, s1, s0
+; GFX9-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX9-NEXT:    s_and_b32 s0, s2, s0
+; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
 ; GFX9-NEXT:    ; return to shader part epilog
   %not.src1 = xor i16 %src1, -1
   %and = and i16 %src0, %not.src1
@@ -233,10 +232,9 @@
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_mov_b32 s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s1, s3, s0
-; GFX9-NEXT:    s_xor_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s0, s2, s0
-; GFX9-NEXT:    s_and_b32 s0, s1, s0
+; GFX9-NEXT:    s_xor_b32 s0, s1, s0
+; GFX9-NEXT:    s_and_b32 s0, s0, s2
+; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
 ; GFX9-NEXT:    ; return to shader part epilog
   %not.src1 = xor i16 %src1, -1
   %and = and i16 %not.src1, %src0
@@ -255,9 +253,8 @@
 ; GFX9-NEXT:    s_mov_b32 s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s1, s3, s0
 ; GFX9-NEXT:    s_xor_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s2, s2, s0
-; GFX9-NEXT:    s_and_b32 s0, s1, s0
-; GFX9-NEXT:    s_and_b32 s0, s2, s0
+; GFX9-NEXT:    s_and_b32 s0, s2, s1
+; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
 ; GFX9-NEXT:    ; return to shader part epilog
   %not.src1 = xor i16 %src1, -1
   %and = and i16 %src0, %not.src1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
@@ -212,10 +212,9 @@
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_mov_b32 s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s1, s3, s0
-; GFX9-NEXT:    s_xor_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s2, s2, s0
-; GFX9-NEXT:    s_and_b32 s0, s1, s0
+; GFX9-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX9-NEXT:    s_or_b32 s0, s2, s0
+; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
 ; GFX9-NEXT:    ; return to shader part epilog
   %not.src1 = xor i16 %src1, -1
   %or = or i16 %src0, %not.src1
@@ -233,10 +232,9 @@
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_mov_b32 s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s1, s3, s0
-; GFX9-NEXT:    s_xor_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s0, s2, s0
-; GFX9-NEXT:    s_or_b32 s0, s1, s0
+; GFX9-NEXT:    s_xor_b32 s0, s1, s0
+; GFX9-NEXT:    s_or_b32 s0, s0, s2
+; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
 ; GFX9-NEXT:    ; return to shader part epilog
   %not.src1 = xor i16 %src1, -1
   %or = or i16 %not.src1, %src0
@@ -255,9 +253,8 @@
 ; GFX9-NEXT:    s_mov_b32 s0, 0xffff
 ; GFX9-NEXT:    s_and_b32 s1, s3, s0
 ; GFX9-NEXT:    s_xor_b32 s1, s1, s0
-; GFX9-NEXT:    s_and_b32 s2, s2, s0
-; GFX9-NEXT:    s_and_b32 s0, s1, s0
-; GFX9-NEXT:    s_or_b32 s0, s2, s0
+; GFX9-NEXT:    s_or_b32 s0, s2, s1
+; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
 ; GFX9-NEXT:    ; return to shader part epilog
   %not.src1 = xor i16 %src1, -1
   %or = or i16 %src0, %not.src1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -32,15 +32,12 @@
 ;
 ; GFX8-LABEL: scalar_xnor_v2i16_one_use:
 ; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_xor_b32 s0, s0, s1
 ; GFX8-NEXT:    s_mov_b32 s2, 0xffff
-; GFX8-NEXT:    s_lshr_b32 s5, s0, 16
-; GFX8-NEXT:    s_lshr_b32 s6, s1, 16
-; GFX8-NEXT:    s_and_b32 s4, s0, s2
-; GFX8-NEXT:    s_and_b32 s0, s1, s2
-; GFX8-NEXT:    s_and_b32 s5, s5, s2
-; GFX8-NEXT:    s_and_b32 s1, s6, s2
+; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    s_mov_b32 s3, s2
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[4:5], s[0:1]
+; GFX8-NEXT:    s_and_b32 s0, s0, s2
+; GFX8-NEXT:    s_and_b32 s1, s1, s2
 ; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
@@ -118,23 +115,16 @@
 ;
 ; GFX8-LABEL: scalar_xnor_v4i16_one_use:
 ; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT:    s_mov_b32 s4, 0xffff
-; GFX8-NEXT:    s_lshr_b32 s5, s0, 16
-; GFX8-NEXT:    s_and_b32 s7, s5, s4
+; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
-; GFX8-NEXT:    s_and_b32 s6, s0, s4
+; GFX8-NEXT:    s_and_b32 s2, s0, s4
 ; GFX8-NEXT:    s_and_b32 s0, s1, s4
 ; GFX8-NEXT:    s_and_b32 s1, s5, s4
-; GFX8-NEXT:    s_lshr_b32 s5, s2, 16
-; GFX8-NEXT:    s_and_b32 s8, s2, s4
-; GFX8-NEXT:    s_and_b32 s9, s5, s4
-; GFX8-NEXT:    s_lshr_b32 s5, s3, 16
-; GFX8-NEXT:    s_and_b32 s2, s3, s4
-; GFX8-NEXT:    s_and_b32 s3, s5, s4
-; GFX8-NEXT:    s_xor_b64 s[6:7], s[6:7], s[8:9]
 ; GFX8-NEXT:    s_mov_b32 s5, s4
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GFX8-NEXT:    s_and_b64 s[2:3], s[6:7], s[4:5]
+; GFX8-NEXT:    s_and_b32 s3, s3, s4
+; GFX8-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    s_xor_b64 s[6:7], s[0:1], s[4:5]