diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7004,6 +7004,24 @@ return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); } + if (N0.getOpcode() == ISD::XOR) { + // fold or (xor x, y), x --> or x, y + // or (xor x, y), (x and/or y) --> or x, y + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (N00 == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); + if (N01 == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); + + if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) { + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01); + } + } + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/fshl.ll @@ -704,11 +704,10 @@ ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s0 ; SI-NEXT: s_lshl_b32 s0, s2, 7 -; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: s_xor_b32 s1, s0, s3 -; SI-NEXT: s_or_b32 s0, s0, s1 +; SI-NEXT: s_or_b32 s0, s3, s0 ; SI-NEXT: s_cmp_eq_u32 s0, 0 ; SI-NEXT: s_cselect_b32 s0, s2, s3 +; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -718,8 +717,7 @@ ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b32 s4, s2, 7 -; VI-NEXT: s_xor_b32 s5, s4, s3 -; VI-NEXT: s_or_b32 s4, s4, s5 +; VI-NEXT: s_or_b32 s4, s3, s4 ; VI-NEXT: s_cmp_eq_u32 s4, 0 ; VI-NEXT: s_cselect_b32 s2, s2, s3 ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -734,8 +732,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s4, s2, 7 -; GFX9-NEXT: s_xor_b32 s5, s4, s3 -; GFX9-NEXT: s_or_b32 s4, s4, s5 +; GFX9-NEXT: s_or_b32 s4, s3, s4 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: s_cselect_b32 s2, s2, s3 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 @@ -744,15 +741,14 @@ ; ; R600-LABEL: orxor2or1: ; R600: ; %bb.0: -; R600-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: ; R600-NEXT: LSHL * T0.W, KC0[2].Z, literal.x, ; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) -; R600-NEXT: XOR_INT * T1.W, PV.W, KC0[2].W, -; R600-NEXT: OR_INT * T0.W, T0.W, PV.W, +; R600-NEXT: OR_INT * T0.W, KC0[2].W, PV.W, ; R600-NEXT: CNDE_INT T0.X, PV.W, KC0[2].Z, KC0[2].W, ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -763,8 +759,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_lshl_b32 s4, s2, 7 -; GFX10-NEXT: s_xor_b32 s5, s4, s3 -; GFX10-NEXT: s_or_b32 s4, s4, s5 +; GFX10-NEXT: s_or_b32 s4, s3, s4 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s2, s2, s3 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -777,11 +772,10 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshl_b32 s4, s2, 7 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_xor_b32 s5, s4, s3 -; GFX11-NEXT: s_or_b32 s4, s4, s5 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_or_b32 s4, s3, s4 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 ; GFX11-NEXT: s_cselect_b32 s2, s2, s3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll --- a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll +++ b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll @@ -5,17 +5,11 @@ define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) { ; GCN-LABEL: xor3_i1_const: ; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 m0, s1 -; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000 -; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0 -; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x -; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1 -; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0 -; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] -; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1] -; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, 0x42640000 +; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0 +; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[0:1] ; GCN-NEXT: ; return to shader part epilog main_body: %tmp26 = fcmp nsz olt float %arg1, 0.000000e+00 diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -151,21 +151,14 @@ ; CHECK-LABEL: mand16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: orl %esi, %eax ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq ; ; X86-LABEL: mand16: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl %ecx, %edx -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orw {{[0-9]+}}(%esp), %ax ; X86-NEXT: retl %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> @@ -181,9 +174,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: kmovw (%rdi), %k0 ; KNL-NEXT: kmovw (%rsi), %k1 -; KNL-NEXT: kandw %k1, %k0, %k2 -; KNL-NEXT: kxorw %k1, %k0, %k0 -; KNL-NEXT: korw %k0, %k2, %k0 +; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax ; KNL-NEXT: retq @@ -192,9 +183,7 @@ ; SKX: ## %bb.0: ; SKX-NEXT: kmovw (%rdi), %k0 ; SKX-NEXT: kmovw (%rsi), %k1 -; SKX-NEXT: kandw %k1, %k0, %k2 -; SKX-NEXT: kxorw %k1, %k0, %k0 -; SKX-NEXT: korw %k0, %k2, %k0 +; SKX-NEXT: korw %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq @@ -203,9 +192,7 @@ ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: kmovw (%rdi), %k0 ; AVX512BW-NEXT: kmovw (%rsi), %k1 -; AVX512BW-NEXT: kandw %k1, %k0, %k2 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: korw %k0, %k2, %k0 +; AVX512BW-NEXT: korw %k1, %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: retq @@ -214,9 +201,7 @@ ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: kmovw (%rdi), %k0 ; AVX512DQ-NEXT: kmovw (%rsi), %k1 -; AVX512DQ-NEXT: kandw %k1, %k0, %k2 -; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 -; AVX512DQ-NEXT: korw %k0, %k2, %k0 +; AVX512DQ-NEXT: korw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax ; AVX512DQ-NEXT: retq @@ -227,9 +212,7 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw (%ecx), %k0 ; X86-NEXT: kmovw (%eax), %k1 -; X86-NEXT: kandw %k1, %k0, %k2 -; X86-NEXT: kxorw %k1, %k0, %k0 -; X86-NEXT: korw %k0, %k2, %k0 +; X86-NEXT: korw %k1, %k0, %k0 ; X86-NEXT: kmovd %k0, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll --- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll @@ -79,9 +79,7 @@ ; CHECK-LABEL: mand32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: xorl %esi, %edi -; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: orl %esi, %eax ; CHECK-NEXT: retq %ma = bitcast i32 %x to <32 x i1> %mb = bitcast i32 %y to <32 x i1> @@ -97,9 +95,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd (%rdi), %k0 ; CHECK-NEXT: kmovd (%rsi), %k1 -; CHECK-NEXT: kandd %k1, %k0, %k2 -; CHECK-NEXT: kxord %k1, %k0, %k0 -; CHECK-NEXT: kord %k0, %k2, %k0 +; CHECK-NEXT: kord %k1, %k0, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: retq %ma = load <32 x i1>, ptr %x @@ -115,9 +111,7 @@ ; CHECK-LABEL: mand64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: andq %rsi, %rax -; CHECK-NEXT: xorq %rsi, %rdi -; CHECK-NEXT: orq %rdi, %rax +; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: retq %ma = bitcast i64 %x to <64 x i1> %mb = bitcast i64 %y to <64 x i1> @@ -133,9 +127,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovq (%rdi), %k0 ; CHECK-NEXT: kmovq (%rsi), %k1 -; CHECK-NEXT: kandq %k1, %k0, %k2 -; CHECK-NEXT: kxorq %k1, %k0, %k0 -; CHECK-NEXT: korq %k0, %k2, %k0 +; CHECK-NEXT: korq %k1, %k0, %k0 ; CHECK-NEXT: kmovq %k0, %rax ; CHECK-NEXT: retq %ma = load <64 x i1>, ptr %x diff --git a/llvm/test/CodeGen/X86/avx512dq-mask-op.ll b/llvm/test/CodeGen/X86/avx512dq-mask-op.ll --- a/llvm/test/CodeGen/X86/avx512dq-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512dq-mask-op.ll @@ -33,9 +33,8 @@ ; CHECK-LABEL: mand8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andb %sil, %al -; CHECK-NEXT: xorb %sil, %dil -; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %ma = bitcast i8 %x to <8 x i1> %mb = bitcast i8 %y to <8 x i1> @@ -51,9 +50,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovb (%rdi), %k0 ; CHECK-NEXT: kmovb (%rsi), %k1 -; CHECK-NEXT: kandb %k1, %k0, %k2 -; CHECK-NEXT: kxorb %k1, %k0, %k0 -; CHECK-NEXT: korb %k0, %k2, %k0 +; CHECK-NEXT: korb %k1, %k0, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll --- a/llvm/test/CodeGen/X86/combine-sra-load.ll +++ b/llvm/test/CodeGen/X86/combine-sra-load.ll @@ -93,11 +93,11 @@ ; CHECK-NEXT: movswl 2(%rdi), %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: xorl $6, %eax -; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: imull %ecx, %eax ; CHECK-NEXT: retq %load = load i32, ptr %p %shift = ashr i32 %load, 16 %use1 = xor i32 %shift, 6 - %use2 = or i32 %shift, %use1 + %use2 = mul i32 %shift, %use1 ret i32 %use2 }