diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1201,17 +1201,31 @@
                              Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
-                             Known2, TLO, Depth + 1))
+    APInt Op0DemandedBits = ~Known.Zero & DemandedBits;
+    if (SimplifyDemandedBits(Op0, Op0DemandedBits, DemandedElts, Known2, TLO,
+                             Depth + 1))
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
 
+    // If we have learned that some more bits of Op1 are not demanded due to
+    // known bits in Op0, try simplifying Op1 again.
+    APInt Op1DemandedBits = ~Known2.Zero & DemandedBits;
+    if (Op1DemandedBits != DemandedBits &&
+        SimplifyDemandedBits(Op1, Op1DemandedBits, DemandedElts, Known, TLO,
+                             Depth + 1))
+      return true;
+    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
     // Attempt to avoid multi-use ops if we don't need anything from them.
     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+      // Each narrowed mask is only valid while the *other* operand keeps its
+      // known-zero bits, so at most one operand may be replaced using its
+      // narrowed mask. If Op0 is replaced, Op1 must preserve all DemandedBits.
       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
-          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+          Op0, Op0DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
-          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+          Op1, DemandedOp0 ? DemandedBits : Op1DemandedBits, DemandedElts,
+          TLO.DAG, Depth + 1);
       if (DemandedOp0 || DemandedOp1) {
         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
@@ -1253,6 +1267,15 @@
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
 
+    // If we have learned that some more bits of Op1 are not demanded due to
+    // known bits in Op0, try simplifying Op1 again.
+ APInt Op1DemandedBits = ~Known2.One & DemandedBits; + if (Op1DemandedBits != DemandedBits && + SimplifyDemandedBits(Op1, Op1DemandedBits, DemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll @@ -58,7 +58,6 @@ ; CHECK-NEXT: tst.w r1, #1 ; CHECK-NEXT: csetm r1, ne ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r3, s0 @@ -209,7 +208,6 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1 -; CHECK-NEXT: vmov q3[3], q3[1], r2, r1 ; CHECK-NEXT: vmov.u16 r1, q0[1] ; CHECK-NEXT: vmov.u16 r2, q0[0] ; CHECK-NEXT: vmov q4[2], q4[0], r2, r1 @@ -226,7 +224,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r3 -; CHECK-NEXT: vmov q3[3], q3[1], r0, r3 ; CHECK-NEXT: vmov.u16 r0, q0[3] ; CHECK-NEXT: vmov.u16 r3, q0[2] ; CHECK-NEXT: vmov q4[2], q4[0], r3, r0 @@ -253,7 +250,6 @@ ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r0, r3 ; CHECK-NEXT: vmov.u16 r0, q0[5] ; CHECK-NEXT: vmov.u16 r3, q0[4] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0 @@ -272,7 +268,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r1, r3 ; CHECK-NEXT: vmov.u16 r1, q0[7] ; CHECK-NEXT: vmov.u16 r3, q0[6] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r1 @@ 
-467,7 +462,6 @@ ; CHECK-NEXT: tst.w r1, #1 ; CHECK-NEXT: csetm r1, ne ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r1, s0 @@ -812,7 +806,6 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r1 -; CHECK-NEXT: vmov q6[3], q6[1], r2, r1 ; CHECK-NEXT: vmov.u8 r1, q0[1] ; CHECK-NEXT: vmov.u8 r2, q0[0] ; CHECK-NEXT: vmov q7[2], q7[0], r2, r1 @@ -829,7 +822,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: vmov q6[2], q6[0], r0, r3 -; CHECK-NEXT: vmov q6[3], q6[1], r0, r3 ; CHECK-NEXT: vmov.u8 r0, q0[3] ; CHECK-NEXT: vmov.u8 r3, q0[2] ; CHECK-NEXT: vmov q7[2], q7[0], r3, r0 @@ -856,7 +848,6 @@ ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q5[2], q5[0], r0, r3 -; CHECK-NEXT: vmov q5[3], q5[1], r0, r3 ; CHECK-NEXT: vmov.u8 r0, q0[5] ; CHECK-NEXT: vmov.u8 r3, q0[4] ; CHECK-NEXT: vmov q6[2], q6[0], r3, r0 @@ -875,7 +866,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q5[2], q5[0], r1, r3 -; CHECK-NEXT: vmov q5[3], q5[1], r1, r3 ; CHECK-NEXT: vmov.u8 r1, q0[7] ; CHECK-NEXT: vmov.u8 r3, q0[6] ; CHECK-NEXT: vmov q6[2], q6[0], r3, r1 @@ -920,7 +910,6 @@ ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r3 -; CHECK-NEXT: vmov q3[3], q3[1], r0, r3 ; CHECK-NEXT: vmov.u8 r0, q0[9] ; CHECK-NEXT: vmov.u8 r3, q0[8] ; CHECK-NEXT: vmov q4[2], q4[0], r3, r0 @@ -939,7 +928,6 @@ ; CHECK-NEXT: rsb.w r2, r2, #0 ; CHECK-NEXT: adc.w r1, r1, r12 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 -; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 ; CHECK-NEXT: vmov.u8 r2, q0[11] ; CHECK-NEXT: vmov.u8 r3, q0[10] ; CHECK-NEXT: vmov q4[2], q4[0], r3, r2 @@ -966,7 +954,6 @@ ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r0, r3 ; 
CHECK-NEXT: vmov.u8 r0, q0[13] ; CHECK-NEXT: vmov.u8 r3, q0[12] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0 @@ -985,7 +972,6 @@ ; CHECK-NEXT: rsb.w r2, r2, #0 ; CHECK-NEXT: adc.w r1, r1, r12 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 ; CHECK-NEXT: vmov.u8 r2, q0[15] ; CHECK-NEXT: vmov.u8 r3, q0[14] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 @@ -1303,7 +1289,6 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1 -; CHECK-NEXT: vmov q3[3], q3[1], r2, r1 ; CHECK-NEXT: vmov.u16 r1, q0[1] ; CHECK-NEXT: vmov.u16 r2, q0[0] ; CHECK-NEXT: vmov q4[2], q4[0], r2, r1 @@ -1320,7 +1305,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r3 -; CHECK-NEXT: vmov q3[3], q3[1], r0, r3 ; CHECK-NEXT: vmov.u16 r0, q0[3] ; CHECK-NEXT: vmov.u16 r3, q0[2] ; CHECK-NEXT: vmov q4[2], q4[0], r3, r0 @@ -1347,7 +1331,6 @@ ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r0, r3 ; CHECK-NEXT: vmov.u16 r0, q0[5] ; CHECK-NEXT: vmov.u16 r3, q0[4] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0 @@ -1366,7 +1349,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r1, r3 ; CHECK-NEXT: vmov.u16 r1, q0[7] ; CHECK-NEXT: vmov.u16 r3, q0[6] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r1 @@ -1565,7 +1547,6 @@ ; CHECK-NEXT: tst.w r1, #1 ; CHECK-NEXT: csetm r1, ne ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r1, s0 @@ -1717,7 +1698,6 @@ ; CHECK-NEXT: tst.w r3, #1 ; CHECK-NEXT: csetm r3, ne ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 -; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov r3, s0 @@ -1883,7 +1863,6 @@ ; CHECK-NEXT: and r3, r2, #1 ; CHECK-NEXT: rsbs 
r3, r3, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r12 -; CHECK-NEXT: vmov q3[3], q3[1], r3, r12 ; CHECK-NEXT: vmov.u16 r12, q0[1] ; CHECK-NEXT: vmov.u16 r3, q0[0] ; CHECK-NEXT: vmov q4[2], q4[0], r3, r12 @@ -1900,7 +1879,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 -; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 ; CHECK-NEXT: vmov.u16 r2, q0[3] ; CHECK-NEXT: vmov.u16 r3, q0[2] ; CHECK-NEXT: vmov q4[2], q4[0], r3, r2 @@ -1927,7 +1905,6 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r4, r3 ; CHECK-NEXT: vmov.u16 r3, q0[5] ; CHECK-NEXT: vmov.u16 r4, q0[4] ; CHECK-NEXT: vmov q3[2], q3[0], r4, r3 @@ -1946,7 +1923,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 ; CHECK-NEXT: vmov.u16 r2, q0[7] ; CHECK-NEXT: vmov.u16 r3, q0[6] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 @@ -2117,7 +2093,6 @@ ; CHECK-NEXT: tst.w r3, #1 ; CHECK-NEXT: csetm r3, ne ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 -; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s3 ; CHECK-NEXT: vmov r3, s1 @@ -2376,7 +2351,6 @@ ; CHECK-NEXT: and r3, r2, #1 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r12 -; CHECK-NEXT: vmov q6[3], q6[1], r3, r12 ; CHECK-NEXT: vmov.u8 r12, q0[1] ; CHECK-NEXT: vmov.u8 r3, q0[0] ; CHECK-NEXT: vmov q7[2], q7[0], r3, r12 @@ -2393,7 +2367,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3 -; CHECK-NEXT: vmov q6[3], q6[1], r2, r3 ; CHECK-NEXT: vmov.u8 r2, q0[3] ; CHECK-NEXT: vmov.u8 r3, q0[2] ; CHECK-NEXT: vmov q7[2], q7[0], r3, r2 @@ -2420,7 +2393,6 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q5[2], q5[0], r4, r3 -; CHECK-NEXT: vmov q5[3], q5[1], r4, r3 ; CHECK-NEXT: vmov.u8 r3, q0[5] ; CHECK-NEXT: 
vmov.u8 r4, q0[4] ; CHECK-NEXT: vmov q6[2], q6[0], r4, r3 @@ -2439,7 +2411,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 -; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 ; CHECK-NEXT: vmov.u8 r2, q0[7] ; CHECK-NEXT: vmov.u8 r3, q0[6] ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 @@ -2484,7 +2455,6 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r4 -; CHECK-NEXT: vmov q3[3], q3[1], r3, r4 ; CHECK-NEXT: vmov.u8 r3, q0[9] ; CHECK-NEXT: vmov.u8 r4, q0[8] ; CHECK-NEXT: vmov q4[2], q4[0], r4, r3 @@ -2503,7 +2473,6 @@ ; CHECK-NEXT: rsb.w r2, r2, #0 ; CHECK-NEXT: adc.w r3, r3, r12 ; CHECK-NEXT: vmov q3[2], q3[0], r2, r4 -; CHECK-NEXT: vmov q3[3], q3[1], r2, r4 ; CHECK-NEXT: vmov.u8 r2, q0[11] ; CHECK-NEXT: vmov.u8 r4, q0[10] ; CHECK-NEXT: vmov q4[2], q4[0], r4, r2 @@ -2530,7 +2499,6 @@ ; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r4 -; CHECK-NEXT: vmov q2[3], q2[1], r2, r4 ; CHECK-NEXT: vmov.u8 r2, q0[13] ; CHECK-NEXT: vmov.u8 r4, q0[12] ; CHECK-NEXT: vmov q3[2], q3[0], r4, r2 @@ -2549,7 +2517,6 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r4 -; CHECK-NEXT: vmov q2[3], q2[1], r3, r4 ; CHECK-NEXT: vmov.u8 r3, q0[15] ; CHECK-NEXT: vmov.u8 r4, q0[14] ; CHECK-NEXT: vmov q0[2], q0[0], r4, r3 @@ -2867,7 +2834,6 @@ ; CHECK-NEXT: tst.w r3, #1 ; CHECK-NEXT: csetm r3, ne ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 -; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s3 ; CHECK-NEXT: vmov r3, s1 diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -3930,8 +3930,6 @@ ; SSE-NEXT: shrl $15, %ecx ; SSE-NEXT: movl %eax, %edx ; SSE-NEXT: shrl $8, %edx -; SSE-NEXT: andl $1, %edx -; SSE-NEXT: andl $8, %eax ; SSE-NEXT: shrl $3, %eax ; 
SSE-NEXT: xorl %edx, %eax ; SSE-NEXT: andl %ecx, %eax @@ -3946,8 +3944,6 @@ ; AVX1OR2-NEXT: shrl $15, %ecx ; AVX1OR2-NEXT: movl %eax, %edx ; AVX1OR2-NEXT: shrl $8, %edx -; AVX1OR2-NEXT: andl $1, %edx -; AVX1OR2-NEXT: andl $8, %eax ; AVX1OR2-NEXT: shrl $3, %eax ; AVX1OR2-NEXT: xorl %edx, %eax ; AVX1OR2-NEXT: andl %ecx, %eax diff --git a/llvm/test/CodeGen/X86/pr34137.ll b/llvm/test/CodeGen/X86/pr34137.ll --- a/llvm/test/CodeGen/X86/pr34137.ll +++ b/llvm/test/CodeGen/X86/pr34137.ll @@ -13,7 +13,6 @@ ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: andl %ecx, %edx -; CHECK-NEXT: movzwl %dx, %edx ; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: testw %cx, %ax diff --git a/llvm/test/CodeGen/X86/shift-parts.ll b/llvm/test/CodeGen/X86/shift-parts.ll --- a/llvm/test/CodeGen/X86/shift-parts.ll +++ b/llvm/test/CodeGen/X86/shift-parts.ll @@ -12,11 +12,12 @@ ; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rax ; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rcx ; CHECK-NEXT: movzbl %sil, %edx +; CHECK-NEXT: andl $1, %edx ; CHECK-NEXT: shll $6, %edx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: testb $64, %dl +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: movq %rcx, %rsi ; CHECK-NEXT: cmovneq %rax, %rsi ; CHECK-NEXT: orl $0, %esi