diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1008,33 +1008,62 @@ // (load/store (add, (add, x, offset1), offset2)) -> // (load/store (add, x, offset1+offset2)). - if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) - return false; + // (load/store (add, (add, x, y), offset2)) -> + // (load/store (add, (add, x, offset2), y)). - if (N0.hasOneUse()) + if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) return false; - auto *C1 = dyn_cast(N0.getOperand(1)); auto *C2 = dyn_cast(N1); - if (!C1 || !C2) + if (!C2) return false; - const APInt &C1APIntVal = C1->getAPIntValue(); const APInt &C2APIntVal = C2->getAPIntValue(); - if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64) - return false; + if (auto *C1 = dyn_cast(N0.getOperand(1))) { + if (N0.hasOneUse()) + return false; - const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; - if (CombinedValueIntVal.getBitWidth() > 64) - return false; - const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); - - for (SDNode *Node : N->uses()) { - auto LoadStore = dyn_cast(Node); - if (LoadStore) { - // Is x[offset2] already not a legal addressing mode? If so then - // reassociating the constants breaks nothing (we test offset2 because - // that's the one we hope to fold into the load or store). + const APInt &C1APIntVal = C1->getAPIntValue(); + if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64) + return false; + + const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; + if (CombinedValueIntVal.getBitWidth() > 64) + return false; + const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); + + for (SDNode *Node : N->uses()) { + if (auto *LoadStore = dyn_cast(Node)) { + // Is x[offset2] already not a legal addressing mode? If so then + // reassociating the constants breaks nothing (we test offset2 because + // that's the one we hope to fold into the load or store). + TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.BaseOffs = C2APIntVal.getSExtValue(); + EVT VT = LoadStore->getMemoryVT(); + unsigned AS = LoadStore->getAddressSpace(); + Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + continue; + + // Would x[offset1+offset2] still be a legal addressing mode? + AM.BaseOffs = CombinedValue; + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + return true; + } + } + } else { + if (auto *GA = dyn_cast(N0.getOperand(1))) + if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA)) + return false; + + for (SDNode *Node : N->uses()) { + auto *LoadStore = dyn_cast(Node); + if (!LoadStore) + return false; + + // Is x[offset2] a legal addressing mode? If so then + // reassociating the constants breaks address pattern TargetLoweringBase::AddrMode AM; AM.HasBaseReg = true; AM.BaseOffs = C2APIntVal.getSExtValue(); @@ -1042,13 +1071,9 @@ unsigned AS = LoadStore->getAddressSpace(); Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) - continue; - - // Would x[offset1+offset2] still be a legal addressing mode? - AM.BaseOffs = CombinedValue; - if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) - return true; + return false; } + return true; } return false; @@ -1099,6 +1124,20 @@ return N00; } + if (TLI.isReassocProfitable(DAG, N0, N1)) { + // Reassociate if (op N00, N1) already exist + if (N1 != N01) + if (SDNode *ExistNode = + DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) + return DAG.getNode(Opc, DL, VT, SDValue(ExistNode, 0), N01); + + // Reassociate if (op N01, N1) already exist + if (N1 != N00) + if (SDNode *ExistNode = + DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) + return DAG.getNode(Opc, DL, VT, SDValue(ExistNode, 0), N00); + } + return SDValue(); } diff --git a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll --- a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll +++ b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll @@ -5,17 +5,15 @@ define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) { ; GCN-LABEL: xor3_i1_const: ; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000 ; GCN-NEXT: s_mov_b32 m0, s1 -; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0 -; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 +; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000 +; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0 ; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x -; GCN-NEXT: s_and_b64 s[2:3], s[2:3], vcc +; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1 ; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0 -; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], -1 -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] -; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3] -; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], -1 +; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc +; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1] ; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1] ; GCN-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll --- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -12,222 +12,222 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq X(%rip), %r8 -; CHECK-NEXT: movq X(%rip), %r10 ; CHECK-NEXT: movq X(%rip), %r9 -; CHECK-NEXT: movq X(%rip), %r12 ; CHECK-NEXT: movq X(%rip), %r15 +; CHECK-NEXT: movq X(%rip), %rax +; CHECK-NEXT: movq X(%rip), %rdx +; CHECK-NEXT: movq X(%rip), %r12 ; CHECK-NEXT: movq X(%rip), %r14 ; CHECK-NEXT: movq X(%rip), %r11 -; CHECK-NEXT: movq X(%rip), %rdx -; CHECK-NEXT: addq %r15, %rdx -; CHECK-NEXT: movq X(%rip), %rsi -; CHECK-NEXT: bswapq %rsi -; CHECK-NEXT: leaq (%r11,%r14), %rbx -; CHECK-NEXT: addq %r15, %rbx -; CHECK-NEXT: addq %rdx, %rbx -; CHECK-NEXT: addq %rsi, %rbx -; CHECK-NEXT: leaq (%r9,%r10), %rdx -; CHECK-NEXT: addq %rdx, %rdx -; CHECK-NEXT: addq %r8, %rdx ; CHECK-NEXT: movq X(%rip), %rdi -; CHECK-NEXT: addq %rbx, %r12 -; CHECK-NEXT: addq %r8, %rdx -; CHECK-NEXT: addq %rbx, %rdx -; CHECK-NEXT: bswapq %rdi -; CHECK-NEXT: leaq (%r15,%r14), %rsi -; CHECK-NEXT: addq %r12, %rsi -; CHECK-NEXT: addq %r11, %rdi -; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: leaq (%r10,%r8), %rsi -; CHECK-NEXT: addq %rsi, %rsi -; CHECK-NEXT: addq %rdx, %rsi -; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: addq %r12, %rdi -; CHECK-NEXT: addq %rdi, %r9 -; CHECK-NEXT: addq %rdx, %rsi +; CHECK-NEXT: movq X(%rip), %rcx +; CHECK-NEXT: movq X(%rip), %rbx +; CHECK-NEXT: bswapq %rcx +; CHECK-NEXT: leaq (%r11,%r14), %rsi +; CHECK-NEXT: addq %r12, %rsi ; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: addq %rcx, %rsi +; CHECK-NEXT: leaq (%r15,%r9), %r8 +; CHECK-NEXT: leaq (%r8,%rax), %r10 +; CHECK-NEXT: addq %rsi, %rdx +; CHECK-NEXT: addq %r10, %r10 ; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%r12,%r15), %rdi -; CHECK-NEXT: addq %r9, %rdi -; CHECK-NEXT: addq %r14, %rbx -; CHECK-NEXT: addq %rdi, %rbx -; CHECK-NEXT: leaq (%rdx,%r8), %rdi -; CHECK-NEXT: addq %rdi, %rdi -; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: addq %rsi, %r10 +; CHECK-NEXT: addq %r11, %rbx +; CHECK-NEXT: leaq (%r12,%r14), %rcx +; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: addq %rcx, %rbx +; CHECK-NEXT: addq %r8, %r8 +; CHECK-NEXT: addq %r10, %r8 ; CHECK-NEXT: movq X(%rip), %rcx -; CHECK-NEXT: addq %r9, %rbx -; CHECK-NEXT: addq %rbx, %r10 -; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: addq %rdx, %rbx +; CHECK-NEXT: addq %rbx, %rax +; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: addq %rbx, %r8 ; CHECK-NEXT: bswapq %rcx -; CHECK-NEXT: leaq (%r9,%r12), %rax -; CHECK-NEXT: addq %r10, %rax -; CHECK-NEXT: addq %r15, %rcx +; CHECK-NEXT: leaq (%rdx,%r12), %rsi +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: addq %r14, %rcx +; CHECK-NEXT: addq %rsi, %rcx +; CHECK-NEXT: leaq (%r10,%r9), %rbx +; CHECK-NEXT: addq %rbx, %rbx +; CHECK-NEXT: addq %r8, %rbx +; CHECK-NEXT: movq X(%rip), %rdi ; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%rdx), %r11 -; CHECK-NEXT: addq %r11, %r11 -; CHECK-NEXT: addq %rdi, %r11 -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %r10, %rcx -; CHECK-NEXT: addq %rcx, %r8 -; CHECK-NEXT: addq %rdi, %r11 -; CHECK-NEXT: addq %rcx, %r11 -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%r10,%r9), %rcx -; CHECK-NEXT: addq %r8, %rcx -; CHECK-NEXT: addq %r12, %rbx +; CHECK-NEXT: addq %rcx, %r15 +; CHECK-NEXT: addq %r8, %rbx ; CHECK-NEXT: addq %rcx, %rbx -; CHECK-NEXT: leaq (%rdi,%rsi), %r14 +; CHECK-NEXT: bswapq %rdi +; CHECK-NEXT: leaq (%rax,%rdx), %rcx +; CHECK-NEXT: addq %r15, %rcx +; CHECK-NEXT: addq %r12, %rdi +; CHECK-NEXT: addq %rcx, %rdi +; CHECK-NEXT: leaq (%r8,%r10), %r12 +; CHECK-NEXT: addq %r12, %r12 +; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: movq X(%rip), %rcx +; CHECK-NEXT: addq %r15, %rdi +; CHECK-NEXT: addq %rdi, %r9 +; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: addq %rdi, %r12 +; CHECK-NEXT: bswapq %rcx +; CHECK-NEXT: leaq (%r15,%rax), %rdi +; CHECK-NEXT: addq %r9, %rdi +; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: leaq (%rbx,%r8), %r13 +; CHECK-NEXT: addq %r13, %r13 +; CHECK-NEXT: addq %r12, %r13 +; CHECK-NEXT: movq X(%rip), %rdx +; CHECK-NEXT: addq %r9, %rcx +; CHECK-NEXT: addq %rcx, %r10 +; CHECK-NEXT: addq %r12, %r13 +; CHECK-NEXT: addq %rcx, %r13 +; CHECK-NEXT: bswapq %rdx +; CHECK-NEXT: leaq (%r9,%r15), %rcx +; CHECK-NEXT: addq %r10, %rcx +; CHECK-NEXT: addq %rax, %rdx +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: leaq (%r12,%rbx), %r14 ; CHECK-NEXT: addq %r14, %r14 -; CHECK-NEXT: addq %r11, %r14 +; CHECK-NEXT: addq %r13, %r14 ; CHECK-NEXT: movq X(%rip), %rax -; CHECK-NEXT: addq %r8, %rbx -; CHECK-NEXT: addq %rbx, %rdx -; CHECK-NEXT: addq %r11, %r14 -; CHECK-NEXT: addq %rbx, %r14 +; CHECK-NEXT: addq %r10, %rdx +; CHECK-NEXT: addq %rdx, %r8 +; CHECK-NEXT: addq %r13, %r14 +; CHECK-NEXT: addq %rdx, %r14 ; CHECK-NEXT: bswapq %rax -; CHECK-NEXT: leaq (%r8,%r10), %rbx -; CHECK-NEXT: addq %rdx, %rbx -; CHECK-NEXT: addq %r9, %rax +; CHECK-NEXT: leaq (%r10,%r9), %rcx +; CHECK-NEXT: addq %r8, %rcx +; CHECK-NEXT: addq %r15, %rax +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: leaq (%r13,%r12), %r11 +; CHECK-NEXT: addq %r11, %r11 +; CHECK-NEXT: addq %r14, %r11 +; CHECK-NEXT: movq X(%rip), %rcx +; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: addq %r14, %r11 +; CHECK-NEXT: addq %rax, %r11 +; CHECK-NEXT: bswapq %rcx +; CHECK-NEXT: leaq (%r8,%r10), %rax ; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: leaq (%r11,%rdi), %r9 +; CHECK-NEXT: addq %r9, %rcx +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: leaq (%r14,%r13), %r9 ; CHECK-NEXT: addq %r9, %r9 -; CHECK-NEXT: addq %r14, %r9 -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %rdx, %rax -; CHECK-NEXT: addq %rax, %rsi -; CHECK-NEXT: addq %r14, %r9 -; CHECK-NEXT: addq %rax, %r9 -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%rdx,%r8), %rax -; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: addq %r10, %rbx -; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: leaq (%r14,%r11), %r10 +; CHECK-NEXT: addq %r11, %r9 +; CHECK-NEXT: movq X(%rip), %rax +; CHECK-NEXT: addq %rbx, %rcx +; CHECK-NEXT: addq %rcx, %r12 +; CHECK-NEXT: addq %r11, %r9 +; CHECK-NEXT: addq %rcx, %r9 +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: leaq (%rbx,%r8), %rcx +; CHECK-NEXT: addq %r12, %rcx +; CHECK-NEXT: addq %r10, %rax +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: leaq (%r11,%r14), %r10 ; CHECK-NEXT: addq %r10, %r10 ; CHECK-NEXT: addq %r9, %r10 -; CHECK-NEXT: movq X(%rip), %rax -; CHECK-NEXT: addq %rsi, %rbx -; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: movq X(%rip), %rsi +; CHECK-NEXT: addq %r12, %rax +; CHECK-NEXT: addq %rax, %r13 ; CHECK-NEXT: addq %r9, %r10 -; CHECK-NEXT: addq %rbx, %r10 +; CHECK-NEXT: addq %rax, %r10 +; CHECK-NEXT: bswapq %rsi +; CHECK-NEXT: leaq (%r12,%rbx), %rax +; CHECK-NEXT: addq %r13, %rax +; CHECK-NEXT: addq %r8, %rsi +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: leaq (%r9,%r11), %rdx +; CHECK-NEXT: addq %rdx, %rdx +; CHECK-NEXT: addq %r10, %rdx +; CHECK-NEXT: movq X(%rip), %rax +; CHECK-NEXT: addq %r13, %rsi +; CHECK-NEXT: addq %rsi, %r14 +; CHECK-NEXT: addq %r10, %rdx +; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: bswapq %rax -; CHECK-NEXT: leaq (%rsi,%rdx), %rbx -; CHECK-NEXT: addq %rdi, %rbx -; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: leaq (%r13,%r12), %rsi +; CHECK-NEXT: addq %r14, %rsi ; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: leaq (%r9,%r14), %r8 +; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: leaq (%r10,%r9), %r8 ; CHECK-NEXT: addq %r8, %r8 -; CHECK-NEXT: addq %r10, %r8 -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: addq %rdx, %r8 +; CHECK-NEXT: movq X(%rip), %rsi +; CHECK-NEXT: addq %r14, %rax ; CHECK-NEXT: addq %rax, %r11 -; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: addq %rdx, %r8 ; CHECK-NEXT: addq %rax, %r8 -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%rdi,%rsi), %rax +; CHECK-NEXT: bswapq %rsi +; CHECK-NEXT: leaq (%r14,%r13), %rax ; CHECK-NEXT: addq %r11, %rax -; CHECK-NEXT: addq %rdx, %rbx -; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: leaq (%r10,%r9), %r15 -; CHECK-NEXT: addq %r15, %r15 -; CHECK-NEXT: addq %r8, %r15 -; CHECK-NEXT: movq X(%rip), %rax -; CHECK-NEXT: addq %r11, %rbx -; CHECK-NEXT: addq %rbx, %r14 -; CHECK-NEXT: addq %r8, %r15 -; CHECK-NEXT: addq %rbx, %r15 -; CHECK-NEXT: bswapq %rax -; CHECK-NEXT: leaq (%r11,%rdi), %rbx -; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: addq %r12, %rsi +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: leaq (%rdx,%r10), %rax +; CHECK-NEXT: addq %rax, %rax +; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: movq X(%rip), %rdi +; CHECK-NEXT: addq %r11, %rsi +; CHECK-NEXT: addq %rsi, %r9 +; CHECK-NEXT: addq %r8, %rax ; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: leaq (%r8,%r10), %rsi +; CHECK-NEXT: bswapq %rdi +; CHECK-NEXT: leaq (%r11,%r14), %rsi +; CHECK-NEXT: addq %r9, %rsi +; CHECK-NEXT: addq %r13, %rdi +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: leaq (%r8,%rdx), %rsi ; CHECK-NEXT: addq %rsi, %rsi -; CHECK-NEXT: addq %r15, %rsi -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %r14, %rax -; CHECK-NEXT: addq %rax, %r9 -; CHECK-NEXT: addq %r15, %rsi ; CHECK-NEXT: addq %rax, %rsi -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%r14,%r11), %rax -; CHECK-NEXT: addq %r9, %rax -; CHECK-NEXT: addq %rdi, %rbx -; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: leaq (%r15,%r8), %r12 -; CHECK-NEXT: addq %r12, %r12 -; CHECK-NEXT: addq %rsi, %r12 ; CHECK-NEXT: movq X(%rip), %rcx -; CHECK-NEXT: addq %r9, %rbx -; CHECK-NEXT: addq %rbx, %r10 -; CHECK-NEXT: addq %rsi, %r12 -; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: addq %r9, %rdi +; CHECK-NEXT: addq %rdi, %r10 +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: addq %rdi, %rsi ; CHECK-NEXT: bswapq %rcx -; CHECK-NEXT: leaq (%r9,%r14), %rax -; CHECK-NEXT: addq %r10, %rax -; CHECK-NEXT: addq %r11, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%r15), %rax -; CHECK-NEXT: addq %rax, %rax -; CHECK-NEXT: addq %r12, %rax -; CHECK-NEXT: movq X(%rip), %rbx +; CHECK-NEXT: leaq (%r9,%r11), %rdi +; CHECK-NEXT: addq %r10, %rdi +; CHECK-NEXT: addq %r14, %rcx +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: leaq (%rax,%r8), %rdi +; CHECK-NEXT: addq %rdi, %rdi +; CHECK-NEXT: addq %rsi, %rdi ; CHECK-NEXT: addq %r10, %rcx -; CHECK-NEXT: addq %rcx, %r8 -; CHECK-NEXT: addq %r12, %rax -; CHECK-NEXT: addq %rcx, %rax -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%r10,%r9), %rcx -; CHECK-NEXT: addq %r8, %rcx -; CHECK-NEXT: addq %r14, %rbx -; CHECK-NEXT: addq %rcx, %rbx -; CHECK-NEXT: leaq (%r12,%rsi), %rcx -; CHECK-NEXT: addq %rcx, %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: movq X(%rip), %rdx -; CHECK-NEXT: addq %r8, %rbx -; CHECK-NEXT: addq %rbx, %r15 -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: addq %rbx, %rcx -; CHECK-NEXT: bswapq %rdx -; CHECK-NEXT: leaq (%r8,%r10), %rbx -; CHECK-NEXT: addq %r15, %rbx -; CHECK-NEXT: addq %r9, %rdx -; CHECK-NEXT: addq %rbx, %rdx -; CHECK-NEXT: leaq (%rax,%r12), %rbx -; CHECK-NEXT: addq %rbx, %rbx -; CHECK-NEXT: addq %rcx, %rbx -; CHECK-NEXT: addq %r15, %rdx -; CHECK-NEXT: addq %rdx, %rsi -; CHECK-NEXT: addq %rcx, %rbx -; CHECK-NEXT: addq %rdx, %rbx -; CHECK-NEXT: movq X(%rip), %rdx -; CHECK-NEXT: bswapq %rdx -; CHECK-NEXT: addq %r10, %rdx -; CHECK-NEXT: leaq (%r15,%r8), %rdi +; CHECK-NEXT: addq %rcx, %rdx ; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: addq %rdi, %rdx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: addq %rcx, %rcx -; CHECK-NEXT: addq %rbx, %rcx +; CHECK-NEXT: addq %rcx, %rdi +; CHECK-NEXT: movq X(%rip), %rcx +; CHECK-NEXT: bswapq %rcx +; CHECK-NEXT: addq %r11, %rcx +; CHECK-NEXT: leaq (%r10,%r9), %rbx +; CHECK-NEXT: addq %rdx, %rbx ; CHECK-NEXT: addq %rbx, %rcx -; CHECK-NEXT: addq %rsi, %rdx -; CHECK-NEXT: addq %rdx, %r12 +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: addq %rsi, %rsi +; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: addq %rdi, %rsi ; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: addq %rcx, %r8 +; CHECK-NEXT: addq %rcx, %rsi ; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: bswapq %rax -; CHECK-NEXT: addq %r15, %rsi +; CHECK-NEXT: addq %r10, %rdx ; CHECK-NEXT: movq %rax, X(%rip) +; CHECK-NEXT: addq %r9, %rax +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %r8, %rax -; CHECK-NEXT: addq %r12, %rsi ; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: addq %r12, %rax -; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll --- a/llvm/test/CodeGen/X86/ctpop-combine.ll +++ b/llvm/test/CodeGen/X86/ctpop-combine.ll @@ -162,33 +162,29 @@ define i1 @ctpop_trunc_non_power2(i255 %x) nounwind { ; CHECK-LABEL: ctpop_trunc_non_power2: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movabsq $9223372036854775807, %r8 # imm = 0x7FFFFFFFFFFFFFFF ; CHECK-NEXT: movq %rcx, %r9 ; CHECK-NEXT: andq %r8, %r9 -; CHECK-NEXT: movq %rdi, %r11 -; CHECK-NEXT: addq $-1, %r11 -; CHECK-NEXT: movq %rsi, %r10 -; CHECK-NEXT: adcq $-1, %r10 -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rdi, %r10 +; CHECK-NEXT: addq $-1, %r10 +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: adcq $-1, %rax -; CHECK-NEXT: movq %rcx, %rbx -; CHECK-NEXT: adcq %r8, %rbx -; CHECK-NEXT: andq %rdi, %r11 -; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: movq %rdx, %r11 +; CHECK-NEXT: adcq $-1, %r11 +; CHECK-NEXT: adcq %r8, %rcx +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: andq %rdx, %r11 +; CHECK-NEXT: orq %r10, %r11 +; CHECK-NEXT: andq %r9, %rcx +; CHECK-NEXT: andq %rsi, %rax +; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: orq %r11, %rax -; CHECK-NEXT: andq %rsi, %r10 -; CHECK-NEXT: andq %r8, %rbx -; CHECK-NEXT: andq %rcx, %rbx -; CHECK-NEXT: orq %r10, %rbx -; CHECK-NEXT: orq %rax, %rbx ; CHECK-NEXT: sete %cl ; CHECK-NEXT: orq %rdx, %rdi ; CHECK-NEXT: orq %rsi, %r9 ; CHECK-NEXT: orq %rdi, %r9 ; CHECK-NEXT: setne %al ; CHECK-NEXT: andb %cl, %al -; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq %a = call i255 @llvm.ctpop.i255(i255 %x) %b = trunc i255 %a to i8 ; largest value from ctpop is 255, fits in 8 bits.