diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1487,12 +1487,13 @@ if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr || Opc == X86::TEST32rr || Opc == X86::TEST64rr) && N->getOperand(0) == N->getOperand(1) && - N->isOnlyUserOf(N->getOperand(0).getNode()) && + N->getOperand(0)->hasNUsesOfValue(2, N->getOperand(0).getResNo()) && N->getOperand(0).isMachineOpcode()) { SDValue And = N->getOperand(0); unsigned N0Opc = And.getMachineOpcode(); - if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr || - N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) { + if ((N0Opc == X86::AND8rr || N0Opc == X86::AND16rr || + N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) && + !And->hasAnyUseOfValue(1)) { MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, And.getOperand(0), @@ -1501,8 +1502,9 @@ MadeChange = true; continue; } - if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm || - N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) { + if ((N0Opc == X86::AND8rm || N0Opc == X86::AND16rm || + N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) && + !And->hasAnyUseOfValue(1)) { unsigned NewOpc; switch (N0Opc) { case X86::AND8rm: NewOpc = X86::TEST8mr; break; @@ -1523,7 +1525,8 @@ MVT::i32, MVT::Other, Ops); CurDAG->setNodeMemRefs( Test, cast<MachineSDNode>(And.getNode())->memoperands()); - ReplaceUses(N, Test); + ReplaceUses(And.getValue(2), SDValue(Test, 1)); + ReplaceUses(SDValue(N, 0), SDValue(Test, 0)); MadeChange = true; continue; } diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll --- a/llvm/test/CodeGen/X86/cmp.ll +++ b/llvm/test/CodeGen/X86/cmp.ll @@ -759,14 +759,13 @@ declare i32 @g() declare i32 @f() -; FIXME: We should use a test from memory here instead of a load+and.i -; The store makes sure the chain result of the load is used which prevents the -; post isel peephole from catching this. 
+; Make sure we fold the load+and into a test from memory. +; The store makes sure the chain result of the load is used which used to +; prevent the post isel peephole from catching this. define i1 @fold_test_and_with_chain(i32* %x, i32* %y, i32 %z) { ; CHECK-LABEL: fold_test_and_with_chain: ; CHECK: # %bb.0: -; CHECK-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] -; CHECK-NEXT: andl %edx, %eax # encoding: [0x21,0xd0] +; CHECK-NEXT: testl %edx, (%rdi) # encoding: [0x85,0x17] ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; CHECK-NEXT: movl %edx, (%rsi) # encoding: [0x89,0x16] ; CHECK-NEXT: retq # encoding: [0xc3]