Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -2664,6 +2664,8 @@ /// is created but not inserted into any basic blocks, and this method is /// called to expand it into a sequence of instructions, potentially also /// creating new basic blocks and control flow. + /// As long as the returned basic block is different (i.e., we created a new + /// one), the custom inserter is free to modify the rest of \p MBB. virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -18108,6 +18108,30 @@ // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); + + // We also lower double CMOVs: + // (CMOV (CMOV F, T, cc1), T, cc2) + // to two successive branches. This lets us avoid inserting a PHI between + // them, which would result in needless copies. + MachineBasicBlock::iterator InstrIt = MI; + ++InstrIt; + assert(InstrIt != BB->end() && + "Basic block doesn't have a terminator, last instruction is CMOV!"); + MachineInstr *NextMI = &*InstrIt; + bool is2CMOV = + (NextMI->getOpcode() == MI->getOpcode() && + NextMI->getOperand(2).getReg() == MI->getOperand(2).getReg() && + NextMI->getOperand(1).getReg() == MI->getOperand(0).getReg()); + MachineBasicBlock *jcc1MBB = nullptr; + + // If we have a double CMOV, we lower it to two successive branches to + // the same block. EFLAGS is used by both, so mark it as live in the second. 
+ if (is2CMOV) { + jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, jcc1MBB); + jcc1MBB->addLiveIn(X86::EFLAGS); + } + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); @@ -18116,8 +18140,11 @@ // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - if (!MI->killsRegister(X86::EFLAGS) && - !checkAndUpdateEFLAGSKill(MI, BB, TRI)) { + auto MIKillsEFLAGS = [BB, TRI](MachineInstr *MI) { + return (MI->killsRegister(X86::EFLAGS) || + checkAndUpdateEFLAGSKill(MI, BB, TRI)); + }; + if (!MIKillsEFLAGS(is2CMOV ? NextMI : MI)) { copy0MBB->addLiveIn(X86::EFLAGS); sinkMBB->addLiveIn(X86::EFLAGS); } @@ -18128,7 +18155,19 @@ sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); + if (is2CMOV) { + // The fallthrough block may be jcc1MBB, if we have a double CMOV. + BB->addSuccessor(jcc1MBB); + + // In that case, jcc1MBB will itself fall through to copy0MBB, and + // jump to the sinkMBB. + jcc1MBB->addSuccessor(copy0MBB); + jcc1MBB->addSuccessor(sinkMBB); + } else { + BB->addSuccessor(copy0MBB); + } + + // The true block target of the first (or only) branch is always sinkMBB. BB->addSuccessor(sinkMBB); // Create the conditional branch instruction. @@ -18136,6 +18175,12 @@ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + if (is2CMOV) { + unsigned Opc2 = + X86::GetCondBranchFromCond((X86::CondCode)NextMI->getOperand(3).getImm()); + BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB); + } + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -18144,10 +18189,22 @@ // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... 
- BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(X86::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + MachineInstrBuilder MIB = + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), + MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + // If this was a double CMOV, the second Jcc provides the same incoming + // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes). + if (is2CMOV) { + MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB); + // Copy the PHI result to the register defined by the second CMOV. + BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), + DL, TII->get(TargetOpcode::COPY), + NextMI->getOperand(0).getReg()).addReg(MI->getOperand(0).getReg()); + NextMI->eraseFromParent(); + } MI->eraseFromParent(); // The pseudo instruction is gone now. 
return sinkMBB; Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -15,21 +15,18 @@ ; CMOV-NEXT: movl %edi, %eax ; CMOV-NEXT: retq -; NOCMOV-NEXT: flds 8(%esp) -; NOCMOV-NEXT: flds 4(%esp) -; NOCMOV-NEXT: fucompp -; NOCMOV-NEXT: fnstsw %ax -; NOCMOV-NEXT: sahf -; NOCMOV-NEXT: leal 16(%esp), %eax -; NOCMOV-NEXT: movl %eax, %ecx -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; NOCMOV-NEXT: leal 12(%esp), %ecx -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %ecx, %eax -; NOCMOV-NEXT: [[TBB2]]: -; NOCMOV-NEXT: movl (%eax), %eax -; NOCMOV-NEXT: retl +; NOCMOV-NEXT: flds 8(%esp) +; NOCMOV-NEXT: flds 4(%esp) +; NOCMOV-NEXT: fucompp +; NOCMOV-NEXT: fnstsw %ax +; NOCMOV-NEXT: sahf +; NOCMOV-NEXT: leal 16(%esp), %eax +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] +; NOCMOV-NEXT: leal 12(%esp), %eax +; NOCMOV-NEXT:[[TBB]]: +; NOCMOV-NEXT: movl (%eax), %eax +; NOCMOV-NEXT: retl define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 { entry: %cmp = fcmp oeq float %a, %b @@ -51,13 +48,10 @@ ; NOCMOV-NEXT: fnstsw %ax ; NOCMOV-NEXT: sahf ; NOCMOV-NEXT: leal 20(%esp), %ecx -; NOCMOV-NEXT: movl %ecx, %eax -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; NOCMOV-NEXT: leal 12(%esp), %eax -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %eax, %ecx -; NOCMOV-NEXT: [[TBB2]]: +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] +; NOCMOV-NEXT: leal 12(%esp), %ecx +; NOCMOV-NEXT: [[TBB]]: ; NOCMOV-NEXT: movl (%ecx), %eax ; NOCMOV-NEXT: orl $4, %ecx ; NOCMOV-NEXT: movl (%ecx), %edx @@ -83,13 +77,10 @@ ; NOCMOV-NEXT: fnstsw %ax ; NOCMOV-NEXT: sahf ; NOCMOV-NEXT: leal 12(%esp), %ecx -; NOCMOV-NEXT: movl %ecx, %eax -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; NOCMOV-NEXT: leal 20(%esp), %eax -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: jp 
[[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %eax, %ecx -; NOCMOV-NEXT: [[TBB2]]: +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] +; NOCMOV-NEXT: leal 20(%esp), %ecx +; NOCMOV-NEXT: [[TBB]]: ; NOCMOV-NEXT: movl (%ecx), %eax ; NOCMOV-NEXT: orl $4, %ecx ; NOCMOV-NEXT: movl (%ecx), %edx @@ -104,13 +95,10 @@ ; CHECK-LABEL: test_select_fcmp_oeq_f64: ; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: movaps %xmm3, %xmm0 -; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; CMOV-NEXT: movaps %xmm2, %xmm0 -; CMOV-NEXT: [[TBB1]]: -; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; CMOV-NEXT: movaps %xmm0, %xmm3 -; CMOV-NEXT: [[TBB2]]: +; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; CMOV-NEXT: jp [[TBB]] +; CMOV-NEXT: movaps %xmm2, %xmm3 +; CMOV-NEXT: [[TBB]]: ; CMOV-NEXT: movaps %xmm3, %xmm0 ; CMOV-NEXT: retq @@ -120,13 +108,10 @@ ; NOCMOV-NEXT: fnstsw %ax ; NOCMOV-NEXT: sahf ; NOCMOV-NEXT: leal 20(%esp), %eax -; NOCMOV-NEXT: movl %eax, %ecx -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; NOCMOV-NEXT: leal 12(%esp), %ecx -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %ecx, %eax -; NOCMOV-NEXT: [[TBB2]]: +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] +; NOCMOV-NEXT: leal 12(%esp), %eax +; NOCMOV-NEXT: [[TBB]]: ; NOCMOV-NEXT: fldl (%eax) ; NOCMOV-NEXT: retl define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 { @@ -139,68 +124,51 @@ ; CHECK-LABEL: test_select_fcmp_oeq_v4i32: ; CMOV-NEXT: ucomiss %xmm1, %xmm0 -; CMOV-NEXT: movaps %xmm3, %xmm0 -; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; CMOV-NEXT: movaps %xmm2, %xmm0 -; CMOV-NEXT: [[TBB1]]: -; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; CMOV-NEXT: movaps %xmm0, %xmm3 -; CMOV-NEXT: [[TBB2]]: +; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; CMOV-NEXT: jp [[TBB]] +; CMOV-NEXT: movaps %xmm2, %xmm3 +; CMOV-NEXT: [[TBB]]: ; CMOV-NEXT: movaps %xmm3, %xmm0 ; CMOV-NEXT: retq -; NOCMOV-NEXT: pushl %ebx ; NOCMOV-NEXT: pushl %edi ; NOCMOV-NEXT: pushl %esi -; NOCMOV-NEXT: flds 24(%esp) 
; NOCMOV-NEXT: flds 20(%esp) +; NOCMOV-NEXT: flds 16(%esp) ; NOCMOV-NEXT: fucompp ; NOCMOV-NEXT: fnstsw %ax ; NOCMOV-NEXT: sahf -; NOCMOV-NEXT: leal 44(%esp), %eax -; NOCMOV-NEXT: movl %eax, %ecx -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] +; NOCMOV-NEXT: leal 40(%esp), %eax +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] +; NOCMOV-NEXT: leal 24(%esp), %eax +; NOCMOV-NEXT: [[TBB]]: +; NOCMOV-NEXT: movl (%eax), %eax +; NOCMOV-NEXT: leal 44(%esp), %ecx +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] ; NOCMOV-NEXT: leal 28(%esp), %ecx -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %ecx, %eax -; NOCMOV-NEXT: [[TBB2]]: -; NOCMOV-NEXT: movl (%eax), %eax -; NOCMOV-NEXT: leal 48(%esp), %ecx -; NOCMOV-NEXT: movl %ecx, %edx -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; NOCMOV-NEXT: leal 32(%esp), %edx -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %edx, %ecx -; NOCMOV-NEXT: [[TBB2]]: +; NOCMOV-NEXT: [[TBB]]: ; NOCMOV-NEXT: movl (%ecx), %ecx -; NOCMOV-NEXT: leal 52(%esp), %edx -; NOCMOV-NEXT: movl %edx, %esi -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; NOCMOV-NEXT: leal 36(%esp), %esi -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %esi, %edx -; NOCMOV-NEXT: [[TBB2]]: -; NOCMOV-NEXT: movl (%edx), %edx -; NOCMOV-NEXT: leal 56(%esp), %esi -; NOCMOV-NEXT: movl %esi, %ebx -; NOCMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; NOCMOV-NEXT: leal 40(%esp), %ebx -; NOCMOV-NEXT: [[TBB1]]: -; NOCMOV-NEXT: movl 16(%esp), %edi -; NOCMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; NOCMOV-NEXT: movl %ebx, %esi -; NOCMOV-NEXT: [[TBB2]]: +; NOCMOV-NEXT: leal 48(%esp), %esi +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] +; NOCMOV-NEXT: leal 32(%esp), %esi +; NOCMOV-NEXT: [[TBB]]: +; NOCMOV-NEXT: movl 12(%esp), %edx ; NOCMOV-NEXT: movl (%esi), %esi -; NOCMOV-NEXT: movl %esi, 12(%edi) -; NOCMOV-NEXT: movl %edx, 8(%edi) -; NOCMOV-NEXT: movl %ecx, 
4(%edi) -; NOCMOV-NEXT: movl %eax, (%edi) +; NOCMOV-NEXT: leal 52(%esp), %edi +; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; NOCMOV-NEXT: jp [[TBB]] +; NOCMOV-NEXT: leal 36(%esp), %edi +; NOCMOV-NEXT: [[TBB]]: +; NOCMOV-NEXT: movl (%edi), %edi +; NOCMOV-NEXT: movl %edi, 12(%edx) +; NOCMOV-NEXT: movl %esi, 8(%edx) +; NOCMOV-NEXT: movl %ecx, 4(%edx) +; NOCMOV-NEXT: movl %eax, (%edx) ; NOCMOV-NEXT: popl %esi ; NOCMOV-NEXT: popl %edi -; NOCMOV-NEXT: popl %ebx ; NOCMOV-NEXT: retl $4 define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 { entry: @@ -217,17 +185,14 @@ ; CHECK-LABEL: test_zext_fcmp_une: ; CMOV-NEXT: ucomiss %xmm1, %xmm0 ; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0 -; CMOV-NEXT: movaps %xmm0, %xmm1 -; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; CMOV-NEXT: xorps %xmm1, %xmm1 -; CMOV-NEXT: [[TBB1]]: -; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; CMOV-NEXT: movaps %xmm1, %xmm0 -; CMOV-NEXT: [[TBB2]]: +; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; CMOV-NEXT: jp [[TBB]] +; CMOV-NEXT: xorps %xmm0, %xmm0 +; CMOV-NEXT: [[TBB]]: ; CMOV-NEXT: retq -; NOCMOV: jne -; NOCMOV: jp +; NOCMOV: jne +; NOCMOV-NEXT: jp define float @test_zext_fcmp_une(float %a, float %b) #0 { entry: %cmp = fcmp une float %a, %b @@ -242,17 +207,14 @@ ; CHECK-LABEL: test_zext_fcmp_oeq: ; CMOV-NEXT: ucomiss %xmm1, %xmm0 ; CMOV-NEXT: xorps %xmm0, %xmm0 -; CMOV-NEXT: xorps %xmm1, %xmm1 -; CMOV-NEXT: jne [[TBB1:.LBB[0-9_]+]] -; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm1 -; CMOV-NEXT: [[TBB1]]: -; CMOV-NEXT: jp [[TBB2:.LBB[0-9_]+]] -; CMOV-NEXT: movaps %xmm1, %xmm0 -; CMOV-NEXT: [[TBB2]]: +; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] +; CMOV-NEXT: jp [[TBB]] +; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0 +; CMOV-NEXT: [[TBB]]: ; CMOV-NEXT: retq -; NOCMOV: jne -; NOCMOV: jp +; NOCMOV: jne +; NOCMOV-NEXT: jp define float @test_zext_fcmp_oeq(float %a, float %b) #0 { entry: %cmp = fcmp oeq float %a, %b