Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -1286,6 +1286,10 @@ EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1, + MachineInstr &MI2, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, MachineBasicBlock *BB) const; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -25567,65 +25567,76 @@ } } -MachineBasicBlock * -X86TargetLowering::EmitLoweredSelect(MachineInstr &MI, - MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); +// Helper function, which inserts PHI functions into SinkMBB: +// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], +// where %FalseValue(i) and %TrueValue(i) are taken from the consequent CMOVs +// in [MIItBegin, MIItEnd) range. It returns the last MachineInstrBuilder for +// the last PHI function inserted. +static MachineInstrBuilder createPHIsForCMOVsInSinkBB( + MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock *SinkMBB) { + MachineFunction *MF = TrueMBB->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + DebugLoc DL = MIItBegin->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = ++BB->getIterator(); + X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm()); + X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); - // thisMBB: - // ... - // TrueVal = ... - // cmpTY ccX, r1, r2 - // bCC copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); + MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); - // This code lowers all pseudo-CMOV instructions. Generally it lowers these - // as described above, by inserting a BB, and then making a PHI at the join - // point to select the true and false operands of the CMOV in the PHI. - // - // The code also handles two different cases of multiple CMOV opcodes - // in a row. - // - // Case 1: - // In this case, there are multiple CMOVs in a row, all which are based on - // the same condition setting (or the exact opposite condition setting). - // In this case we can lower all the CMOVs using a single inserted BB, and - // then make a number of PHIs at the join point to model the CMOVs. The only - // trickiness here, is that in a case like: - // - // t2 = CMOV cond1 t1, f1 - // t3 = CMOV cond1 t2, f2 - // - // when rewriting this into PHIs, we have to perform some renaming on the - // temps since you cannot have a PHI operand refer to a PHI result earlier - // in the same block. The "simple" but wrong lowering would be: - // - // t2 = PHI t1(BB1), f1(BB2) - // t3 = PHI t2(BB1), f2(BB2) - // - // but clearly t2 is not defined in BB1, so that is incorrect. The proper - // renaming is to note that on the path through BB1, t2 is really just a - // copy of t1, and do that renaming, properly generating: - // - // t2 = PHI t1(BB1), f1(BB2) - // t3 = PHI t1(BB1), f2(BB2) - // - // Case 2, we lower cascaded CMOVs such as + // As we are creating the PHIs, we have to be careful if there is more than + // one. Later CMOVs may reference the results of earlier CMOVs, but later + // PHIs have to reference the individual true/false inputs from earlier PHIs. + // That also means that PHI construction must work forward from earlier to + // later, and that the code must maintain a mapping from earlier PHI's + // destination registers, and the registers that went into the PHI. + DenseMap> RegRewriteTable; + MachineInstrBuilder MIB; + + for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { + unsigned DestReg = MIIt->getOperand(0).getReg(); + unsigned Op1Reg = MIIt->getOperand(1).getReg(); + unsigned Op2Reg = MIIt->getOperand(2).getReg(); + + // If this CMOV we are generating is the opposite condition from + // the jump we generated, then we have to swap the operands for the + // PHI that is going to be generated. + if (MIIt->getOperand(3).getImm() == OppCC) + std::swap(Op1Reg, Op2Reg); + + if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end()) + Op1Reg = RegRewriteTable[Op1Reg].first; + + if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end()) + Op2Reg = RegRewriteTable[Op2Reg].second; + + MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg) + .addReg(Op1Reg) + .addMBB(FalseMBB) + .addReg(Op2Reg) + .addMBB(TrueMBB); + + // Add this PHI to the rewrite table. + RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg); + } + + return MIB; +} + +// Lower cascaded selects in form of (SecondCmov (FirstCMOV F, T, cc1), T, cc2). +MachineBasicBlock * +X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV, + MachineInstr &SecondCascadedCMOV, + MachineBasicBlock *ThisMBB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = FirstCMOV.getDebugLoc(); + + // We lower cascaded CMOVs such as // - // (CMOV (CMOV F, T, cc1), T, cc2) + // (SecondCascadedCMOV (FirstCMOV F, T, cc1), T, cc2) // - // to two successive branches. For that, we look for another CMOV as the - // following instruction. + // to two successive branches. // // Without this, we would add a PHI between the two jumps, which ends up // creating a few copies all around. For instance, for @@ -25689,10 +25700,145 @@ // .LBB5_4: // retq // - MachineInstr *CascadedCMOV = nullptr; - MachineInstr *LastCMOV = &MI; + + // We lower cascaded CMOV into two successive branches to the same block. + // EFLAGS is used by both, so mark it as live in the second. + const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); + MachineFunction *F = ThisMBB->getParent(); + MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator It = ++ThisMBB->getIterator(); + F->insert(It, FirstInsertedMBB); + F->insert(It, SecondInsertedMBB); + F->insert(It, SinkMBB); + + // For a cascaded CMOV, we lower it to two successive branches to + // the same block (SinkMBB). EFLAGS is used by both, so mark it as live in + // the FirstInsertedMBB. + FirstInsertedMBB->addLiveIn(X86::EFLAGS); + + // If the EFLAGS register isn't dead in the terminator, then claim that it's + // live into the sink and copy blocks. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + if (!SecondCascadedCMOV.killsRegister(X86::EFLAGS) && + !checkAndUpdateEFLAGSKill(SecondCascadedCMOV, ThisMBB, TRI)) { + SecondInsertedMBB->addLiveIn(X86::EFLAGS); + SinkMBB->addLiveIn(X86::EFLAGS); + } + + // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. + SinkMBB->splice(SinkMBB->begin(), ThisMBB, + std::next(MachineBasicBlock::iterator(FirstCMOV)), + ThisMBB->end()); + SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); + + // Fallthrough block for ThisMBB. + ThisMBB->addSuccessor(FirstInsertedMBB); + // The true block target of the first branch is always SinkMBB. + ThisMBB->addSuccessor(SinkMBB); + // Fallthrough block for FirstInsertedMBB. + FirstInsertedMBB->addSuccessor(SecondInsertedMBB); + // The true block for the branch of FirstInsertedMBB. + FirstInsertedMBB->addSuccessor(SinkMBB); + // This is fallthrough. + SecondInsertedMBB->addSuccessor(SinkMBB); + + // Create the conditional branch instructions. + X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm()); + unsigned Opc = X86::GetCondBranchFromCond(FirstCC); + BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB); + + X86::CondCode SecondCC = + X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm()); + unsigned Opc2 = X86::GetCondBranchFromCond(SecondCC); + BuildMI(FirstInsertedMBB, DL, TII->get(Opc2)).addMBB(SinkMBB); + + // SinkMBB: + // %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] + unsigned DestReg = FirstCMOV.getOperand(0).getReg(); + unsigned Op1Reg = FirstCMOV.getOperand(1).getReg(); + unsigned Op2Reg = FirstCMOV.getOperand(2).getReg(); + MachineInstrBuilder MIB = + BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg) + .addReg(Op1Reg) + .addMBB(SecondInsertedMBB) + .addReg(Op2Reg) + .addMBB(ThisMBB); + + // The second SecondInsertedMBB provides the same incoming value as the + // FirstInsertedMBB (the True operand of the SELECT_CC/CMOV nodes). + MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB); + // Copy the PHI result to the register defined by the second CMOV. + BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), DL, + TII->get(TargetOpcode::COPY), + SecondCascadedCMOV.getOperand(0).getReg()) + .addReg(FirstCMOV.getOperand(0).getReg()); + + // Now remove the CMOVs. + FirstCMOV.eraseFromParent(); + SecondCascadedCMOV.eraseFromParent(); + + return SinkMBB; +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredSelect(MachineInstr &MI, + MachineBasicBlock *ThisMBB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between and a branch opcode to use. + + // ThisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> FalseMBB + + // This code lowers all pseudo-CMOV instructions. Generally it lowers these + // as described above, by inserting a BB, and then making a PHI at the join + // point to select the true and false operands of the CMOV in the PHI. + // + // The code also handles two different cases of multiple CMOV opcodes + // in a row. + // + // Case 1: + // In this case, there are multiple CMOVs in a row, all which are based on + // the same condition setting (or the exact opposite condition setting). + // In this case we can lower all the CMOVs using a single inserted BB, and + // then make a number of PHIs at the join point to model the CMOVs. The only + // trickiness here, is that in a case like: + // + // t2 = CMOV cond1 t1, f1 + // t3 = CMOV cond1 t2, f2 + // + // when rewriting this into PHIs, we have to perform some renaming on the + // temps since you cannot have a PHI operand refer to a PHI result earlier + // in the same block. The "simple" but wrong lowering would be: + // + // t2 = PHI t1(BB1), f1(BB2) + // t3 = PHI t2(BB1), f2(BB2) + // + // but clearly t2 is not defined in BB1, so that is incorrect. The proper + // renaming is to note that on the path through BB1, t2 is really just a + // copy of t1, and do that renaming, properly generating: + // + // t2 = PHI t1(BB1), f1(BB2) + // t3 = PHI t1(BB1), f2(BB2) + // + // Case 2: + // CMOV ((CMOV F, T, cc1), T, cc2) is checked here and handled by a separate + // function - EmitLoweredCascadedSelect. + X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm()); X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); + MachineInstr *LastCMOV = &MI; MachineBasicBlock::iterator NextMIIt = std::next(MachineBasicBlock::iterator(MI)); @@ -25702,7 +25848,7 @@ if (isCMOVPseudo(MI)) { // See if we have a string of CMOVS with the same condition. - while (NextMIIt != BB->end() && isCMOVPseudo(*NextMIIt) && + while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) && (NextMIIt->getOperand(3).getImm() == CC || NextMIIt->getOperand(3).getImm() == OppCC)) { LastCMOV = &*NextMIIt; @@ -25712,136 +25858,61 @@ // This checks for case 2, but only do this if we didn't already find // case 1, as indicated by LastCMOV == MI. - if (LastCMOV == &MI && NextMIIt != BB->end() && + if (LastCMOV == &MI && NextMIIt != ThisMBB->end() && NextMIIt->getOpcode() == MI.getOpcode() && NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() && NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() && NextMIIt->getOperand(1).isKill()) { - CascadedCMOV = &*NextMIIt; + return EmitLoweredCascadedSelect(MI, *NextMIIt, ThisMBB); } - MachineBasicBlock *jcc1MBB = nullptr; + const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); + MachineFunction *F = ThisMBB->getParent(); + MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - // If we have a cascaded CMOV, we lower it to two successive branches to - // the same block. EFLAGS is used by both, so mark it as live in the second. - if (CascadedCMOV) { - jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, jcc1MBB); - jcc1MBB->addLiveIn(X86::EFLAGS); - } - - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); + MachineFunction::iterator It = ++ThisMBB->getIterator(); + F->insert(It, FalseMBB); + F->insert(It, SinkMBB); // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - - MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV; - if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) && - !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) { - copy0MBB->addLiveIn(X86::EFLAGS); - sinkMBB->addLiveIn(X86::EFLAGS); - } - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Add the true and fallthrough blocks as its successors. - if (CascadedCMOV) { - // The fallthrough block may be jcc1MBB, if we have a cascaded CMOV. - BB->addSuccessor(jcc1MBB); - - // In that case, jcc1MBB will itself fallthrough the copy0MBB, and - // jump to the sinkMBB. - jcc1MBB->addSuccessor(copy0MBB); - jcc1MBB->addSuccessor(sinkMBB); - } else { - BB->addSuccessor(copy0MBB); - } - - // The true block target of the first (or only) branch is always sinkMBB. - BB->addSuccessor(sinkMBB); + if (!LastCMOV->killsRegister(X86::EFLAGS) && + !checkAndUpdateEFLAGSKill(LastCMOV, ThisMBB, TRI)) { + FalseMBB->addLiveIn(X86::EFLAGS); + SinkMBB->addLiveIn(X86::EFLAGS); + } + + // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. + SinkMBB->splice(SinkMBB->begin(), ThisMBB, + std::next(MachineBasicBlock::iterator(LastCMOV)), + ThisMBB->end()); + SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); + + // Fallthrough block for ThisMBB. + ThisMBB->addSuccessor(FalseMBB); + // The true block target of the first (or only) branch is always a SinkMBB. + ThisMBB->addSuccessor(SinkMBB); + // Fallthrough block for FalseMBB. + FalseMBB->addSuccessor(SinkMBB); // Create the conditional branch instruction. unsigned Opc = X86::GetCondBranchFromCond(CC); - BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); - - if (CascadedCMOV) { - unsigned Opc2 = X86::GetCondBranchFromCond( - (X86::CondCode)CascadedCMOV->getOperand(3).getImm()); - BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB); - } - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - copy0MBB->addSuccessor(sinkMBB); + BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB); - // sinkMBB: - // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // SinkMBB: + // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ] // ... MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); MachineBasicBlock::iterator MIItEnd = - std::next(MachineBasicBlock::iterator(LastCMOV)); - MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin(); - DenseMap> RegRewriteTable; - MachineInstrBuilder MIB; - - // As we are creating the PHIs, we have to be careful if there is more than - // one. Later CMOVs may reference the results of earlier CMOVs, but later - // PHIs have to reference the individual true/false inputs from earlier PHIs. - // That also means that PHI construction must work forward from earlier to - // later, and that the code must maintain a mapping from earlier PHI's - // destination registers, and the registers that went into the PHI. - - for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { - unsigned DestReg = MIIt->getOperand(0).getReg(); - unsigned Op1Reg = MIIt->getOperand(1).getReg(); - unsigned Op2Reg = MIIt->getOperand(2).getReg(); - - // If this CMOV we are generating is the opposite condition from - // the jump we generated, then we have to swap the operands for the - // PHI that is going to be generated. - if (MIIt->getOperand(3).getImm() == OppCC) - std::swap(Op1Reg, Op2Reg); - - if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end()) - Op1Reg = RegRewriteTable[Op1Reg].first; - - if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end()) - Op2Reg = RegRewriteTable[Op2Reg].second; - - MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL, - TII->get(X86::PHI), DestReg) - .addReg(Op1Reg).addMBB(copy0MBB) - .addReg(Op2Reg).addMBB(thisMBB); - - // Add this PHI to the rewrite table. - RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg); - } - - // If we have a cascaded CMOV, the second Jcc provides the same incoming - // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes). - if (CascadedCMOV) { - MIB.addReg(MI.getOperand(2).getReg()).addMBB(jcc1MBB); - // Copy the PHI result to the register defined by the second CMOV. - BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), - DL, TII->get(TargetOpcode::COPY), - CascadedCMOV->getOperand(0).getReg()) - .addReg(MI.getOperand(0).getReg()); - CascadedCMOV->eraseFromParent(); - } + std::next(MachineBasicBlock::iterator(LastCMOV)); + createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, ThisMBB, FalseMBB, SinkMBB); // Now remove the CMOV(s). - for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ) - (MIIt++)->eraseFromParent(); + ThisMBB->erase(MIItBegin, MIItEnd); - return sinkMBB; + return SinkMBB; } MachineBasicBlock *