Index: llvm/include/llvm/CodeGen/MachineBasicBlock.h
===================================================================
--- llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -457,6 +457,13 @@
   /// Replace successor OLD with NEW and update probability info.
   void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New);
 
+  /// Copy a successor (and any probability info) from the original block to
+  /// this block. Uses an iterator into the original block's successors.
+  ///
+  /// This is useful when doing a partial clone of successors. Afterward, the
+  /// probabilities may need to be normalized.
+  void copySuccessor(MachineBasicBlock *Orig, succ_iterator I);
+
   /// Transfers all the successors from MBB to this machine basic block (i.e.,
   /// copies all the successors FromMBB and remove all the successors from
   /// FromMBB).
Index: llvm/lib/CodeGen/MachineBasicBlock.cpp
===================================================================
--- llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -720,6 +720,14 @@
   removeSuccessor(OldI);
 }
 
+void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
+                                      succ_iterator I) {
+  if (!Orig->Probs.empty())
+    addSuccessor(*I, Orig->getSuccProbability(I));
+  else
+    addSuccessorWithoutProb(*I);
+}
+
 void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
   Predecessors.push_back(Pred);
 }
Index: llvm/lib/Target/X86/CMakeLists.txt
===================================================================
--- llvm/lib/Target/X86/CMakeLists.txt
+++ llvm/lib/Target/X86/CMakeLists.txt
@@ -32,6 +32,7 @@
   X86FixupBWInsts.cpp
   X86FixupLEAs.cpp
   X86FixupSetCC.cpp
+  X86FlagsCopyLowering.cpp
   X86FloatingPoint.cpp
   X86FrameLowering.cpp
   X86InstructionSelector.cpp
Index: llvm/lib/Target/X86/X86.h
===================================================================
--- llvm/lib/Target/X86/X86.h
+++ llvm/lib/Target/X86/X86.h
@@ -70,6 +70,9 @@
 /// Return a pass that transforms setcc + movzx pairs into xor + setcc.
 FunctionPass *createX86FixupSetCC();
 
+/// Return a pass that lowers EFLAGS copy nodes.
+FunctionPass *createX86FlagsCopyLoweringPass();
+
 /// Return a pass that expands WinAlloca pseudo-instructions.
 FunctionPass *createX86WinAllocaExpander();
Index: llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -0,0 +1,626 @@
+//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Lowers COPY nodes of EFLAGS to directly extract and preserve individual
+/// flag bits so that we don't need to use pushf/popf, which have serious
+/// problems due to IF and other flags that should *not* be preserved across
+/// arbitrary instructions in all cases. In addition to these correctness
+/// concerns with pushf/popf, we also expect other approaches for preserving
+/// flags to be substantially more efficient in common cases.
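+///
+/// In rough outline (a summary of the rewrites below): each condition that a
+/// user of the copied EFLAGS actually consumes is materialized into a byte
+/// register with a setCC at the point where the flags are originally defined,
+/// and each use is then rewritten to rematerialize the flag it needs from
+/// that register (via a test or an add).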
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define PASS_KEY "x86-flags-copy-lowering"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
+STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
+STATISTIC(NumTestsInserted, "Number of test instructions inserted");
+STATISTIC(NumAddsInserted, "Number of add instructions inserted");
+
+namespace llvm {
+
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+
+} // end namespace llvm
+
+namespace {
+
+// Convenient array type for storing registers associated with each condition.
+using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
+
+class X86FlagsCopyLoweringPass : public MachineFunctionPass {
+public:
+  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
+    initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Pass identification, replacement for typeid.
+  static char ID;
+
+private:
+  const X86Subtarget *Subtarget;
+  MachineRegisterInfo *MRI;
+  const X86InstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const TargetRegisterClass *PromoteRC;
+
+  /// The information about a block's conditional terminators needed to trace
+  /// our predicate state through the exiting edges.
+  struct BlockCondInfo {
+    MachineBasicBlock *MBB;
+
+    // We mostly have one conditional branch, and in extremely rare cases have
+    // two. Three and more are so rare as to be unimportant for compile time.
+    SmallVector<MachineInstr *, 4> CondBrs;
+
+    MachineInstr *UncondBr;
+  };
+
+  unsigned promoteCondToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator TestPos,
+                            DebugLoc TestLoc, X86::CondCode Cond,
+                            bool IsKill = false);
+  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
+                  DebugLoc Loc, unsigned Reg);
+
+  void rewriteSetCC(MachineBasicBlock &MBB,
+                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                    MachineInstr &SetCCI, MachineOperand &FlagUse,
+                    CondRegArray &CondRegs);
+  void rewriteCMov(MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos,
+                   DebugLoc TestLoc, MachineInstr &CMovI,
+                   MachineOperand &FlagUse, CondRegArray &CondRegs);
+  void rewriteArithmetic(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                         MachineInstr &MI, MachineOperand &FlagUse,
+                         CondRegArray &CondRegs);
+
+  void rewriteCondJmp(MachineBasicBlock &MBB,
+                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                      MachineBasicBlock &JmpMBB, MachineInstr &JmpI,
+                      CondRegArray &CondRegs);
+};
+
+} // end anonymous namespace
+
+char X86FlagsCopyLoweringPass::ID = 0;
+
+void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+namespace {
+/// An enumeration of the arithmetic instruction mnemonics which have
+/// interesting flag semantics.
+///
+/// We can map instruction opcodes into these mnemonics to make it easy to
+/// dispatch with specific functionality.
+enum class FlagArithMnemonic {
+  ADC,
+  ADCX,
+  ADOX,
+  RCL,
+  RCR,
+  SBB,
+};
+} // namespace
+
+static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    report_fatal_error("No support for lowering a copy into EFLAGS when used "
+                       "by this instruction!");
+
+#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
+  case X86::MNEMONIC##8##SUFFIX:                                               \
+  case X86::MNEMONIC##16##SUFFIX:                                              \
+  case X86::MNEMONIC##32##SUFFIX:                                              \
+  case X86::MNEMONIC##64##SUFFIX:
+
+#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
+  case X86::MNEMONIC##8ri:                                                     \
+  case X86::MNEMONIC##16ri8:                                                   \
+  case X86::MNEMONIC##32ri8:                                                   \
+  case X86::MNEMONIC##64ri8:                                                   \
+  case X86::MNEMONIC##16ri:                                                    \
+  case X86::MNEMONIC##32ri:                                                    \
+  case X86::MNEMONIC##64ri32:                                                  \
+  case X86::MNEMONIC##8mi:                                                     \
+  case X86::MNEMONIC##16mi8:                                                   \
+  case X86::MNEMONIC##32mi8:                                                   \
+  case X86::MNEMONIC##64mi8:                                                   \
+  case X86::MNEMONIC##16mi:                                                    \
+  case X86::MNEMONIC##32mi:                                                    \
+  case X86::MNEMONIC##64mi32:                                                  \
+  case X86::MNEMONIC##8i8:                                                     \
+  case X86::MNEMONIC##16i16:                                                   \
+  case X86::MNEMONIC##32i32:                                                   \
+  case X86::MNEMONIC##64i32:
+
+    LLVM_EXPAND_ADC_SBB_INSTR(ADC)
+    return FlagArithMnemonic::ADC;
+
+    LLVM_EXPAND_ADC_SBB_INSTR(SBB)
+    return FlagArithMnemonic::SBB;
+
+#undef LLVM_EXPAND_ADC_SBB_INSTR
+
+    LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
+    LLVM_EXPAND_INSTR_SIZES(RCL, r1)
+    LLVM_EXPAND_INSTR_SIZES(RCL, ri)
+    return FlagArithMnemonic::RCL;
+
+    LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
+    LLVM_EXPAND_INSTR_SIZES(RCR, r1)
+    LLVM_EXPAND_INSTR_SIZES(RCR, ri)
+    return FlagArithMnemonic::RCR;
+
+#undef LLVM_EXPAND_INSTR_SIZES
+
+  case X86::ADCX32rr:
+  case X86::ADCX64rr:
+  case X86::ADCX32rm:
+  case X86::ADCX64rm:
+    return FlagArithMnemonic::ADCX;
+
+  case X86::ADOX32rr:
+  case X86::ADOX64rr:
+  case X86::ADOX32rm:
+  case X86::ADOX64rm:
+    return FlagArithMnemonic::ADOX;
+  }
+}
+
+static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
+                                     MachineInstr &SplitI,
+                                     const X86InstrInfo &TII) {
+  MachineFunction &MF = *MBB.getParent();
+
+  assert(SplitI.getParent() == &MBB &&
+         "Split instruction must be in the split block!");
+  assert(SplitI.isBranch() &&
+         "Only designed to split a tail of branch instructions!");
+  assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
+         "Must split on an actual jCC instruction!");
+
+  // Dig out the previous instruction to the split point.
+  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
+  assert(PrevI.isBranch() && "Must split after a branch!");
+  assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
+         "Must split after an actual jCC instruction!");
+  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
+         "Must only have this one terminator prior to the split!");
+
+  // Grab the one successor edge that will stay in `MBB`.
+  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
+
+  // Analyze the original block to see if we are actually splitting an edge
+  // into two edges. This can happen when we have multiple conditional jumps to
+  // the same successor.
+  bool IsEdgeSplit =
+      std::any_of(SplitI.getIterator(), MBB.instr_end(),
+                  [&](MachineInstr &MI) {
+                    assert(MI.isTerminator() &&
+                           "Should only have spliced terminators!");
+                    return llvm::any_of(
+                        MI.operands(), [&](MachineOperand &MOp) {
+                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
+                        });
+                  }) ||
+      MBB.getFallThrough() == &UnsplitSucc;
+
+  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
+
+  // Insert the new block immediately after the current one. Any existing
+  // fallthrough will be sunk into this new block anyway.
+  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
+
+  // Splice the tail of instructions into the new block.
+  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
+
+  // Copy the necessary successors (and their probability info) into the new
+  // block.
+  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
+    if (IsEdgeSplit || *SI != &UnsplitSucc)
+      NewMBB.copySuccessor(&MBB, SI);
+  // Normalize the probabilities if we didn't end up splitting the edge.
+  if (!IsEdgeSplit)
+    NewMBB.normalizeSuccProbs();
+
+  // Now replace all of the moved successors in the original block with the new
+  // block. This will merge their probabilities.
+  for (MachineBasicBlock *Succ : NewMBB.successors())
+    if (Succ != &UnsplitSucc)
+      MBB.replaceSuccessor(Succ, &NewMBB);
+
+  // We should always end up replacing at least one successor.
+  assert(MBB.isSuccessor(&NewMBB) &&
+         "Failed to make the new block a successor!");
+
+  // Now update all the PHIs.
+  for (MachineBasicBlock *Succ : NewMBB.successors()) {
+    for (MachineInstr &MI : *Succ) {
+      if (!MI.isPHI())
+        break;
+
+      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
+           OpIdx += 2) {
+        MachineOperand &OpV = MI.getOperand(OpIdx);
+        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
+        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
+        if (OpMBB.getMBB() != &MBB)
+          continue;
+
+        // Replace the operand for unsplit successors.
+        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
+          OpMBB.setMBB(&NewMBB);
+
+          // We have to continue scanning as there may be multiple entries in
+          // the PHI.
+          continue;
+        }
+
+        // When we have split the edge, append a new successor.
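+        // As an illustration (not part of the original change): if this PHI
+        // had an entry `[ %v, %mbb ]` and the edge from %mbb was split, that
+        // entry is kept -- %mbb still branches here -- and a matching
+        // `[ %v, %newmbb ]` entry is appended for the new predecessor block.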
+        MI.addOperand(MF, OpV);
+        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
+        break;
+      }
+    }
+  }
+
+  return NewMBB;
+}
+
+bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
+               << " **********\n");
+
+  Subtarget = &MF.getSubtarget<X86Subtarget>();
+  MRI = &MF.getRegInfo();
+  TII = Subtarget->getInstrInfo();
+  TRI = Subtarget->getRegisterInfo();
+  PromoteRC = &X86::GR8RegClass;
+
+  if (MF.begin() == MF.end())
+    // Nothing to do for a degenerate empty function...
+    return false;
+
+  SmallVector<MachineInstr *, 4> Copies;
+  for (MachineBasicBlock &MBB : MF)
+    for (MachineInstr &MI : MBB)
+      if (MI.getOpcode() == TargetOpcode::COPY &&
+          MI.getOperand(0).getReg() == X86::EFLAGS)
+        Copies.push_back(&MI);
+
+  for (MachineInstr *CopyI : Copies) {
+    MachineBasicBlock &MBB = *CopyI->getParent();
+
+    MachineOperand &DOp = CopyI->getOperand(0);
+    assert(DOp.isDef() && "Expected register def!");
+    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
+    if (DOp.isDead())
+      // Nothing to do here.
+      continue;
+
+    MachineOperand &VOp = CopyI->getOperand(1);
+    assert(VOp.isReg() &&
+           "The input to the copy for EFLAGS should always be a register!");
+    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
+    auto TestPos = CopyDefI.getIterator();
+    DebugLoc TestLoc = CopyDefI.getDebugLoc();
+
+    DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
+
+    // Scan for usage of newly set EFLAGS so we can rewrite them. We just
+    // buffer jumps because their usage is very constrained.
+    bool IsKilled = false;
+    SmallVector<MachineInstr *, 4> JmpIs;
+    CondRegArray CondRegs = {};
+    for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
+         MII != MIE;) {
+      MachineInstr &MI = *MII++;
+      MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
+      if (!FlagUse) {
+        if (MachineOperand *FlagDef = MI.findRegisterDefOperand(X86::EFLAGS)) {
+          // If EFLAGS are defined, it's as-if they were killed. We can stop
+          // scanning here.
+          //
+          // NB!!! Many instructions only modify some flags. LLVM currently
+          // models this as clobbering all flags, but if that ever changes
+          // this will need to be carefully updated to handle that more
+          // complex logic.
+          IsKilled = true;
+          break;
+        }
+        continue;
+      }
+
+      DEBUG(dbgs() << "  Rewriting use: "; MI.dump());
+
+      // Check the kill flag before we rewrite as that may change it.
+      if (FlagUse->isKill())
+        IsKilled = true;
+
+      // Once we encounter a branch, the remaining terminators form a
+      // predictable sequence that we can batch up here. We also can't rewrite
+      // branches in place, so we handle them below.
+      //
+      // FIXME: handle conditional tail calls
+      if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
+        auto JmpIt = MI.getIterator();
+        do {
+          JmpIs.push_back(&*JmpIt);
+          ++JmpIt;
+        } while (JmpIt != MBB.instr_end() &&
+                 X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
+                     X86::COND_INVALID);
+        break;
+      }
+
+      // Otherwise we can just rewrite in-place.
+      if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
+        rewriteSetCC(MBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+      } else if (X86::getCondFromCMovOpc(MI.getOpcode()) !=
+                 X86::COND_INVALID) {
+        rewriteCMov(MBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+      } else {
+        rewriteArithmetic(MBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+
+        // We assume that instructions that use flags also def them.
+        assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
+               "Expected a def of EFLAGS for this instruction!");
+
+        // NB!!! Several arithmetic instructions only *partially* update
+        // flags. Theoretically, we could generate MI code sequences that
+        // would rely on this fact and observe different flags independently.
+        // But currently LLVM models all of these instructions as clobbering
+        // all the flags in an undef way. We rely on that to simplify the
+        // logic.
+        IsKilled = true;
+        break;
+      }
+
+      // If this was the last use of the flags, we're done.
+      if (IsKilled)
+        break;
+    }
+
+    // If we didn't find a kill (or equivalent), check that the flags don't
+    // live out of the basic block. Currently we don't support lowering copies
+    // of flags that live out in this fashion.
+    if (!IsKilled &&
+        llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
+          return SuccMBB->isLiveIn(X86::EFLAGS);
+        })) {
+      DEBUG(
+          dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
+                 << "----\n";
+          MBB.dump(); dbgs() << "----\n"
+                             << "ERROR: Cannot lower this EFLAGS copy without "
+                                "using dangerous popf construct!\n");
+      report_fatal_error(
+          "Cannot lower EFLAGS copy that lives out of a basic block!");
+    }
+
+    // Now rewrite the jumps that use the flags. These we handle specially
+    // because if there are multiple jumps we'll have to do surgery on the CFG.
+    for (MachineInstr *JmpI : JmpIs) {
+      MachineBasicBlock &JmpMBB =
+          JmpI == JmpIs.front() ? MBB
+                                : splitBlock(*JmpI->getParent(), *JmpI, *TII);
+      assert(JmpI->getParent() == &JmpMBB &&
+             "Ended up with bad jump parent block!");
+      rewriteCondJmp(MBB, TestPos, TestLoc, JmpMBB, *JmpI, CondRegs);
+    }
+
+    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
+    // the copy's def operand is itself a kill.
+
+    // All uses of the EFLAGS copy are now rewritten, kill the copy nodes.
+    CopyI->eraseFromParent();
+    CopyDefI.eraseFromParent();
+    ++NumCopiesEliminated;
+  }
+
+  DEBUG(dbgs() << "\nFunction after rewriting copies:\n"; MF.dump();
+        dbgs() << "\n"; MF.verify(this));
+  return true;
+}
+
+unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos,
+    DebugLoc TestLoc, X86::CondCode Cond, bool IsKill) {
+  unsigned Reg = MRI->createVirtualRegister(PromoteRC);
+  auto SetI =
+      BuildMI(MBB, TestPos, TestLoc, TII->get(X86::getSETFromCond(Cond)), Reg);
+  SetI->getOperand(1).setIsKill(IsKill);
+  DEBUG(dbgs() << "    save cond: "; SetI->dump());
+  ++NumSetCCsInserted;
+  return Reg;
+}
+
+void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator Pos,
+                                          DebugLoc Loc, unsigned Reg) {
+  // Test the register against itself: for the 0-or-1 value produced by setCC
+  // this sets ZF exactly when the saved condition was false.
+  auto TestI =
+      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
+  (void)TestI;
+  DEBUG(dbgs() << "    test cond: "; TestI->dump());
+  ++NumTestsInserted;
+}
+
+void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator TestPos,
+                                            DebugLoc TestLoc,
+                                            MachineInstr &SetCCI,
+                                            MachineOperand &FlagUse,
+                                            CondRegArray &CondRegs) {
+  X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
+  // FIXME: Look for inverse condition and invert logic.
+  unsigned &CondReg = CondRegs[Cond];
+  if (!CondReg)
+    CondReg = promoteCondToReg(MBB, TestPos, TestLoc, Cond);
+
+  // Rewriting this is trivial: we just replace the register and remove the
+  // setcc.
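+  // (For example: a `SETAr` reading the copied EFLAGS is erased outright and
+  // its defined vreg is replaced everywhere by the register holding the SETA
+  // result saved at the test position -- an illustration, not pass output.)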
+  MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
+  SetCCI.eraseFromParent();
+}
+
+void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator TestPos,
+                                           DebugLoc TestLoc,
+                                           MachineInstr &CMovI,
+                                           MachineOperand &FlagUse,
+                                           CondRegArray &CondRegs) {
+  // First get the register containing this specific condition.
+  X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode());
+  // FIXME: Look for inverse condition and invert logic.
+  unsigned &CondReg = CondRegs[Cond];
+  if (!CondReg)
+    CondReg = promoteCondToReg(MBB, TestPos, TestLoc, Cond);
+
+  // Insert a direct test of the saved register.
+  insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
+
+  // Rewrite the CMov to use the NE flag from the test (but match register size
+  // and memory operand), and then kill its use of the flags afterward.
+  auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
+  CMovI.setDesc(TII->get(X86::getCMovFromCond(
+      X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8,
+      !CMovI.memoperands_empty())));
+  FlagUse.setIsKill(true);
+  DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
+}
+
+void X86FlagsCopyLoweringPass::rewriteArithmetic(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos,
+    DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
+    CondRegArray &CondRegs) {
+  // Arithmetic is either reading CF or OF. Figure out which condition we need
+  // to preserve in a register.
+  X86::CondCode Cond;
+
+  // The addend to use to reset CF or OF when added to the flag value.
+  int Addend;
+
+  switch (getMnemonicFromOpcode(MI.getOpcode())) {
+  case FlagArithMnemonic::ADC:
+  case FlagArithMnemonic::ADCX:
+  case FlagArithMnemonic::RCL:
+  case FlagArithMnemonic::RCR:
+  case FlagArithMnemonic::SBB:
+    Cond = X86::COND_B; // CF == 1
+    // Adding 255 to a condition byte of 1 wraps the 8-bit register and so
+    // produces a carry, while adding it to 0 does not.
+    Addend = 255;
+    break;
+
+  case FlagArithMnemonic::ADOX:
+    Cond = X86::COND_O; // OF == 1
+    // Adding 127 to a condition byte of 1 overflows the signed 8-bit range,
+    // while adding it to 0 does not.
+    Addend = 127;
+    break;
+  }
+
+  // Now get a register that contains the value of the flag input to the
+  // arithmetic.
+  // FIXME: Look for inverse condition and invert logic.
+  unsigned &CondReg = CondRegs[Cond];
+  if (!CondReg)
+    CondReg = promoteCondToReg(MBB, TestPos, TestLoc, Cond);
+
+  // Insert an instruction that will set the flag back to the desired value.
+  unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
+  auto AddI =
+      BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
+          .addDef(TmpReg, RegState::Dead)
+          .addReg(CondReg)
+          .addImm(Addend);
+  (void)AddI;
+  DEBUG(dbgs() << "    add cond: "; AddI->dump());
+  ++NumAddsInserted;
+  FlagUse.setIsKill(true);
+}
+
+void X86FlagsCopyLoweringPass::rewriteCondJmp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos,
+    DebugLoc TestLoc, MachineBasicBlock &JmpMBB, MachineInstr &JmpI,
+    CondRegArray &CondRegs) {
+  // First get the register containing this specific condition.
+  X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode());
+  // FIXME: Look for inverse condition and invert logic.
+  unsigned &CondReg = CondRegs[Cond];
+  if (!CondReg)
+    CondReg = promoteCondToReg(MBB, TestPos, TestLoc, Cond);
+
+  // Insert a direct test of the saved register.
+  insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);
+
+  // Rewrite the jump to use the NE flag from the test, and kill its use of
+  // flags afterward.
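+  // (Illustratively: a `JA_1 %bb` becomes `TEST8rr %cond, %cond` followed by
+  // `JNE_1 %bb`, where %cond holds the byte produced by the saved SETA.)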
+  JmpI.setDesc(TII->get(X86::GetCondBranchFromCond(X86::COND_NE)));
+  const int ImplicitEFLAGSOpIdx = 1;
+  JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true);
+  DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
+}
+
+INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
+                      "X86 EFLAGS copy lowering", false, false)
+INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
+                    "X86 EFLAGS copy lowering", false, false)
+
+FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
+  return new X86FlagsCopyLoweringPass();
+}
Index: llvm/lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetMachine.cpp
+++ llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -62,6 +62,7 @@
 void initializeX86CmovConverterPassPass(PassRegistry &);
 void initializeX86ExecutionDomainFixPass(PassRegistry &);
 void initializeX86DomainReassignmentPass(PassRegistry &);
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
 
 } // end namespace llvm
 
@@ -80,6 +81,7 @@
   initializeX86CmovConverterPassPass(PR);
   initializeX86ExecutionDomainFixPass(PR);
   initializeX86DomainReassignmentPass(PR);
+  initializeX86FlagsCopyLoweringPassPass(PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -451,6 +453,7 @@
     addPass(createX86CallFrameOptimization());
   }
 
+  addPass(createX86FlagsCopyLoweringPass());
   addPass(createX86WinAllocaExpander());
 }
 
 void X86PassConfig::addMachineSSAOptimization() {