Index: lib/Target/X86/CMakeLists.txt
===================================================================
--- lib/Target/X86/CMakeLists.txt
+++ lib/Target/X86/CMakeLists.txt
@@ -17,6 +17,7 @@
   X86CallFrameOptimization.cpp
   X86ExpandPseudo.cpp
   X86FastISel.cpp
+  X86FixupSeparateStack.cpp
   X86FloatingPoint.cpp
   X86FrameLowering.cpp
   X86ISelDAGToDAG.cpp
Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -78,6 +78,10 @@
 /// in order to eliminate partial register usage, false dependences on
 /// the upper portions of registers, and to save code size.
 FunctionPass *createX86FixupBWInsts();
+
+/// Return a pass that helps to support separate stack and data segments by
+/// emitting segment override prefixes where necessary.
+FunctionPass *createX86FixupSeparateStack();
 } // End llvm namespace
 
 #endif
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -247,6 +247,10 @@
 def FeatureFastPartialYMMWrite
     : SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
                        "true", "Partial writes to YMM registers are fast">;
+def FeatureSeparateStackSeg
+    : SubtargetFeature<"separate-stack-seg", "UseSeparateStackSeg", "true",
+                       "Support separate stack and data segments by emitting"
+                       " segment override prefixes (X86-32 only)">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
Index: lib/Target/X86/X86FixupSeparateStack.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/X86FixupSeparateStack.cpp
@@ -0,0 +1,1079 @@
+//===----- X86FixupSeparateStack.cpp - Fixup stack memory accesses -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that emits segment override prefixes to help
+// support separate stack and data segments for X86-32.
+//
+//===----------------------------------------------------------------------===//
+
+#include <map>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-fixup-separate-stack"
+
+enum class SPStoreVerif { None, IgnoreVA, All };
+
+static cl::opt<SPStoreVerif> SPStoreVerification(
+    "sep-stk-seg-sp-store-verif",
+    cl::Hidden, cl::init(SPStoreVerif::All),
+    cl::desc("Verification for stores of pointers pointing into SS segment"),
+    cl::values(clEnumValN(SPStoreVerif::None, "none",
+                          "No verification"),
+               clEnumValN(SPStoreVerif::IgnoreVA, "ignore-va",
+                          "Verify only in functions that do not have a "
+                          "va_start"),
+               clEnumValN(SPStoreVerif::All, "all",
+                          "Verify in all functions"),
+               clEnumValEnd));
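+// For example (hypothetical invocation, assuming an i386 triple), the pass
+// and this verification level can be exercised with:
+//   llc -mtriple=i386-unknown-linux-gnu -mattr=+separate-stack-seg \
+//       -sep-stk-seg-sp-store-verif=ignore-va foo.ll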
+namespace {
+/// Records information about registers that directly or indirectly are used
+/// in memory operands within a basic block. It is used to distinguish
+/// registers that point to stack memory from those that do not. It only
+/// explicitly stores information about registers that point to the stack,
+/// which implies that all other physical registers do not point to the stack.
+class AddrRegs {
+private:
+  const X86RegisterInfo *TRI;
+
+public:
+  /// Registers that point to the stack
+  DenseSet<unsigned> Stack;
+
+  AddrRegs(const X86RegisterInfo *TRI_) : TRI(TRI_) {}
+
+  void insertStackReg(unsigned Reg);
+
+  /// \brief Return true iff Reg points to a stack location.
+  bool pointsIntoStack(unsigned Reg) const;
+};
+
+/// Requirements for how registers that are directly or indirectly used in
+/// memory operands are configured.
+///
+/// Each basic block is associated with one AddrRegReqs object, although that
+/// object may actually contain information about the successors of the basic
+/// block with which it is associated.
+class AddrRegReqs {
+private:
+  const X86RegisterInfo *TRI;
+
+  /// The collection of AddrRegReqs objects that is associated with a function
+  /// is structured as a directed graph that may contain cycles. This set of
+  /// Predecessors points to the AddrRegReqs objects corresponding to all of
+  /// the basic blocks that immediately precede the one associated with this
+  /// object and that have already been visited or are currently being visited
+  /// by this pass.
+  std::set<AddrRegReqs *> Predecessors;
+  /// The original status of the registers at the time this object branched
+  /// from its first predecessor, according to the traversal order of this
+  /// pass.
+  const AddrRegs OrigRegs;
+
+  /// Pairs of registers (A, B) in which A may derive a pointer to stack data
+  /// from B. This is the case if an instruction form that the compiler
+  /// selects to compute a pointer to stack data uses B to define A. See
+  /// X86FixupSeparateStack::mayComputeStackPtr for more details.
+  ///
+  /// This also indicates cancellations of implicit derivations. Every
+  /// physical register is implicitly derived from itself unless an entry in
+  /// Derivatives indicates otherwise. Specifically, it is as though
+  /// Derivatives contains (A, A) for all physical registers A unless there
+  /// exists an entry (A, X) in Derivatives for some physical register X or
+  /// the special value X86::NoRegister. This special value indicates that A
+  /// cannot possibly point to stack data.
+  ///
+  /// Note that the presence of an entry (A, B) in Derivatives does NOT
+  /// automatically indicate that A points to stack data. It only indicates
+  /// that if B pointed to stack data when this basic block was entered, then
+  /// A points to stack data at the current instruction. Also keep in mind
+  /// that Derivatives gets updated as additional instructions in the basic
+  /// block are processed.
+  ///
+  /// The entry (ESP, ESP) is always implicitly (and never explicitly) present
+  /// in Derivatives.
+  std::vector<std::pair<unsigned, unsigned>> Derivatives;
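+  // Illustrative derivation example (hypothetical instruction sequence):
+  //   movl %esp, %esi   -> derive(ESP, ESI) records (ESI, ESP)
+  //   addl $16, %esi    -> derive(ESI, ESI) keeps the root: (ESI, ESP)
+  //   movl $0, %esi     -> markNotDerived(ESI) records (ESI, NoRegister)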
+  /// Root registers that are used directly or indirectly in memory operands
+  DenseSet<unsigned> UsedInMemOp;
+
+  /// \brief Look up the register from which To may derive a pointer to stack
+  /// data, if such a register exists.
+  ///
+  /// \returns The register Root if an implicit or explicit entry (To, Root)
+  /// is currently present in Derivatives. Otherwise, returns X86::NoRegister.
+  /// Note that there may be an entry (To, X86::NoRegister) in Derivatives,
+  /// which would also result in X86::NoRegister being returned.
+  unsigned lookupRoot(unsigned To) const;
+
+public:
+  /// \brief Construct an object with no initial predecessor. One or more
+  /// predecessors may still be added later with addPredecessor.
+  AddrRegReqs(const X86RegisterInfo *TRI_) : TRI(TRI_), OrigRegs(TRI_) {}
+  /// \brief Construct an object with one initial predecessor. Initialize
+  /// OrigRegs to represent the register configuration at the end of
+  /// Predecessor, since this is only invoked after Predecessor has been
+  /// processed.
+  AddrRegReqs(AddrRegReqs *Predecessor);
+
+  /// \brief Extract the register configuration represented by the current
+  /// state of this object. This is based on the contents of OrigRegs, and it
+  /// is adjusted to account for the contents of Derivatives.
+  AddrRegs extractRegs();
+
+  /// \brief Register a new predecessor for this object. This involves the
+  /// following steps:
+  /// 1. Insert Predecessor into Predecessors so that any time useInMemOp is
+  ///    invoked on this object, it may also invoke Predecessor->useInMemOp
+  ///    as necessary.
+  /// 2. Invoke Predecessor->useInMemOp for each register in UsedInMemOp,
+  ///    since the basic block corresponding to this object uses those
+  ///    registers even though the instructions that are in the predecessor
+  ///    basic block may not have used those registers.
+  void addPredecessor(AddrRegReqs *Predecessor);
+
+  /// \brief Record that To is derived from From in Derivatives.
+  void derive(unsigned From, unsigned To);
+
+  /// \brief Record that To is redefined by some operation that cannot be used
+  /// to derive a pointer into the stack.
+  void markNotDerived(unsigned To);
+
+  /// \brief Record that Reg is used as a base address register in a memory
+  /// operand and check that the requirement for this to be a stack or data
+  /// pointer is met.
+  ///
+  /// This routine is invoked recursively for each predecessor of this object,
+  /// and there may even be cycles. However, the recursion will eventually
+  /// terminate. The routine is only invoked recursively if an entry is added
+  /// to this->UsedInMemOp. The only entries that can be added are those
+  /// computed from the contents of Derivatives and OrigRegs. The resultant
+  /// set is a subset of the set of physical registers, which is finite.
+  /// Entries are never removed from UsedInMemOp by this routine. Thus, this
+  /// routine will eventually run out of entries to be added to UsedInMemOp.
+  ///
+  /// \param Reg Register that is used as a base address register in a memory
+  ///            operand.
+  /// \param IsStackReg Set to true iff Reg is required to point to stack
+  ///                   memory. Otherwise, Reg is required to point to data
+  ///                   memory.
+  void useInMemOp(unsigned Reg, bool IsStackReg);
+
+  /// \brief Return true iff Reg points to a stack location at the current
+  /// instruction.
+  bool pointsIntoStack(unsigned Reg) const;
+};
+
+/// Requirements for how register spills of pointers to the stack segment are
+/// arranged in the frame.
+class StackPtrSpillReqs {
+private:
+  std::set<StackPtrSpillReqs *> Predecessors;
+
+  /// Frame slots that have been used in the associated basic block or one or
+  /// more of its successors to fill registers with stack pointers or other
+  /// data.
+  std::map<int64_t, bool> Demand;
+
+  /// Frame slots that have stack pointers or other data spilled into them by
+  /// instructions in the function associated with this object.
+  std::map<int64_t, bool> Supply;
+
+  /// True if this object is associated with the function entrypoint.
+  bool Top;
+
+  enum class DemandResponse { Unsatisfied, SatisfiedSP, SatisfiedNonSP };
+
+  /// \brief Check whether a spill slot currently contains a stack pointer or
+  /// other data.
+  DemandResponse demand(int64_t Start, uint64_t Size,
+                        std::set<StackPtrSpillReqs *> Demanders);
+
+public:
+  /// \brief Construct an object with no initial predecessor. One or more
+  /// predecessors may still be added later with addPredecessor.
+  StackPtrSpillReqs() : Top(true) {}
+  /// \brief Construct an object with one initial predecessor.
+  StackPtrSpillReqs(StackPtrSpillReqs *Predecessor);
+
+  /// \brief Register a new predecessor for this object. This involves the
+  /// following steps:
+  /// 1. Insert Predecessor into Predecessors so that any time demand is
+  ///    invoked on this object, it may also invoke Predecessor->demand as
+  ///    necessary.
+  /// 2. Invoke Predecessor->demand for each frame slot demanded by this set
+  ///    of requirements.
+  void addPredecessor(StackPtrSpillReqs *Predecessor);
+
+  /// \brief Record that either a stack pointer or other data is spilled into
+  /// the specified slot by an instruction in the function associated with
+  /// this object.
+  void supply(bool SP, int64_t Start, uint64_t Size);
+
+  /// \brief Check whether the specified slot most recently had stack pointer
+  /// data spilled into it.
+  bool isSPSpill(int64_t Start, uint64_t Size);
+};
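+// Illustrative example (hypothetical code, assuming EBP is the frame
+// register and is distinct from ESP):
+//   movl %esp, %eax       ; EAX becomes a stack pointer
+//   movl %eax, -8(%ebp)   ; supply(true, -8, 4): the slot holds a stack
+//                         ; pointer
+//   movl -8(%ebp), %ecx   ; isSPSpill(-8, 4) is true, so the fill marks
+//                         ; ECX as a stack pointer again
+// A fill that follows a non-pointer store to the same slot, e.g.
+// "movl $0, -8(%ebp)" (supply(false, -8, 4)), instead marks the filled
+// register as not pointing into the stack.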
+/// This pass performs a depth-first traversal of each function, visiting each
+/// basic block once.
+///
+/// To understand how this pass determines whether its transformation is
+/// correct, it may be helpful to think of each basic block as an electronic
+/// circuit with 12V and 5V inputs. Those inputs take the form of register
+/// values fed into basic blocks. Think of pointers to stack data as 12V
+/// inputs and other register values as 5V inputs.
+///
+/// Continuing this analogy, the problem that this pass seeks to solve is to
+/// determine for each circuit element whether it is always wired to 12V or
+/// 5V. This pass considers each instruction to be analogous to a circuit
+/// element. Of course, each basic block may be invoked after multiple
+/// predecessors. Think of this like a circuit that can have multiple copies
+/// of a particular circuit block comprising circuit elements such as
+/// resistors and capacitors, and all replicas of a particular element must
+/// use the same voltage inputs.
+///
+/// An assumption that helps to make the problem more tractable for this pass
+/// is that pointers to data on the stack are only computed using a couple of
+/// instruction forms. This is analogous to a specific material being used to
+/// carry 12V in a circuit. It is still possible that a 5V input signal could
+/// be connected to a 12V-capable trace, but a 12V input signal will never be
+/// connected to a 5V trace. It is possible to determine which inputs may be
+/// connected to 12V input signals just by looking at a particular circuit
+/// block, without needing to know how all replicas of it will be connected
+/// in the whole circuit. Similarly, it is possible to determine which
+/// register inputs to a particular instruction may point to stack data by
+/// analyzing just the instructions preceding that one in its basic block.
+///
+/// Ultimately, it is still necessary to analyze the control flow between
+/// basic blocks to make sure that for each base address register used in
+/// each memory operand, the register consistently refers to either stack
+/// data or non-stack data on every control flow path that contains the
+/// corresponding instruction. This is analogous to checking circuit blocks
+/// to make sure that for each 12V-capable trace leading to each circuit
+/// element, all replicas are connected to 12V input signals or all replicas
+/// are connected to 5V input signals. For efficiency, this pass only
+/// traverses each basic block once. The connectivity check is performed by
+/// recording the predecessors of each basic block and tracing a particular
+/// register value back through the chain of derivations that produced it to
+/// check the property described above. This is analogous to tracing a wire
+/// connected to a circuit element back to its source.
+///
+/// The SafeStack instrumentation pass moves to the unsafe stack any objects
+/// that could have pointers to them passed between functions. This is
+/// necessary, since the X86FixupSeparateStack pass only performs
+/// intra-procedural analysis. The runtime can place the unsafe stack outside
+/// of the stack segment, so that accesses to it do not require segment
+/// override prefixes.
+///
+/// Even with the SafeStack pass enabled, va_list objects containing pointers
+/// to the stack may still be passed between C/C++ functions. That is why the
+/// Clang frontend recognizes when this pass is enabled and adds the
+/// appropriate segment override prefixes to __builtin_va_arg invocations.
+///
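+/// For example (hypothetical x86-32 assembly; actual code depends on
+/// register allocation), the pass rewrites a load through a register that
+/// derives from ESP:
+///
+///   leal 8(%esp), %eax       ; EAX becomes a derived stack pointer
+///   movl (%eax), %ecx        ; rewritten to: movl %ss:(%eax), %ecx
+///
+/// A load through a register that never derives from ESP, such as
+/// movl (%ebx), %edx, keeps its implicit DS segment and is left unchanged.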
+class X86FixupSeparateStack : public MachineFunctionPass {
+public:
+  X86FixupSeparateStack() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  const char *getPassName() const override {
+    return "X86 Fixup Separate Stack";
+  }
+
+  const TargetInstrInfo *TII;
+  const X86RegisterInfo *TRI;
+  const X86Subtarget *STI;
+  unsigned FrameReg;
+  unsigned StackReg;
+  bool HasVAStart;
+  int VAStartFI;
+  static char ID;
+
+  /// Map from basic block to corresponding AddrRegReqs and StackPtrSpillReqs
+  /// objects
+  std::map<MachineBasicBlock *,
+           std::pair<std::shared_ptr<AddrRegReqs>,
+                     std::shared_ptr<StackPtrSpillReqs>>> Reqs;
+
+  /// \brief Perform fixups for a string instruction.
+  ///
+  /// \param I Iterator pointing to the string instruction in the basic block.
+  ///          This routine may add new instructions around the string
+  ///          instruction, in which case 'I' will point to the instruction
+  ///          that was added at the highest address.
+  /// \param StringDS true if the instruction accesses DS
+  /// \param StringES true if the instruction accesses ES
+  ///
+  /// \returns true if the basic block was changed
+  bool processStringInstr(MachineBasicBlock *BB,
+                          MachineBasicBlock::iterator &I, AddrRegReqs &AReqs,
+                          bool StringDS, bool StringES);
+
+  /// \brief Perform fixups for an instruction that accesses memory with an
+  /// explicit memory operand.
+  ///
+  /// \param MemoryOperand Offset to the memory operand within the instruction
+  ///
+  /// \returns true if the instruction was changed
+  bool processMemAccessInstr(MachineInstr &I, int MemoryOperand,
+                             AddrRegReqs &AReqs);
+
+  enum FrameOp {
+    NoFrameOp, // or an irrelevant frame operation
+    SpillReg,
+    FillRegWithStackPtr
+  };
+
+  /// \brief Check whether an instruction is a register spill or fill and, if
+  /// so, update the StackPtrSpillReqs object appropriately.
+  ///
+  /// \param PossibleSP Set to true if this instruction may possibly spill or
+  ///        fill a stack pointer. Some instructions spill and fill registers,
+  ///        but are assumed to not handle stack pointers (e.g. FP
+  ///        instructions).
+  ///
+  /// \returns type of frame operation performed by the instruction
+  FrameOp checkForRegisterSpill(MachineInstr &I, int MemoryOperand,
+                                bool PossibleSP, AddrRegReqs &AReqs,
+                                StackPtrSpillReqs &SReqs);
+
+  /// \brief Check whether an instruction is of a type that may be used to
+  /// compute a derived stack pointer.
+  ///
+  /// \returns X86::NoRegister if the instruction is not of a type that may be
+  /// used to compute a derived stack pointer. Otherwise, returns the register
+  /// that is used as the source of the possible derivation.
+  unsigned mayComputeStackPtr(const MachineInstr &I) const;
+
+  /// \brief Process a basic block.
+  ///
+  /// \param StackGrowth Current size of the stack at this invocation point.
+  ///        This is only tracked if this function has a va_start, so that it
+  ///        can be used to detect instructions that store the address of the
+  ///        variadic arguments to memory. This needs to be detected
+  ///        specially, since storing a pointer to stack memory is otherwise
+  ///        disallowed.
+  ///
+  /// \returns true if the basic block was changed
+  bool processBasicBlock(MachineBasicBlock *BB, int64_t StackGrowth,
+                         std::shared_ptr<AddrRegReqs> AReqs,
+                         std::shared_ptr<StackPtrSpillReqs> SReqs);
+};
+
+char X86FixupSeparateStack::ID = 0;
+}
+
+FunctionPass *llvm::createX86FixupSeparateStack() {
+  return new X86FixupSeparateStack();
+}
+
+void AddrRegs::insertStackReg(unsigned Reg) {
+  for (auto N : Stack)
+    if (TRI->regsOverlap(N, Reg))
+      return;
+
+  Stack.insert(Reg);
+}
+
+bool AddrRegs::pointsIntoStack(unsigned Reg) const {
+  if (!TRI->isPhysicalRegister(Reg))
+    return false;
+
+  if (TRI->regsOverlap(TRI->getStackRegister(), Reg))
+    // The stack register is assumed to always point to the stack
+    return true;
+
+  for (auto N : Stack)
+    if (TRI->regsOverlap(N, Reg))
+      return true;
+
+  return false;
+}
+
+AddrRegReqs::AddrRegReqs(AddrRegReqs *Predecessor)
+    : TRI(Predecessor->TRI), OrigRegs(Predecessor->extractRegs()) {
+  Predecessors.insert(Predecessor);
+}
+
+AddrRegs AddrRegReqs::extractRegs() {
+  AddrRegs Regs(TRI);
+
+  // Registers that have been redefined since the beginning of the basic block
+  DenseSet<unsigned> Redefined;
+
+  // Handle the explicit derivations:
+  for (auto &D : Derivatives) {
+    // Note that this derivation overrides the corresponding implicit one:
+    Redefined.insert(D.first);
+    if (OrigRegs.pointsIntoStack(D.second))
+      Regs.insertStackReg(D.first);
+  }
+
+  // Handle the implicit derivations:
+  for (auto N : OrigRegs.Stack)
+    if (Redefined.find(N) == Redefined.end())
+      Regs.insertStackReg(N);
+
+  return Regs;
+}
+
+void AddrRegReqs::addPredecessor(AddrRegReqs *Predecessor) {
+  // Keep the insertion outside of the assert so that it still happens in
+  // builds with assertions disabled.
+  bool Inserted = Predecessors.insert(Predecessor).second;
+  (void)Inserted;
+  assert(Inserted && "Predecessor was already registered");
+
+  // It is correct to copy the current value of UsedInMemOp and iterate
+  // through that in this method, even if this object is part of a cycle of
+  // predecessors and the useInMemOp routine adds a register to
+  // this->UsedInMemOp. In that case, the useInMemOp routine will itself
+  // invoke Predecessor->useInMemOp with the newly-added register, since
+  // Predecessor was added to Predecessors prior to invoking useInMemOp below.
+  DenseSet<unsigned> OrigUsedInMemOp(UsedInMemOp);
+
+  for (auto N : OrigUsedInMemOp)
+    Predecessor->useInMemOp(N, OrigRegs.pointsIntoStack(N));
+}
+
+unsigned AddrRegReqs::lookupRoot(unsigned To) const {
+  if (!TRI->isPhysicalRegister(To))
+    return X86::NoRegister;
+
+  for (auto &Derivative : Derivatives)
+    if (TRI->regsOverlap(Derivative.first, To))
+      return Derivative.second;
+
+  return To;
+}
+
+void AddrRegReqs::derive(unsigned From, unsigned To) {
+  if (TRI->regsOverlap(To, TRI->getStackRegister()))
+    // The stack register is always assumed to point to the stack, regardless
+    // of what instructions are used to update it.
+    return;
+
+  // Look up the root register from which From is derived, since the second
+  // identifier in each pair in Derivatives is actually for the register value
+  // at the beginning of the basic block from which the value for the register
+  // identified by the first number is derived.
+  unsigned RootFrom = lookupRoot(From);
+
+  for (auto &Derivative : Derivatives)
+    if (TRI->regsOverlap(Derivative.first, To)) {
+      // Update an existing entry
+      Derivative.second = RootFrom;
+      return;
+    }
+
+  // Replace the implicit derivation for To with the following explicit
+  // derivation.
+  Derivatives.push_back(std::make_pair(To, RootFrom));
+}
+
+void AddrRegReqs::markNotDerived(unsigned To) {
+  derive(X86::NoRegister, To);
+}
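+// Example of a pattern that useInMemOp rejects (hypothetical control flow):
+// if one predecessor of a block ends with "movl %esp, %eax" and another
+// with "movl $buf, %eax", a use of (%eax) as a base address in the join
+// block would see EAX pointing into the stack on one path and into DS data
+// on the other, which trips the assertion below.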
+void AddrRegReqs::useInMemOp(unsigned Reg, bool IsStackReg) {
+  bool FromStackReg = false;
+
+  unsigned RootFrom = lookupRoot(Reg);
+  if (RootFrom != X86::NoRegister)
+    FromStackReg = OrigRegs.pointsIntoStack(RootFrom);
+
+  // For registers that are set by instructions of the types used by the
+  // compiler to compute pointers to stack data, this pass checks that each
+  // such register either always points to stack data or never points to
+  // stack data at each instruction that uses the register in a memory
+  // operand.
+  assert(IsStackReg == FromStackReg &&
+         "One or more registers point to both stack and data locations at "
+         "one or more instructions, which is not supported by the "
+         "X86FixupSeparateStack pass.");
+
+  if (RootFrom == X86::NoRegister)
+    // This register cannot point to stack data, so it is unnecessary to
+    // perform consistency checks on its values.
+    return;
+
+  for (auto N : UsedInMemOp)
+    if (TRI->regsOverlap(RootFrom, N))
+      return;
+
+  UsedInMemOp.insert(RootFrom);
+
+  for (AddrRegReqs *P : Predecessors)
+    P->useInMemOp(RootFrom, IsStackReg);
+}
+
+/// \brief Return true iff Reg points to a stack location.
+bool AddrRegReqs::pointsIntoStack(unsigned Reg) const {
+  unsigned From = lookupRoot(Reg);
+  if (From == X86::NoRegister)
+    return false;
+
+  return OrigRegs.pointsIntoStack(From);
+}
+
+StackPtrSpillReqs::StackPtrSpillReqs(StackPtrSpillReqs *Predecessor)
+    : Top(false) {
+  Predecessors.insert(Predecessor);
+}
+
+/// Tracks whether a value assembled from multiple spill-slot bytes is a stack
+/// pointer; all of the bytes must agree.
+class LockingSPToggle {
+private:
+  bool Set;
+  bool Value;
+
+public:
+  LockingSPToggle() : Set(false), Value(false) {}
+
+  void set(bool V) {
+    if (Set) {
+      assert(V == Value &&
+             "Register fill would get a mixture of stack pointer and other "
+             "data");
+      return;
+    }
+
+    Value = V;
+    Set = true;
+  }
+
+  bool get() {
+    assert(Set);
+
+    return Value;
+  }
+};
+StackPtrSpillReqs::DemandResponse
+StackPtrSpillReqs::demand(int64_t Start, uint64_t Size,
+                          std::set<StackPtrSpillReqs *> Demanders) {
+  if (!Demanders.insert(this).second)
+    // A cycle was detected, and the demand is not satisfied within this
+    // cycle.
+    return DemandResponse::Unsatisfied;
+
+  LockingSPToggle SP;
+
+  for (uint64_t Offset = 0; Offset < Size; Offset++) {
+    auto SupplyI = Supply.find(Start + Offset);
+    bool Supplied = SupplyI != Supply.end();
+
+    if (Supplied) {
+      // This demand is satisfied by supply earlier in this function.
+      SP.set(SupplyI->second);
+      continue;
+    }
+
+    auto DemandI = Demand.find(Start + Offset);
+    bool Demanded = DemandI != Demand.end();
+
+    if (Demanded) {
+      // This demand was previously submitted and the result is cached.
+      SP.set(DemandI->second);
+      continue;
+    }
+
+    if (Top) {
+      // This assumes that arguments and uninitialized frame slots are not
+      // stack pointers.
+      SP.set(false);
+      continue;
+    }
+
+    // Propagate this demand to the previously-visited predecessors of this
+    // function.
+    bool PredSP = false;
+    bool DemandSatisfied = false;
+    for (auto Pred : Predecessors) {
+      auto Resp = Pred->demand(Start + Offset, 1, Demanders);
+      if (Resp == DemandResponse::Unsatisfied)
+        // The demand was not satisfied by this cycle, but it may be satisfied
+        // by a different predecessor.
+        continue;
+
+      bool SPLocal = Resp == DemandResponse::SatisfiedSP;
+      if (!DemandSatisfied)
+        PredSP = SPLocal;
+      else
+        assert(PredSP == SPLocal &&
+               "Inconsistent register spills detected");
+      DemandSatisfied = true;
+    }
+    if (!DemandSatisfied)
+      return DemandResponse::Unsatisfied;
+
+    // Cache the result of propagating this demand to the function's
+    // predecessors.
+    Demand.emplace(Start + Offset, PredSP);
+    SP.set(PredSP);
+  }
+
+  return SP.get() ? DemandResponse::SatisfiedSP
+                  : DemandResponse::SatisfiedNonSP;
+}
+
+bool StackPtrSpillReqs::isSPSpill(int64_t Start, uint64_t Size) {
+  std::set<StackPtrSpillReqs *> Demanders;
+  DemandResponse Resp = demand(Start, Size, Demanders);
+
+  assert(Resp != DemandResponse::Unsatisfied);
+
+  return Resp == DemandResponse::SatisfiedSP;
+}
+
+void StackPtrSpillReqs::addPredecessor(StackPtrSpillReqs *Predecessor) {
+  assert(Predecessors.find(Predecessor) == Predecessors.end());
+
+  std::set<StackPtrSpillReqs *> Demanders;
+
+  bool Satisfied = false;
+  for (auto D : Demand) {
+    auto Resp = Predecessor->demand(D.first, 1, Demanders);
+
+    // The Unsatisfied response can only be returned from a cycle that does
+    // not include the function entrypoint. There will be at least one other
+    // predecessor that will be checked and that will lead back to the
+    // function entrypoint, because the basic block traversal starts at the
+    // function entrypoint.
+    if (Resp == DemandResponse::Unsatisfied)
+      continue;
+
+    assert(Resp == (D.second ? DemandResponse::SatisfiedSP
+                             : DemandResponse::SatisfiedNonSP));
+
+    Satisfied = true;
+  }
+
+  assert((Satisfied || Demand.empty()) &&
+         "New predecessor does not satisfy any existing demand");
+
+  Predecessors.insert(Predecessor);
+}
+
+void StackPtrSpillReqs::supply(bool SP, int64_t Start, uint64_t Size) {
+  for (uint64_t Offset = 0; Offset < Size; Offset++)
+    Supply.emplace(Start + Offset, SP);
+}
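+// When a string instruction reads or writes through a stack pointer, the
+// emitted sequence temporarily points the data segment register at SS. For
+// example (hypothetical; ESI pointing into the stack):
+//   pushl %ds
+//   pushl %ss
+//   popl  %ds
+//   rep movsl
+//   popl  %ds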
+bool X86FixupSeparateStack::processStringInstr(MachineBasicBlock *BB,
+                                               MachineBasicBlock::iterator &I,
+                                               AddrRegReqs &AReqs,
+                                               bool StringDS, bool StringES) {
+  MachineFunction &MF = *BB->getParent();
+
+  // FIXME: Check for a segment override prefix on the string instruction and
+  // skip the following if one is detected.
+
+  bool StackDS = false;
+  bool StackES = false;
+
+  if (StringDS) {
+    StackDS = AReqs.pointsIntoStack(X86::ESI);
+    AReqs.useInMemOp(X86::ESI, StackDS);
+  }
+
+  if (StringES) {
+    StackES = AReqs.pointsIntoStack(X86::EDI);
+    AReqs.useInMemOp(X86::EDI, StackES);
+  }
+
+  if (!(StackDS || StackES))
+    return false;
+
+  // Emit instructions to save a segment register, overwrite it with the
+  // contents of SS, and restore its original value after the string
+  // instruction is complete. Returns an iterator pointing to the restoring
+  // POP instruction.
+  auto tempOverwriteSegWithSS = [&](unsigned PushOp, unsigned PopOp) {
+    // Save the original value of the segment register to be overwritten.
+    MachineInstr *NewMI =
+        MF.CreateMachineInstr(TII->get(PushOp), I->getDebugLoc());
+    I = BB->insert(I, NewMI);
+    // Push the stack segment selector onto the stack.
+    NewMI = MF.CreateMachineInstr(TII->get(X86::PUSHSS32), I->getDebugLoc());
+    I = BB->insertAfter(I, NewMI);
+    // Overwrite the segment register with the stack segment selector.
+    NewMI = MF.CreateMachineInstr(TII->get(PopOp), I->getDebugLoc());
+    I = BB->insertAfter(I, NewMI);
+    // Advance to the string instruction.
+    I++;
+    // Restore the original value of the segment register.
+    NewMI = MF.CreateMachineInstr(TII->get(PopOp), I->getDebugLoc());
+    return BB->insertAfter(I, NewMI);
+  };
+
+  if (StackDS) {
+    // Leave 'I' pointing at the POP instruction that restores DS.
+    I = tempOverwriteSegWithSS(X86::PUSHDS32, X86::POPDS32);
+  }
+  if (StackES) {
+    if (StackDS)
+      // Back up to the string instruction.
+      I--;
+    I = tempOverwriteSegWithSS(X86::PUSHES32, X86::POPES32);
+    if (StackDS)
+      // Advance past the POP instruction that restores DS.
+      I++;
+  }
+
+  return true;
+}
+
+bool X86FixupSeparateStack::processMemAccessInstr(MachineInstr &I,
+                                                  int MemoryOperand,
+                                                  AddrRegReqs &AReqs) {
+  MachineOperand &SegRegOp =
+      I.getOperand(MemoryOperand + X86::AddrSegmentReg);
+
+  unsigned PrevSegReg = SegRegOp.getReg();
+
+  if (TRI->isPhysicalRegister(PrevSegReg))
+    // Do not replace the existing segment override prefix.
+    return false;
+
+  unsigned BaseReg = I.getOperand(MemoryOperand + X86::AddrBaseReg).getReg();
+
+  bool BaseIsPhysReg = TRI->isPhysicalRegister(BaseReg);
+
+  bool InStack = AReqs.pointsIntoStack(BaseReg);
+
+  if (BaseIsPhysReg)
+    AReqs.useInMemOp(BaseReg, InStack);
+
+  if (InStack &&
+      (TRI->regsOverlap(X86::ESP, BaseReg) ||
+       TRI->regsOverlap(X86::EBP, BaseReg)))
+    // A memory operand with a base register of ESP or EBP implicitly
+    // references SS.
+    return false;
+
+  if (!InStack &&
+      // A memory operand without a base register implicitly references DS.
+      !(BaseIsPhysReg &&
+        // A memory operand with a base register other than ESP and EBP
+        // implicitly references DS. This assumes that ESP is never used as a
+        // non-stack pointer.
+        TRI->regsOverlap(X86::EBP, BaseReg)))
+    return false;
+
+  SegRegOp.ChangeToRegister(InStack ? X86::SS : X86::DS, false);
+
+  return true;
+}
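+// For example (hypothetical sequence), each recognized form can propagate a
+// stack pointer into another register:
+//   leal 16(%esp), %eax   ; LEA32r   -- EAX derives from ESP
+//   movl %eax, %ecx       ; MOV32rr  -- the derivation propagates to ECX
+//   addl $4, %ecx         ; ADD32ri8 -- ECX is still a derived stack pointer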
+unsigned
+X86FixupSeparateStack::mayComputeStackPtr(const MachineInstr &I) const {
+  switch (I.getOpcode()) {
+  // This assumes that only the following instructions are used by the
+  // compiler to compute a stack pointer derived directly or indirectly from
+  // ESP:
+  case X86::LEA32r:
+  case X86::MOV32rr:
+  case X86::ADD32ri8:
+  case X86::ADD32ri:
+    break;
+  default:
+    return X86::NoRegister;
+  }
+
+  for (const MachineOperand &Op : I.operands()) {
+    if (!Op.isReg())
+      continue;
+
+    if (!Op.isUse())
+      continue;
+
+    // Return the register that is used in the address computation.
+    return Op.getReg();
+  }
+
+  assert(false &&
+         "Unexpected failure to identify register used to compute address");
+  return X86::NoRegister;
+}
+
+X86FixupSeparateStack::FrameOp
+X86FixupSeparateStack::checkForRegisterSpill(MachineInstr &I,
+                                             int MemoryOperand,
+                                             bool PossibleSP,
+                                             AddrRegReqs &AReqs,
+                                             StackPtrSpillReqs &SReqs) {
+  unsigned BaseReg = I.getOperand(MemoryOperand + X86::AddrBaseReg).getReg();
+
+  if (!TRI->isPhysicalRegister(BaseReg))
+    return NoFrameOp;
+
+  if (!TRI->regsOverlap(FrameReg, BaseReg))
+    return NoFrameOp;
+
+  const MachineOperand &Disp = I.getOperand(MemoryOperand + X86::AddrDisp);
+
+  // FIXME: Consider whether other forms of addressing should be handled.
+  if (!Disp.isImm())
+    return NoFrameOp;
+
+  bool SP = false;
+  if (PossibleSP) {
+    int RegOpNo = I.mayStore() ? MemoryOperand + X86::AddrNumOperands : 0;
+    MachineOperand *RegOp = &(I.getOperand(RegOpNo));
+    SP = AReqs.pointsIntoStack(RegOp->getReg());
+  }
+
+  bool FoundMemOp = false;
+  uint64_t Size = 0;
+  for (auto MO : I.memoperands()) {
+    assert(!FoundMemOp && "Only a single memory operand is supported");
+    FoundMemOp = true;
+    Size = MO->getSize();
+  }
+
+  if (!FoundMemOp)
+    return NoFrameOp;
+
+  if (I.mayStore()) {
+    SReqs.supply(SP, Disp.getImm(), Size);
+    return SpillReg;
+  }
+
+  if (SReqs.isSPSpill(Disp.getImm(), Size))
+    return FillRegWithStackPtr;
+
+  return NoFrameOp;
+}
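+// Variadic-argument example (hypothetical): when the function contains a
+// va_start, an LEA such as
+//   leal 20(%esp), %eax
+// whose displacement reaches the first variadic argument (as computed from
+// StackGrowth and VAStartFI below) marks EAX as the last-computed va_list
+// pointer (VAPtr), so a later store of EAX is not flagged as an untracked
+// stack-pointer store.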
+bool X86FixupSeparateStack::processBasicBlock(
+    MachineBasicBlock *BB, int64_t StackGrowth,
+    std::shared_ptr<AddrRegReqs> AReqs,
+    std::shared_ptr<StackPtrSpillReqs> SReqs) {
+  bool Changed = false;
+
+  Reqs.emplace(BB, std::make_pair(AReqs, SReqs));
+
+  // Last-computed register that points to a variadic argument.
+  unsigned VAPtr = X86::NoRegister;
+
+  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+    bool StringDS, StringES;
+
+    unsigned Opcode = I->getOpcode();
+
+    switch (Opcode) {
+    case X86::REP_MOVSB_32:
+    case X86::REP_MOVSW_32:
+    case X86::REP_MOVSD_32:
+      StringDS = true;
+      StringES = true;
+      break;
+    case X86::REP_STOSB_32:
+    case X86::REP_STOSW_32:
+    case X86::REP_STOSD_32:
+      StringDS = false;
+      StringES = true;
+      break;
+    default:
+      StringDS = false;
+      StringES = false;
+      break;
+    }
+
+    if (StringDS || StringES) {
+      Changed |= processStringInstr(BB, I, *AReqs, StringDS, StringES);
+      continue;
+    }
+
+    const MCInstrDesc &Desc = TII->get(Opcode);
+    uint64_t TSFlags = Desc.TSFlags;
+
+    int MemoryOperand = -1;
+    if (I->mayLoadOrStore()) {
+      // Determine where the memory operand starts, if present.
+      MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+      if (MemoryOperand != -1)
+        MemoryOperand += X86II::getOperandBias(Desc);
+    }
+
+    bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+    bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+
+    unsigned MayDeriveStackPtrFrom = X86::NoRegister;
+
+    // This assumes that the compiler only uses the following forms of
+    // instructions to store stack pointers (including derived ones) to
+    // memory. Certain VEX-encoded instructions are specifically excluded to
+    // reduce the complexity of the subsequent code that computes the operand
+    // number of the register being stored. See
+    // X86MCCodeEmitter::EmitVEXOpcodePrefix for information about how these
+    // instructions are represented in LLVM. Again, this assumes that such
+    // instructions are not used to store stack pointers.
+    MachineOperand *StackPtrStoreSrcOp = nullptr;
+    if (Opcode == X86::PUSH32r) {
+      StackPtrStoreSrcOp = &(I->getOperand(0));
+    } else if (I->mayLoadOrStore() && !I->isCall() && MemoryOperand != -1) {
+      bool PossibleSP =
+          (((TSFlags & X86II::FormMask) == X86II::MRMDestMem) ||
+           ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem)) &&
+          !(HasEVEX_K || HasVEX_4V);
+
+      FrameOp FrmOp = NoFrameOp;
+      if (FrameReg != StackReg)
+        FrmOp = checkForRegisterSpill(*I, MemoryOperand, PossibleSP, *AReqs,
+                                      *SReqs);
+
+      switch (FrmOp) {
+      case FillRegWithStackPtr:
+        assert(PossibleSP);
+        MayDeriveStackPtrFrom = StackReg;
+        break;
+      case SpillReg:
+        break;
+      default:
+        if (PossibleSP && I->mayStore())
+          StackPtrStoreSrcOp =
+              &(I->getOperand(MemoryOperand + X86::AddrNumOperands));
+        break;
+      }
+    }
+
+    assert((StackPtrStoreSrcOp == nullptr ||
+            !AReqs->pointsIntoStack(StackPtrStoreSrcOp->getReg()) ||
+            I->readsRegister(VAPtr) ||
+            (SPStoreVerification == SPStoreVerif::None) ||
+            ((SPStoreVerification == SPStoreVerif::IgnoreVA) &&
+             HasVAStart)) &&
+           "The X86FixupSeparateStack pass is unable to track stack "
+           "pointers that are stored to memory.\n");
+
+    if (MemoryOperand != -1)
+      Changed |= processMemAccessInstr(*I, MemoryOperand, *AReqs);
+    else
+      MayDeriveStackPtrFrom = mayComputeStackPtr(*I);
+
+    for (MachineOperand &Op : I->operands()) {
+      if (!Op.isReg() || !Op.isDef())
+        continue;
+
+      unsigned Reg = Op.getReg();
+
+      if (!TRI->isPhysicalRegister(Reg))
+        continue;
+
+      if (MayDeriveStackPtrFrom != X86::NoRegister) {
+        AReqs->derive(MayDeriveStackPtrFrom, Reg);
+        continue;
+      }
+
+      AReqs->markNotDerived(Reg);
+    }
+
+    if (SPStoreVerification == SPStoreVerif::None || !HasVAStart)
+      continue;
+
+    if (I->definesRegister(VAPtr))
+      VAPtr = X86::NoRegister;
+
+    // FIXME: Support more complex instructions for computing addresses of
+    // variadic arguments.
+    if (Opcode == X86::LEA32r &&
+        I->getOperand(2).getImm() == 1 &&
+        I->getOperand(3).getReg() == X86::NoRegister &&
+        I->getOperand(5).getReg() == X86::NoRegister &&
+        ((I->getOperand(1).getReg() == StackReg &&
+          I->getOperand(4).getImm() >= StackGrowth + VAStartFI * -4) ||
+         (StackReg != FrameReg &&
+          I->getOperand(1).getReg() == FrameReg &&
+          I->getOperand(4).getImm() >= VAStartFI * -4)))
+      // This assumes that this relative stack slot will always correspond to
+      // the first variadic argument in all invocations of this basic block,
+      // since it is only checked during the first invocation that is
+      // encountered by this pass.
+      VAPtr = I->getOperand(0).getReg();
+
+    if (!I->definesRegister(StackReg))
+      continue;
+
+    // This assumes that only the following instructions are used to update
+    // ESP.
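+    // For example (hypothetical values): "subl $12, %esp" increases
+    // StackGrowth by 12, and "popl %eax" decreases it by 4.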
+    switch (Opcode) {
+    case X86::ADD32ri:
+    case X86::ADD32ri8:
+      assert(I->getOperand(0).getReg() == StackReg &&
+             I->getOperand(1).getReg() == StackReg &&
+             "Unsupported instruction");
+      StackGrowth -= I->getOperand(2).getImm();
+      break;
+    case X86::SUB32ri:
+    case X86::SUB32ri8:
+      assert(I->getOperand(0).getReg() == StackReg &&
+             I->getOperand(1).getReg() == StackReg &&
+             "Unsupported instruction");
+      StackGrowth += I->getOperand(2).getImm();
+      break;
+    case X86::LEA32r:
+      assert(I->getOperand(2).getImm() == 1 &&
+             I->getOperand(3).getReg() == X86::NoRegister &&
+             I->getOperand(5).getReg() == X86::NoRegister &&
+             "Unsupported instruction");
+      StackGrowth -= I->getOperand(4).getImm();
+      break;
+    case X86::PUSHi32:
+    case X86::PUSH32i8:
+    case X86::PUSH32r:
+    case X86::PUSH32rmm:
+      StackGrowth += 4;
+      break;
+    case X86::POP32r:
+    case X86::POP32rmm:
+      StackGrowth -= 4;
+      break;
+    }
+
+    assert(0 <= StackGrowth && "Error occurred while computing stack size");
+  }
+
+  for (auto Succ : BB->successors()) {
+    auto RI = Reqs.find(Succ);
+    if (RI == Reqs.end()) {
+      std::shared_ptr<AddrRegReqs> SuccAReqs(new AddrRegReqs(AReqs.get()));
+      std::shared_ptr<StackPtrSpillReqs> SuccSReqs(
+          new StackPtrSpillReqs(SReqs.get()));
+      Changed |= processBasicBlock(Succ, StackGrowth, SuccAReqs, SuccSReqs);
+    } else {
+      RI->second.first->addPredecessor(AReqs.get());
+      RI->second.second->addPredecessor(SReqs.get());
+    }
+  }
+
+  return Changed;
+}
+
+bool X86FixupSeparateStack::runOnMachineFunction(MachineFunction &MF) {
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
+  TRI = STI->getRegisterInfo();
+  FrameReg = TRI->getFrameRegister(MF);
+  StackReg = TRI->getStackRegister();
+
+  if (!STI->useSeparateStackSeg())
+    return false;
+
+  assert(!STI->is64Bit() && "Only X86-32 is supported.");
+
+  HasVAStart = MF.getFrameInfo()->hasVAStart();
+  if (HasVAStart)
+    VAStartFI = MF.getInfo<X86MachineFunctionInfo>()->getVarArgsFrameIndex();
+
+  Reqs.clear();
+
+  std::shared_ptr<AddrRegReqs> InitAReqs(new AddrRegReqs(TRI));
+  std::shared_ptr<StackPtrSpillReqs> InitSReqs(new StackPtrSpillReqs());
+  return processBasicBlock(&MF.front(), 0, InitAReqs, InitSReqs);
+}
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -269,6 +269,9 @@
   /// Use software floating point for code generation.
   bool UseSoftFloat;
 
+  /// Use a separate stack segment
+  bool UseSeparateStackSeg;
+
   /// The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -448,6 +451,7 @@
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
   bool isSLM() const { return X86ProcFamily == IntelSLM; }
   bool useSoftFloat() const { return UseSoftFloat; }
+  bool useSeparateStackSeg() const { return UseSeparateStackSeg; }
 
   /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
   /// no-sse2). There isn't any reason to disable it if the target processor
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -333,6 +333,7 @@
   // FIXME: this is a known good value for Yonah. How about others?
   MaxInlineSizeThreshold = 128;
   UseSoftFloat = false;
+  UseSeparateStackSeg = false;
 }
 
 X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -284,4 +284,6 @@
     addPass(createX86PadShortFunctions());
     addPass(createX86FixupLEAs());
   }
+
+  addPass(createX86FixupSeparateStack());
 }