diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -994,6 +994,21 @@ return false; } + /// Given a call instr, find the boundary of the function call. + /// Return true on success. + virtual bool getCallBoundary( + MachineInstr &MI, + std::pair + &CallBoundary) const { + return false; + } + + // Given a call instr, return the Function pointer of the callee. + // Only returns not-null values when MI is a direct call. + virtual const Function *getDirectCallsCallee(const MachineInstr &MI) const { + return nullptr; + } + protected: /// Target-dependent implementation for IsCopyInstr. /// If the specific machine instruction is a instruction that moves/copies diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -15,6 +15,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/MapVector.h" @@ -24,8 +25,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -38,6 +41,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -56,6 +60,8 @@ #include #include #include 
+#include +#include #include #include #include @@ -106,6 +112,10 @@ cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); +static cl::opt SinkCalls("machine-sink-calls", + cl::desc("Sink call instructions."), + cl::init(false), cl::Hidden); + STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); @@ -251,7 +261,10 @@ AllSuccsCache &AllSuccessors) const; std::vector &getBBRegisterPressure(MachineBasicBlock &MBB); - }; + + int SinkCallInstr(MachineInstr &MI, MachineBasicBlock::iterator &PreInstr, + AllSuccsCache &AllSuccessors, bool &ProcessedBegin); +}; } // end anonymous namespace @@ -508,6 +521,370 @@ return EverMadeChange; } +/// If the sunk instruction is a copy, try to forward the copy instead of +/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if +/// there's any subregister weirdness involved. Returns true if copy +/// propagation occurred. +static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, + Register Reg) { + const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); + const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); + + // Copy DBG_VALUE operand and set the original to undef. We then check to + // see whether this is something that can be copy-forwarded. If it isn't, + // continue around the loop. + + const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; + auto CopyOperands = TII.isCopyInstr(SinkInst); + if (!CopyOperands) + return false; + SrcMO = CopyOperands->Source; + DstMO = CopyOperands->Destination; + + // Check validity of forwarding this copy. + bool PostRA = MRI.getNumVirtRegs() == 0; + + // Trying to forward between physical and virtual registers is too hard. 
+ if (Reg.isVirtual() != SrcMO->getReg().isVirtual()) + return false; + + // Only try virtual register copy-forwarding before regalloc, and physical + // register copy-forwarding after regalloc. + bool arePhysRegs = !Reg.isVirtual(); + if (arePhysRegs != PostRA) + return false; + + // Pre-regalloc, only forward if all subregisters agree (or there are no + // subregs at all). More analysis might recover some forwardable copies. + if (!PostRA) + for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) + if (DbgMO.getSubReg() != SrcMO->getSubReg() || + DbgMO.getSubReg() != DstMO->getSubReg()) + return false; + + // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register + // of this copy. Only forward the copy if the DBG_VALUE operand exactly + // matches the copy destination. + if (PostRA && Reg != DstMO->getReg()) + return false; + + for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) { + DbgMO.setReg(SrcMO->getReg()); + DbgMO.setSubReg(SrcMO->getSubReg()); + } + return true; +} + +static bool InstrModifiesMem(const MachineInstr &MI, + const TargetInstrInfo *TII) { + if (MI.mayStore()) + return true; + + // We also need to check there is no call to mem-write functions. + if (MI.isCall()) { + const Function *Func = TII->getDirectCallsCallee(MI); + if (!Func) + return true; // This is an indirect call, we can't tell if it + // modifies mem or not, so just bail out + if (!Func->onlyReadsMemory()) { + return true; + } + } + + if (MI.hasUnmodeledSideEffects()) + return true; + + return false; +} + +/// Perform DFS to find every possible path from SrcBB to DestBB, given no +/// loop in the path. For each in-path node, check to make sure there is no +/// mem-modifying instr. Nodes between SrcBB and DestBB are stored into InPath, +/// so that if any in-path node can be reached from a loop, we can check +/// the nodes in that loop. +/// SrcBB is pushed to the stack before calling this function. 
+/// Return true if there is any store-like instr found along the path from +/// SrcBB to DestBB +static bool DepthFirstTraverse(MachineBasicBlock &DestBB, BitVector &Visited, + DenseSet &InPath, + std::deque &Path, + const TargetInstrInfo *TII) { + if (!Path.empty()) { + MachineBasicBlock &CurBB = *Path.back(); + + for (MachineBasicBlock *Succ : CurBB.successors()) { + if (Succ == &DestBB) { // found a path from SrcBB to DestBB + for (MachineBasicBlock *MBB : + make_range(std::next(Path.begin()), Path.end())) { + // check all BBs along the path + if (!InPath.contains(MBB)) { + // make sure there is no store in this BB + for (MachineInstr &CurInstr : *MBB) { + if (InstrModifiesMem(CurInstr, TII)) + return true; + } + InPath.insert(MBB); + } + } + } + + if (!Visited.test(Succ->getNumber())) { + Path.push_back(Succ); + Visited.set(Succ->getNumber()); + + if (DepthFirstTraverse(DestBB, Visited, InPath, Path, TII)) + return true; + } + } + + Path.pop_back(); + } + + return false; +} + +using MIRegs = std::pair>; +int MachineSinking::SinkCallInstr(MachineInstr &MI, + MachineBasicBlock::iterator &PreInstr, + AllSuccsCache &AllSuccessors, + bool &ProcessedBegin) { + if (!MI.isCall()) + return 0; + std::pair Boundary; + + if (!TII->getCallBoundary(MI, Boundary)) + return 0; + MachineBasicBlock::iterator &Begin = Boundary.first, &End = Boundary.second; + MachineBasicBlock &SrcBB = *MI.getParent(); + + // collect registers used in the call sequence + SmallVector DefPhysRegs, PhysRegs, DefVirtRegs; + SmallVector AllRegs; + + for (MachineInstr &CurInstr : make_range(Begin, End)) { + for (const MachineOperand &MO : CurInstr.operands()) { + if (!MO.isReg()) + continue; // Ignore non-register operands. + + Register Reg = MO.getReg(); + if (!Reg.isValid()) + continue; + + AllRegs.push_back(Reg); + + if (Reg.isVirtual()) { + if (MO.isUse()) { + continue; + } + DefVirtRegs.push_back(Reg); + + // If it's not safe to move defs of the register class, then abort. 
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) + return 0; + + } else { + assert(Reg.isPhysical()); + PhysRegs.push_back(Reg); + if (MO.isDef()) + DefPhysRegs.push_back(Reg); + } + } + } + + DenseSet UseBBs; + for (Register Reg : DefVirtRegs) { + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MachineBasicBlock::iterator CurInstrIter = MO.getParent()->getIterator(); + if (MO.getParent()->getParent() == &SrcBB && + any_of(make_range(std::next(Begin), End), [&](MachineInstr &CurrMI) { + return CurInstrIter == CurrMI.getIterator(); + })) { + continue; // this use is within the calling sequence, we don't need to + // worry about it + } + + MachineInstr &UseInstr = *MO.getParent(); + if (UseInstr.isPHI()) { + unsigned OpNo = UseInstr.getOperandNo(&MO); + MachineBasicBlock *MBB = UseInstr.getOperand(OpNo + 1).getMBB(); + if (MBB != &SrcBB) + UseBBs.insert(MBB); + continue; + } + UseBBs.insert(UseInstr.getParent()); + } + } + + if (UseBBs.empty()) + return 0; + + MachineBasicBlock *DestBBPtr = nullptr; + // find a BB that dominates all uses of the def'ed vregs in call sequence + for (MachineBasicBlock *MBB : UseBBs) { + if (MBB == &SrcBB) + return 0; // there is local use, can't sink + + if (!DestBBPtr) { + DestBBPtr = MBB; + continue; + } + DestBBPtr = DT->findNearestCommonDominator(DestBBPtr, MBB); + if (!DestBBPtr || DestBBPtr == &SrcBB) + return 0; + } + + for (Register Reg : DefVirtRegs) { + MachineInstr &CurInstr = *MRI->getOneDef(Reg)->getParent(); + if (!isProfitableToSinkTo(Reg, CurInstr, &SrcBB, DestBBPtr, + AllSuccessors)) // (todo:T124759579) + return 0; + } + + MachineBasicBlock &DestBB = *DestBBPtr; + + // Make sure mem ops in the called function does not interfere with + // local mem ops. Basically, there are 4 types of functions: + // 1: does not access mem + // 2: only read mem + // 3: only write mem + // 4: read and write mem + // If the function does not access mem, we can safely sink it. 
Otherwise, + // we can only sink if the function only reads mem and there is no store + // along the path. + const Function *Func = TII->getDirectCallsCallee(MI); + if (!Func) + return 0; // a null Function pointer means this is an indirect call + + if (!Func->doesNotAccessMemory()) { + // only need to check when Func actually accesses mem + if (!Func->onlyReadsMemory()) { + // there is a store in Func, abort + return 0; + } + + // make sure there is no store after the call instr + for (MachineInstr &CurInstr : make_range(End, SrcBB.end())) { + if (InstrModifiesMem(CurInstr, TII)) + return 0; + } + + // Go through all reachable blocks along the path from SrcBB to DestBB and + // make sure there is no store within them + BitVector Visited(SrcBB.getParent()->getNumBlockIDs()); + DenseSet InPath; + std::deque Path; + Path.push_back(&SrcBB); + Visited.set(SrcBB.getNumber()); + if (DepthFirstTraverse(DestBB, Visited, InPath, Path, TII)) + return 0; + + // If any BB in the Path can be reached from a cycle, we have to check + // all BBs in that cycle as well. + DenseSet CheckedCycles; + DenseSet CheckedMBBs(InPath.begin(), InPath.end()); + for (MachineBasicBlock *MBB : InPath) { + // There could be a loop that is connected to an in-path node by a + // bidirectional edge. Checking the predecessors will include this case, + // as well as the case where an in-path node is within the loop. 
+ for (MachineBasicBlock *PredBB : MBB->predecessors()) { + MachineCycle *CurrCycle = CI->getCycle(PredBB); + if (CurrCycle && !CheckedCycles.contains(CurrCycle)) { + CheckedCycles.insert(CurrCycle); + + for (MachineBasicBlock *CycleMBB : + make_range(CurrCycle->block_begin(), CurrCycle->block_end())) { + if (CheckedMBBs.contains(CycleMBB)) + continue; + CheckedMBBs.insert(CycleMBB); + + for (MachineInstr &MI : *CycleMBB) { + if (InstrModifiesMem(MI, TII)) + return 0; + } + } + } + } + } + } + + // make sure phys regs are not live-out of the current block + // Use LiveRegUnits to analyze liveness of Phys Regs + LiveRegUnits RegUnits(*TRI); + RegUnits.addLiveOuts(SrcBB); + + for (MachineBasicBlock::iterator Iter = std::prev(SrcBB.end()); + Iter != std::prev(End); --Iter) { + MachineInstr &CurrMI = *Iter; + RegUnits.stepBackward(CurrMI); + } + + for (Register Reg : DefPhysRegs) { + if (!RegUnits.available(Reg)) + return 0; // The Defed PhysReg is used after the call sequence, can't sink + } + + if (Begin == SrcBB.begin()) + ProcessedBegin = true; + else + PreInstr = std::prev(Begin); + + // count # of instrs moved + NumSunk = std::distance(Begin, End); + + // Collect debug users of any vreg that this inst defines. + SmallVector DbgUsersToSink; + + for (MachineInstr &CurInstr : make_range(Begin, End)) { + for (MachineOperand &MO : CurInstr.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) + continue; + if (!SeenDbgUsers.count(MO.getReg())) + continue; + + // Sink any users that don't pass any other DBG_VALUEs for this variable. + auto &Users = SeenDbgUsers[MO.getReg()]; + for (auto &User : Users) { + MachineInstr *DbgMI = User.getPointer(); + if (User.getInt()) { + // This DBG_VALUE would re-order assignments. If we can't + // copy-propagate it, it can't be recovered. Set it undef. 
+ if (!attemptDebugCopyProp(CurInstr, *DbgMI, MO.getReg())) + DbgMI->setDebugValueUndef(); + } else { + DbgUsersToSink.push_back( + {DbgMI, SmallVector(1, MO.getReg())}); + } + } + } + + // After sinking, some debug users may not be dominated any more. If + // possible, copy-propagate their operands. As it's expensive, don't do this + // if there's no debuginfo in the program. + if (CurInstr.getMF()->getFunction().getSubprogram() && CurInstr.isCopy()) + SalvageUnsunkDebugUsersOfCopy(CurInstr, &DestBB); + } + + // Move the instrs to the beginning of the DestBB. That way we can avoid + // checking if the PhysRegs are alive before UseInstr. + DestBB.splice(DestBB.SkipPHIsAndLabels(DestBB.begin()), &SrcBB, Begin, End); + LLVM_DEBUG(dbgs() << "Sank " << NumSunk << " call-related instrs\n"); + + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct. + // Note that we have to clear the kill flags for any register this instruction + // uses as we may sink over another instruction which currently kills the + // used registers. + for (Register &Reg : AllRegs) { + for (MachineOperand &MO : MRI->use_operands(Reg)) { + if (MO.isUse()) + RegsToClearKillFlags.insert( + MO.getReg()); // Remember to clear kill flags. + } + } + + return NumSunk; +} + bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Can't sink anything out of a block that has less than two successors. if (MBB.succ_size() <= 1 || MBB.empty()) return false; @@ -547,6 +924,14 @@ continue; } + int NumCallSunk = 0; + if (SinkCalls && MI.isCall() && + (NumCallSunk = SinkCallInstr(MI, I, AllSuccessors, ProcessedBegin))) { + MadeChange = true; + NumSunk += NumCallSunk; + continue; + } + if (SinkInstruction(MI, SawStore, AllSuccessors)) { ++NumSunk; MadeChange = true; @@ -1033,61 +1418,6 @@ MBP.LHS.getReg() == BaseOp->getReg(); } -/// If the sunk instruction is a copy, try to forward the copy instead of -/// leaving an 'undef' DBG_VALUE in the original location. 
Don't do this if -/// there's any subregister weirdness involved. Returns true if copy -/// propagation occurred. -static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, - Register Reg) { - const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); - const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); - - // Copy DBG_VALUE operand and set the original to undef. We then check to - // see whether this is something that can be copy-forwarded. If it isn't, - // continue around the loop. - - const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; - auto CopyOperands = TII.isCopyInstr(SinkInst); - if (!CopyOperands) - return false; - SrcMO = CopyOperands->Source; - DstMO = CopyOperands->Destination; - - // Check validity of forwarding this copy. - bool PostRA = MRI.getNumVirtRegs() == 0; - - // Trying to forward between physical and virtual registers is too hard. - if (Reg.isVirtual() != SrcMO->getReg().isVirtual()) - return false; - - // Only try virtual register copy-forwarding before regalloc, and physical - // register copy-forwarding after regalloc. - bool arePhysRegs = !Reg.isVirtual(); - if (arePhysRegs != PostRA) - return false; - - // Pre-regalloc, only forward if all subregisters agree (or there are no - // subregs at all). More analysis might recover some forwardable copies. - if (!PostRA) - for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) - if (DbgMO.getSubReg() != SrcMO->getSubReg() || - DbgMO.getSubReg() != DstMO->getSubReg()) - return false; - - // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register - // of this copy. Only forward the copy if the DBG_VALUE operand exactly - // matches the copy destination. 
- if (PostRA && Reg != DstMO->getReg()) - return false; - - for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) { - DbgMO.setReg(SrcMO->getReg()); - DbgMO.setSubReg(SrcMO->getSubReg()); - } - return true; -} - -using MIRegs = std::pair>; /// Sink an instruction and its associated debug instructions. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -328,6 +328,18 @@ SmallVectorImpl &Cond, bool AllowModify) const override; + /// Given a call instr, find the boundary of the function call. + /// Return true on success. + virtual bool getCallBoundary( + MachineInstr &MI, + std::pair + &CallBoundary) const override; + + // Given a call instr, return the Function pointer of the callee. + // Only returns not-null values when MI is a direct call. + virtual const Function * + getDirectCallsCallee(const MachineInstr &MI) const override; + Optional getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -17,11 +17,13 @@ #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -3179,6 +3181,96 @@ return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify); } +bool X86InstrInfo::getCallBoundary( + 
MachineInstr &MI, + std::pair + &CallBoundary) const { + if (!MI.isCall()) + return false; + + MachineBasicBlock::iterator Iter; + MachineBasicBlock::iterator CallBegin, CallEnd; + MachineBasicBlock &MBB = *MI.getParent(); + + for (Iter = --MI.getIterator(); Iter != --MBB.begin(); --Iter) { + if (Iter->getOpcode() == X86::ADJCALLSTACKDOWN64) { + CallBegin = Iter; + break; + } + } + + DenseSet StoreOpcodes({X86::MOV16mr, X86::MOV16mi, X86::MOV32mr, + X86::MOV32mi, X86::MOV64mr, X86::MOV64mi32}); + // make sure there are no unnecessary instrs in the sequence + for (Iter = std::next(CallBegin); Iter != MI.getIterator(); ++Iter) { + if (Iter->getOpcode() == X86::COPY) { + if (Iter->getOperand(0).getReg().isPhysical() && + Iter->getOperand(1).getReg().isVirtual()) { + continue; // this copies a param into a phys reg + } else if (Iter->getOperand(0).getReg().isVirtual() && + Iter->getOperand(1).getReg() == X86::RSP) { + // this is an instr that saves %rsp to vreg + // go on to check if it is a sequence that moves params into the stack + Register RegSP = Iter->getOperand(0).getReg(); + bool SeenOneStore = false; + for (++Iter; Iter != MI.getIterator() && + StoreOpcodes.contains(Iter->getOpcode()); + ++Iter) { + if (Iter->getOperand(2).getReg().isValid() || + Iter->getOperand(4).getReg().isValid()) + return false; // a reg is used to compute the mem addr, abort + + SeenOneStore = true; + if (Iter->getOperand(0).getReg() != RegSP) + return false; + } + + if (!SeenOneStore) + return false; // there should be at least one val saved to the stack + } + } else { // this instr is not part of the call sequence + return false; + } + } + + for (Iter = ++MI.getIterator(); Iter != MBB.end(); ++Iter) { + if (Iter->getOpcode() == X86::ADJCALLSTACKUP64) { + CallEnd = Iter; + break; + } + } + // Currently we assume sequence of call instrs ends with copying instrs whose + // source is a PhysReg. 
+ for (++CallEnd; CallEnd->getOpcode() == X86::COPY && + CallEnd->getOperand(1).getReg().isPhysical(); + ++CallEnd) + ; + + CallBoundary.first = CallBegin; + CallBoundary.second = CallEnd; + return true; +} + +const Function * +X86InstrInfo::getDirectCallsCallee(const MachineInstr &MI) const { + if (MI.getOpcode() != X86::CALL64pcrel32 && + MI.getOpcode() != X86::CALLpcrel32) + return nullptr; + + const Function *Func = nullptr; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + // locate the Function being called + Func = dyn_cast(MO.getGlobal()); + if (!Func) + return nullptr; + + break; + } + return Func; +} + bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const { diff --git a/llvm/test/CodeGen/X86/machine-sink-call-8-param-callee.mir b/llvm/test/CodeGen/X86/machine-sink-call-8-param-callee.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-sink-call-8-param-callee.mir @@ -0,0 +1,154 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=machine-sink -machine-sink-calls -verify-machineinstrs -o - %s | FileCheck %s + +--- | + @a = dso_local global i32 0, align 4 + + ; Function Attrs: mustprogress uwtable + define dso_local noundef i32 @_Z3bazPciiiiiiii(ptr nocapture noundef readnone %c, i32 noundef %cond1, i32 noundef %cond2, i32 noundef %cond3, i32 noundef %cond4, i32 noundef %cond5, i32 noundef %cond6, i32 noundef %cond7, i32 noundef %cond8) local_unnamed_addr #0 { + unreachable + } + + declare noundef i32 @_Z3fooiiiiiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #1 + + attributes #1 = {readonly} + + !5 = !{!6, !6, i64 0} + !6 = !{!"int", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C++ TBAA"} + +... 
+--- +name: _Z3bazPciiiiiiii +tracksRegLiveness: true +fixedStack: + - { id: 0, type: default, offset: 16, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + ; CHECK-LABEL: name: _Z3bazPciiiiiiii + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.4(0x50000000) + ; CHECK-NEXT: liveins: $esi, $edx, $ecx, $r8d, $r9d + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $r9d + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $r8d + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $ecx + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr32 = COPY $edx + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: TEST32rr [[COPY4]], [[COPY4]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1 + ; CHECK-NEXT: [[MOV32ri1:%[0-9]+]]:gr32 = MOV32ri 10 + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.6(0x30000000), %bb.5(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TEST32rr [[COPY3]], [[COPY3]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.5, 5, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: 
%bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16) + ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 8) + ; CHECK-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 16) + ; CHECK: ADJCALLSTACKDOWN64 16, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY $rsp + ; CHECK-NEXT: MOV32mr [[COPY5]], 1, $noreg, 8, $noreg, [[MOV32rm2]] :: (store (s32) into stack + 8) + ; CHECK-NEXT: MOV32mr [[COPY5]], 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store (s32) into stack) + ; CHECK-NEXT: $edi = COPY [[COPY4]] + ; CHECK-NEXT: $esi = COPY [[COPY3]] + ; CHECK-NEXT: $edx = COPY [[COPY2]] + ; CHECK-NEXT: $ecx = COPY [[COPY1]] + ; CHECK-NEXT: $r8d = COPY [[COPY]] + ; CHECK-NEXT: $r9d = COPY [[MOV32rm]] + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @_Z3fooiiiiiiii, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d, implicit-def $rsp, implicit-def $ssp, implicit-def $eax + ; CHECK-NEXT: ADJCALLSTACKUP64 16, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr32 = COPY [[COPY6]] + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri2:%[0-9]+]]:gr32 = MOV32ri 200 + ; CHECK-NEXT: [[MOV32ri3:%[0-9]+]]:gr32 = MOV32ri 20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32ri1]], 
%bb.4, [[MOV32ri3]], %bb.5 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32ri]], %bb.4, [[MOV32ri2]], %bb.5 + ; CHECK-NEXT: MOV32mr $rip, 1, $noreg, @a, $noreg, [[PHI]] :: (volatile store (s32) into @a, !tbaa !0) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[COPY7]], %bb.6, [[PHI1]], %bb.2 + ; CHECK-NEXT: $eax = COPY [[PHI2]] + ; CHECK-NEXT: RET 0, $eax + bb.0: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $esi, $edx, $ecx, $r8d, $r9d + + %9:gr32 = COPY $r9d + %8:gr32 = COPY $r8d + %7:gr32 = COPY $ecx + %6:gr32 = COPY $edx + %5:gr32 = COPY $esi + %12:gr32 = MOV32rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 16) + %13:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 8) + %14:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16) + ADJCALLSTACKDOWN64 16, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %15:gr64 = COPY $rsp + MOV32mr %15, 1, $noreg, 8, $noreg, killed %14 :: (store (s32) into stack + 8) + MOV32mr %15, 1, $noreg, 0, $noreg, killed %13 :: (store (s32) into stack) + $edi = COPY %5 + $esi = COPY %6 + $edx = COPY %7 + $ecx = COPY %8 + $r8d = COPY %9 + $r9d = COPY %12 + CALL64pcrel32 target-flags(x86-plt) @_Z3fooiiiiiiii, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d, implicit-def $rsp, implicit-def $ssp, implicit-def $eax + ADJCALLSTACKUP64 16, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %16:gr32 = COPY $eax + %0:gr32 = COPY %16 + %11:gr32 = MOV32ri 1 + %10:gr32 = MOV32ri 10 + TEST32rr %5, %5, implicit-def $eflags + JCC_1 %bb.2, 5, implicit $eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3(0x30000000), %bb.2(0x50000000) + + %18:gr32 = MOV32ri 200 + %17:gr32 = 
MOV32ri 20 + TEST32rr %6, %6, implicit-def $eflags + JCC_1 %bb.3, 4, implicit $eflags + JMP_1 %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + %1:gr32 = PHI %10, %bb.0, %17, %bb.1 + %2:gr32 = PHI %11, %bb.0, %18, %bb.1 + MOV32mr $rip, 1, $noreg, @a, $noreg, %1 :: (volatile store (s32) into @a, !tbaa !5) + + bb.3: + %3:gr32 = PHI %0, %bb.1, %2, %bb.2 + $eax = COPY %3 + RET 0, $eax + +... diff --git a/llvm/test/CodeGen/X86/machine-sink-call-find-nearest-dom.mir b/llvm/test/CodeGen/X86/machine-sink-call-find-nearest-dom.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-sink-call-find-nearest-dom.mir @@ -0,0 +1,138 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=machine-sink -machine-sink-calls -verify-machineinstrs -o - %s | FileCheck %s + +--- | + ; ModuleID = 'bar-nearest-comm-dom.ll' + source_filename = "bar-nearest-comm-dom.cpp" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + @a = dso_local global i32 0, align 4 + + ; Function Attrs: mustprogress nofree nounwind uwtable + define dso_local noundef i32 @_Z3barPcii(ptr nocapture noundef readonly %c, i32 noundef %cond1, i32 noundef %cond2) local_unnamed_addr #0 { + unreachable + } + + ; Function Attrs: argmemonly mustprogress nofree nounwind readonly willreturn + declare i64 @strlen(ptr nocapture noundef) local_unnamed_addr #1 + + attributes #0 = { mustprogress nofree nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + attributes #1 = { argmemonly mustprogress nofree nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + attributes #2 = { nounwind readonly willreturn } + + !llvm.module.flags = !{!0, !1, !2, !3} + !llvm.ident = !{!4} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 7, !"PIC Level", i32 2} + !2 = !{i32 7, !"PIE Level", i32 2} + !3 = !{i32 7, !"uwtable", i32 2} + !4 = !{!"clang version 15.0.0 (ssh://git.vip.facebook.com/data/gitrepos/osmeta/external/llvm-project a5220c46584b115a1bed4ae2448a74556035039f)"} + !5 = !{!6, !6, i64 0} + !6 = !{!"int", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C++ TBAA"} + +... +--- +name: _Z3barPcii +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _Z3barPcii + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $rdi, $esi, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: TEST32rr [[COPY1]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV32mi $rip, 1, $noreg, @a, $noreg, 10 :: (volatile store (s32) into @a, !tbaa !5) + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1 + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.3(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: $rdi = COPY [[COPY2]] + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @strlen, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, 
implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY3]].sub_32bit + ; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.4, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV32mi $rip, 1, $noreg, @a, $noreg, 20 :: (volatile store (s32) into @a, !tbaa !5) + ; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:gr32 = nsw ADD32ri [[COPY4]], 200, implicit-def dead $eflags + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD32ri8_:%[0-9]+]]:gr32 = nsw ADD32ri8 [[COPY4]], 2, implicit-def dead $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32ri]], %bb.1, [[ADD32ri]], %bb.3, [[ADD32ri8_]], %bb.4 + ; CHECK-NEXT: $eax = COPY [[PHI]] + ; CHECK-NEXT: RET 0, $eax + bb.0: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $rdi, $esi, $edx + + %6:gr32 = COPY $edx + %5:gr32 = COPY $esi + %4:gr64 = COPY $rdi + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %4 + CALL64pcrel32 target-flags(x86-plt) @strlen, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %7:gr64 = COPY $rax + %0:gr32 = COPY %7.sub_32bit + TEST32rr %5, %5, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.5(0x80000000) + + MOV32mi $rip, 1, $noreg, @a, $noreg, 10 :: (volatile store (s32) into @a, !tbaa !5) + %8:gr32 = MOV32ri 1 + JMP_1 
%bb.5 + + bb.2: + successors: %bb.4(0x30000000), %bb.3(0x50000000) + + TEST32rr %6, %6, implicit-def $eflags + JCC_1 %bb.4, 4, implicit $eflags + JMP_1 %bb.3 + + bb.3: + successors: %bb.5(0x80000000) + + MOV32mi $rip, 1, $noreg, @a, $noreg, 20 :: (volatile store (s32) into @a, !tbaa !5) + %1:gr32 = nsw ADD32ri %0, 200, implicit-def dead $eflags + JMP_1 %bb.5 + + bb.4: + successors: %bb.5(0x80000000) + + %2:gr32 = nsw ADD32ri8 %0, 2, implicit-def dead $eflags + + bb.5: + %3:gr32 = PHI %8, %bb.1, %1, %bb.3, %2, %bb.4 + $eax = COPY %3 + RET 0, $eax + +... diff --git a/llvm/test/CodeGen/X86/machine-sink-call-func-ptr.mir b/llvm/test/CodeGen/X86/machine-sink-call-func-ptr.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-sink-call-func-ptr.mir @@ -0,0 +1,77 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=machine-sink -machine-sink-calls -verify-machineinstrs -o - %s | FileCheck %s +--- | + + define dso_local noundef i32 @_Z3fooiiPFiiE(i32 %i1, i32 %i2, ptr %bar) { + unreachable + } +... 
+--- +name: _Z3fooiiPFiiE +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _Z3fooiiPFiiE + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $edi, $esi, $rdx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[COPY2]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 14, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = nsw ADD32rr [[COPY1]], [[COPY2]], implicit-def dead $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: $edi = COPY [[COPY1]] + ; CHECK-NEXT: CALL64r [[COPY]], csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY3]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[COPY4]], %bb.2, [[ADD32rr]], %bb.1 + ; CHECK-NEXT: $eax = COPY [[PHI]] + ; CHECK-NEXT: RET 0, $eax + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $edi, $esi, $rdx + + %5:gr64 = COPY $rdx + %4:gr32 = COPY $esi + %3:gr32 = COPY $edi + %6:gr32 = SUB32rr %3, %4, implicit-def $eflags + JCC_1 %bb.2, 14, implicit $eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3(0x80000000) + + 
%0:gr32 = nsw ADD32rr %4, %3, implicit-def dead $eflags + JMP_1 %bb.3 + + bb.2: + successors: %bb.3(0x80000000) + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $edi = COPY %4 + CALL64r %5, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %7:gr32 = COPY $eax + %1:gr32 = COPY %7 + + bb.3: + %2:gr32 = PHI %1, %bb.2, %0, %bb.1 + $eax = COPY %2 + RET 0, $eax + +... diff --git a/llvm/test/CodeGen/X86/machine-sink-call-multi-read-to-ret-val.mir b/llvm/test/CodeGen/X86/machine-sink-call-multi-read-to-ret-val.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-sink-call-multi-read-to-ret-val.mir @@ -0,0 +1,160 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=machine-sink -machine-sink-calls -verify-machineinstrs -o - %s | FileCheck %s +--- | + @a = dso_local global i32 0, align 4 + + define dso_local noundef i32 @_Z3bazPciiiiiiii(ptr nocapture noundef readnone %c, i32 noundef %cond1, i32 noundef %cond2, i32 noundef %cond3, i32 noundef %cond4, i32 noundef %cond5, i32 noundef %cond6, i32 noundef %cond7, i32 noundef %cond8) local_unnamed_addr #0 { + unreachable + } + + declare i64 @_Z3fooiiiiiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #1 + + attributes #1 = { readonly } + + !5 = !{!6, !6, i64 0} + !6 = !{!"int", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C++ TBAA"} + +... 
+--- +name: _Z3bazPciiiiiiii +tracksRegLiveness: true +fixedStack: + - { id: 0, type: default, offset: 16, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + ; CHECK-LABEL: name: _Z3bazPciiiiiiii + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $esi, $edx, $ecx, $r8d, $r9d + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $r9d + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $r8d + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $ecx + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr32 = COPY $edx + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: TEST32rr [[COPY4]], [[COPY4]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV32mi $rip, 1, $noreg, @a, $noreg, 10 :: (volatile store (s32) into @a, !tbaa !0) + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1 + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.3(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TEST32rr [[COPY3]], [[COPY3]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.4, 4, implicit $eflags + ; 
CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV32mi $rip, 1, $noreg, @a, $noreg, 20 :: (volatile store (s32) into @a, !tbaa !0) + ; CHECK-NEXT: [[MOV32ri1:%[0-9]+]]:gr32 = MOV32ri 200 + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16) + ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 8) + ; CHECK-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 16) + ; CHECK-NEXT: ADJCALLSTACKDOWN64 16, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY $rsp + ; CHECK-NEXT: MOV32mr [[COPY5]], 1, $noreg, 8, $noreg, [[MOV32rm2]] :: (store (s32) into stack + 8) + ; CHECK-NEXT: MOV32mr [[COPY5]], 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store (s32) into stack) + ; CHECK-NEXT: $edi = COPY [[COPY4]] + ; CHECK-NEXT: $esi = COPY [[COPY3]] + ; CHECK-NEXT: $edx = COPY [[COPY2]] + ; CHECK-NEXT: $ecx = COPY [[COPY1]] + ; CHECK-NEXT: $r8d = COPY [[COPY]] + ; CHECK-NEXT: $r9d = COPY [[MOV32rm]] + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @_Z3fooiiiiiiii, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 16, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr32 = COPY [[COPY6]].sub_32bit + ; CHECK-NEXT: 
[[SHR64ri:%[0-9]+]]:gr64 = SHR64ri [[COPY6]], 32, implicit-def dead $eflags + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr32 = COPY [[SHR64ri]].sub_32bit + ; CHECK-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = nsw ADD32rr [[COPY8]], [[COPY7]], implicit-def dead $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32ri]], %bb.1, [[MOV32ri1]], %bb.3, [[ADD32rr]], %bb.4 + ; CHECK-NEXT: $eax = COPY [[PHI]] + ; CHECK-NEXT: RET 0, $eax + bb.0: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $esi, $edx, $ecx, $r8d, $r9d + + %9:gr32 = COPY $r9d + %8:gr32 = COPY $r8d + %7:gr32 = COPY $ecx + %6:gr32 = COPY $edx + %5:gr32 = COPY $esi + %10:gr32 = MOV32rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 16) + %11:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 8) + %12:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16) + ADJCALLSTACKDOWN64 16, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %13:gr64 = COPY $rsp + MOV32mr %13, 1, $noreg, 8, $noreg, killed %12 :: (store (s32) into stack + 8) + MOV32mr %13, 1, $noreg, 0, $noreg, killed %11 :: (store (s32) into stack) + $edi = COPY %5 + $esi = COPY %6 + $edx = COPY %7 + $ecx = COPY %8 + $r8d = COPY %9 + $r9d = COPY %10 + CALL64pcrel32 target-flags(x86-plt) @_Z3fooiiiiiiii, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ADJCALLSTACKUP64 16, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %14:gr64 = COPY $rax + %0:gr32 = COPY %14.sub_32bit + %15:gr64 = SHR64ri %14, 32, implicit-def dead $eflags + %1:gr32 = COPY %15.sub_32bit + TEST32rr %5, %5, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.1 + 
+ bb.1: + successors: %bb.5(0x80000000) + + MOV32mi $rip, 1, $noreg, @a, $noreg, 10 :: (volatile store (s32) into @a, !tbaa !5) + %16:gr32 = MOV32ri 1 + JMP_1 %bb.5 + + bb.2: + successors: %bb.4(0x30000000), %bb.3(0x50000000) + + TEST32rr %6, %6, implicit-def $eflags + JCC_1 %bb.4, 4, implicit $eflags + JMP_1 %bb.3 + + bb.3: + successors: %bb.5(0x80000000) + + MOV32mi $rip, 1, $noreg, @a, $noreg, 20 :: (volatile store (s32) into @a, !tbaa !5) + %17:gr32 = MOV32ri 200 + JMP_1 %bb.5 + + bb.4: + successors: %bb.5(0x80000000) + + %2:gr32 = nsw ADD32rr %1, %0, implicit-def dead $eflags + + bb.5: + %3:gr32 = PHI %16, %bb.1, %17, %bb.3, %2, %bb.4 + $eax = COPY %3 + RET 0, $eax + +... diff --git a/llvm/test/CodeGen/X86/machine-sink-call-no-ret-val-use.mir b/llvm/test/CodeGen/X86/machine-sink-call-no-ret-val-use.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-sink-call-no-ret-val-use.mir @@ -0,0 +1,89 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=machine-sink -machine-sink-calls -verify-machineinstrs -o - %s | FileCheck %s +--- | + + @vi = dso_local global i32 0, align 4 + + define dso_local noundef i32 @_Z3fooiPvS_(i32 noundef %a, ptr nocapture noundef writeonly %dest, ptr nocapture noundef readonly %src) local_unnamed_addr #0 { + unreachable + } + + declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1 + + !5 = !{!6, !6, i64 0} + !6 = !{!"int", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C++ TBAA"} + +... 
+--- +name: _Z3fooiPvS_ +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _Z3fooiPvS_ + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $edi, $rsi, $rdx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[MOVSX64rr32_:%[0-9]+]]:gr64 = MOVSX64rr32 [[COPY2]] + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: $rdi = COPY [[COPY1]] + ; CHECK-NEXT: $rsi = COPY [[COPY]] + ; CHECK-NEXT: $rdx = COPY [[MOVSX64rr32_]] + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: TEST32rr [[COPY2]], [[COPY2]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 5, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 40 + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV32mi $rip, 1, $noreg, @vi, $noreg, 10 :: (volatile store (s32) into @vi, !tbaa !0) + ; CHECK-NEXT: [[MOV32ri1:%[0-9]+]]:gr32 = MOV32ri 20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32ri]], %bb.3, [[MOV32ri1]], %bb.1 + ; CHECK-NEXT: $eax = COPY [[PHI]] + ; CHECK-NEXT: RET 0, $eax + bb.0: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $edi, $rsi, $rdx + + %3:gr64 
= COPY $rdx + %2:gr64 = COPY $rsi + %1:gr32 = COPY $edi + %5:gr64 = MOVSX64rr32 %1 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %2 + $rsi = COPY %3 + $rdx = COPY %5 + CALL64pcrel32 target-flags(x86-plt) &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %6:gr64 = COPY $rax + %4:gr32 = MOV32ri 40 + TEST32rr %1, %1, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + MOV32mi $rip, 1, $noreg, @vi, $noreg, 10 :: (volatile store (s32) into @vi, !tbaa !5) + %7:gr32 = MOV32ri 20 + + bb.2: + %0:gr32 = PHI %4, %bb.0, %7, %bb.1 + $eax = COPY %0 + RET 0, $eax + +... diff --git a/llvm/test/CodeGen/X86/machine-sink-call-phi-at-use-block.mir b/llvm/test/CodeGen/X86/machine-sink-call-phi-at-use-block.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-sink-call-phi-at-use-block.mir @@ -0,0 +1,136 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=machine-sink -machine-sink-calls -verify-machineinstrs -o - %s | FileCheck %s + +--- | + + @a = dso_local global i32 0, align 4 + + define dso_local noundef i32 @_Z3barPcii(ptr %c, i32 %cond1, i32 %cond2) { + unreachable + } + + declare i64 @strlen(ptr nocapture noundef) local_unnamed_addr #1 + + attributes #1 = { readonly } + + !5 = !{!6, !6, i64 0} + !6 = !{!"int", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C++ TBAA"} + +... 
+--- +name: _Z3barPcii +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _Z3barPcii + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $rdi, $esi, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: TEST32rr [[COPY1]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $rip, 1, $noreg, @a, $noreg :: (volatile dereferenceable load (s32) from @a, !tbaa !0) + ; CHECK-NEXT: [[ADD32ri8_:%[0-9]+]]:gr32 = nsw ADD32ri8 [[MOV32rm]], 7, implicit-def dead $eflags + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.3(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm $rip, 1, $noreg, @a, $noreg :: (volatile dereferenceable load (s32) from @a, !tbaa !0) + ; CHECK-NEXT: JCC_1 %bb.4, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD32ri8_1:%[0-9]+]]:gr32 = nsw ADD32ri8 [[MOV32rm1]], 9, implicit-def dead $eflags + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD32ri8_2:%[0-9]+]]:gr32 = nsw ADD32ri8 [[MOV32rm1]], 11, implicit-def dead $eflags + ; CHECK-NEXT: JMP_1 %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[ADD32ri8_]], %bb.1, 
[[ADD32ri8_1]], %bb.3 + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: $rdi = COPY [[COPY2]] + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @strlen, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY3]].sub_32bit + ; CHECK-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = nsw ADD32rr [[PHI]], [[COPY4]], implicit-def dead $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[ADD32rr]], %bb.5, [[ADD32ri8_2]], %bb.4 + ; CHECK-NEXT: $eax = COPY [[PHI1]] + ; CHECK-NEXT: RET 0, $eax + bb.0: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $rdi, $esi, $edx + + %10:gr32 = COPY $edx + %9:gr32 = COPY $esi + %8:gr64 = COPY $rdi + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %8 + CALL64pcrel32 target-flags(x86-plt) @strlen, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %11:gr64 = COPY $rax + %0:gr32 = COPY %11.sub_32bit + TEST32rr %9, %9, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.5(0x80000000) + + %12:gr32 = MOV32rm $rip, 1, $noreg, @a, $noreg :: (volatile dereferenceable load (s32) from @a, !tbaa !5) + %1:gr32 = nsw ADD32ri8 %12, 7, implicit-def dead $eflags + JMP_1 %bb.5 + + bb.2: + successors: %bb.4(0x30000000), %bb.3(0x50000000) + + TEST32rr %10, %10, implicit-def $eflags 
+ %2:gr32 = MOV32rm $rip, 1, $noreg, @a, $noreg :: (volatile dereferenceable load (s32) from @a, !tbaa !5) + JCC_1 %bb.4, 4, implicit $eflags + JMP_1 %bb.3 + + bb.3: + successors: %bb.5(0x80000000) + + %3:gr32 = nsw ADD32ri8 %2, 9, implicit-def dead $eflags + JMP_1 %bb.5 + + bb.4: + successors: %bb.6(0x80000000) + + %4:gr32 = nsw ADD32ri8 %2, 11, implicit-def dead $eflags + JMP_1 %bb.6 + + bb.5: + successors: %bb.6(0x80000000) + + %5:gr32 = PHI %1, %bb.1, %3, %bb.3 + %6:gr32 = nsw ADD32rr %5, %0, implicit-def dead $eflags + + bb.6: + %7:gr32 = PHI %6, %bb.5, %4, %bb.4 + $eax = COPY %7 + RET 0, $eax + +... diff --git a/llvm/test/CodeGen/X86/machine-sink-call.mir b/llvm/test/CodeGen/X86/machine-sink-call.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-sink-call.mir @@ -0,0 +1,164 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=machine-sink -machine-sink-calls -verify-machineinstrs -o - %s | FileCheck %s + +# This test was originally generated from the following sample: +# include +# +# volatile int a; +# +# int bar(char *c, int cond1, int cond2) { +# int y = strlen(c); +# if (cond1) { +# a = 10; +# return 1; +# } +# if (cond2) { +# a = 20; +# return 200; +# } +# return y + 2; +# } +--- | + @a = dso_local global i32 0, align 4 + + define dso_local noundef i32 @_Z3barPcii(ptr nocapture noundef readonly %c, i32 noundef %cond1, i32 noundef %cond2) local_unnamed_addr #0 { + entry: + %call = call i64 @strlen(ptr noundef nonnull dereferenceable(1) %c) #2 + %conv = trunc i64 %call to i32 + %tobool.not = icmp eq i32 %cond1, 0 + br i1 %tobool.not, label %if.end, label %if.then + + if.then: ; preds = %entry + store volatile i32 10, ptr @a, align 4, !tbaa !5 + br label %cleanup + + if.end: ; preds = %entry + %tobool1.not = icmp eq i32 %cond2, 0 + br i1 %tobool1.not, label %if.end3, label %if.then2 + + if.then2: ; preds = %if.end + store volatile i32 20, ptr @a, align 
4, !tbaa !5 + br label %cleanup + + if.end3: ; preds = %if.end + %add = add nsw i32 %conv, 2 + br label %cleanup + + cleanup: ; preds = %if.end3, %if.then2, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 200, %if.then2 ], [ %add, %if.end3 ] + ret i32 %retval.0 + } + + declare i64 @strlen(ptr nocapture noundef) local_unnamed_addr #1 + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 7, !"PIC Level", i32 2} + !2 = !{i32 7, !"PIE Level", i32 2} + !3 = !{i32 7, !"uwtable", i32 2} + !4 = !{!"clang version 15.0.0 (ssh://git.vip.facebook.com/data/gitrepos/osmeta/external/llvm-project d422f33acc4e0b084f2d8b0d21ae348a0aa557ba)"} + !5 = !{!6, !6, i64 0} + !6 = !{!"int", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C++ TBAA"} +... +--- +name: _Z3barPcii +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _Z3barPcii + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $rdi, $esi, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: TEST32rr [[COPY1]], [[COPY1]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV32mi $rip, 1, $noreg, @a, $noreg, 10 :: (volatile store (s32) into @a, !tbaa !0) + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1 + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.end: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.3(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.4, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.then2: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV32mi $rip, 1, $noreg, @a, $noreg, 20 :: (volatile store (s32) into @a, !tbaa !0) + ; CHECK-NEXT: [[MOV32ri1:%[0-9]+]]:gr32 = MOV32ri 200 + ; CHECK-NEXT: JMP_1 %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.if.end3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: $rdi = COPY [[COPY2]] + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @strlen, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY3]].sub_32bit + ; CHECK-NEXT: [[ADD32ri8_:%[0-9]+]]:gr32 = nsw ADD32ri8 [[COPY4]], 2, implicit-def dead $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.cleanup: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32ri]], %bb.1, [[MOV32ri1]], %bb.3, [[ADD32ri8_]], %bb.4 + ; CHECK-NEXT: $eax = COPY [[PHI]] + ; CHECK-NEXT: RET 0, $eax + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $rdi, $esi, $edx + + %5:gr32 = COPY $edx + %4:gr32 = COPY $esi + %3:gr64 = COPY $rdi + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rdi = COPY %3 + CALL64pcrel32 target-flags(x86-plt) @strlen, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %6:gr64 = COPY $rax + %0:gr32 = COPY %6.sub_32bit + TEST32rr %4, %4, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.1 
+ + bb.1.if.then: + successors: %bb.5(0x80000000) + + MOV32mi $rip, 1, $noreg, @a, $noreg, 10 :: (volatile store (s32) into @a, !tbaa !5) + %7:gr32 = MOV32ri 1 + JMP_1 %bb.5 + + bb.2.if.end: + successors: %bb.4(0x30000000), %bb.3(0x50000000) + + TEST32rr %5, %5, implicit-def $eflags + JCC_1 %bb.4, 4, implicit $eflags + JMP_1 %bb.3 + + bb.3.if.then2: + successors: %bb.5(0x80000000) + + MOV32mi $rip, 1, $noreg, @a, $noreg, 20 :: (volatile store (s32) into @a, !tbaa !5) + %8:gr32 = MOV32ri 200 + JMP_1 %bb.5 + + bb.4.if.end3: + successors: %bb.5(0x80000000) + + %1:gr32 = nsw ADD32ri8 %0, 2, implicit-def dead $eflags + + bb.5.cleanup: + %2:gr32 = PHI %7, %bb.1, %8, %bb.3, %1, %bb.4 + $eax = COPY %2 + RET 0, $eax + +...