Index: lib/CodeGen/RegAllocFast.cpp =================================================================== --- lib/CodeGen/RegAllocFast.cpp +++ lib/CodeGen/RegAllocFast.cpp @@ -54,7 +54,13 @@ STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumCopies, "Number of copies coalesced"); +STATISTIC(NumCoalesced, "Number of copies coalesced"); + +#ifndef NDEBUG +// FIXME: Remove this switch when all testcases are fixed! +static cl::opt IgnoreMissingDefs("rafast-ignore-missing-defs", + cl::Hidden); +#endif static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); @@ -85,10 +91,13 @@ MachineInstr *LastUse = nullptr; ///< Last instr to use reg. unsigned VirtReg; ///< Virtual register number. MCPhysReg PhysReg = 0; ///< Currently held here. - unsigned short LastOpNum = 0; ///< OpNum on LastUse. - bool Dirty = false; ///< Register needs spill. + bool LiveOut = false; ///< Register is possibly live out. + bool Reloaded = false; ///< Register was reloaded. + bool Error = false; ///< Could not allocate. - explicit LiveReg(unsigned v) : VirtReg(v) {} + explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + } unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); @@ -96,45 +105,51 @@ }; using LiveRegMap = SparseSet; - /// This map contains entries for each virtual register that is currently /// available in a physical register. LiveRegMap LiveVirtRegs; - DenseMap> LiveDbgValueMap; + DenseMap> LiveDbgValueMap; + /// List of DBG_VALUE that we encountered after the last use of a vreg + /// which could not get allocated yet. + DenseMap> DanglingDbgValues; - /// Track the state of a physical register. - enum RegState { - /// A disabled register is not available for allocation, but an alias may - /// be in use. A register can only be moved out of the disabled state if - /// all aliases are disabled. - regDisabled, + /// Has a bit set for every virtual register for which it was determined + /// that it is alive accross blocks. + BitVector MayLiveAccrossBlocks; + /// State of a register unit. + enum RegState { /// A free register is not currently in use and can be allocated /// immediately without checking aliases. regFree, - /// A reserved register has been assigned explicitly (e.g., setting up a - /// call parameter), and it remains reserved until it is used. - regReserved + /// A pre-assigned register has been assigned before register allocation + /// (e.g., setting up a call parameter). + regPreAssigned, + + /// Used temporarily in reloadAtBegin() to mark register units that are + /// live-in to the basic block. + regLiveIn, /// A register state may also be a virtual register number, indication /// that the physical register is currently allocated to a virtual /// register. In that case, LiveVirtRegs contains the inverse mapping. }; - /// One of the RegState enums, or a virtreg. - std::vector PhysRegState; + /// One of the RegState enums for each register unit. + std::vector RegUnitState; - SmallVector VirtDead; SmallVector Coalesced; - /// Set of register units. - using UsedInInstrSet = SparseSet; - /// Set of register units that are used in the current instruction, and so /// cannot be allocated. 
- UsedInInstrSet UsedInInstr; + SparseSet> UsedInInstr; + SparseSet> PhysRegUses; + SmallVector DefOperandIndexes; + + void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); + bool isPhysRegFree(MCPhysReg PhysReg) const; /// Mark a physreg as used in this instruction. void markRegUsedInInstr(MCPhysReg PhysReg) { @@ -142,21 +157,33 @@ UsedInInstr.insert(*Units); } - /// Check if a physreg or any of its aliases are used in this instruction. - bool isRegUsedInInstr(MCPhysReg PhysReg) const { + void unmarkRegUsedInInstr(MCPhysReg PhysReg) { for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.erase(*Units); + } + + /// Check if a physreg or any of its aliases are used in this instruction. + bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { if (UsedInInstr.count(*Units)) return true; + if (LookAtPhysRegUses && PhysRegUses.count(*Units)) + return true; + } return false; } - /// This flag is set when LiveRegMap will be cleared completely after - /// spilling all live registers. LiveRegMap entries should not be erased. - bool isBulkSpilling = false; + /// Mark physical register as being used in a register use operand. + /// This is only used by the special livethrough handling code. + void markPhysRegUsedInInstr(MCPhysReg PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + PhysRegUses.insert(*Units); + } enum : unsigned { - spillClean = 1, + spillClean = 50, spillDirty = 100, + spillPrefBonus = 20, spillImpossible = ~0u }; @@ -180,23 +207,19 @@ private: bool runOnMachineFunction(MachineFunction &MF) override; + void allocateBasicBlock(MachineBasicBlock &MBB); - void handleThroughOperands(MachineInstr &MI, - SmallVectorImpl &VirtDead); - int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC); - bool isLastUseOfLocalReg(const MachineOperand &MO) const; - - void addKillFlag(const LiveReg &LRI); - void killVirtReg(LiveRegMap::iterator LRI); - void killVirtReg(unsigned VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); - void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); - - void usePhysReg(MachineOperand &MO); - void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - RegState NewState); + void allocateInstruction(MachineInstr &MI); + void handleDebugValue(MachineInstr &MI); + bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + void freePhysReg(MCPhysReg PhysReg); + + int getStackSpaceFor(unsigned VirtReg); + void spillVirtReg(MachineBasicBlock::iterator Before, unsigned VirtReg, + MCPhysReg AssignedReg, bool Kill); unsigned calcSpillCost(MCPhysReg PhysReg) const; - void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); @@ -206,15 +229,25 @@ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); } - LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg); - LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator, - unsigned Hint); - LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); - LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); - void 
spillAll(MachineBasicBlock::iterator MI); - bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg); + void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg); + void allocVirtReg(MachineInstr &MI, LiveRegMap::iterator LRI, + unsigned Hint, bool LookAtPhysRegUses = false); + void allocVirtRegUndef(MachineOperand &MO); + void assignDanglingDebugValues(MachineInstr &Def, unsigned VirtReg, + MCPhysReg Reg); + void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + unsigned VirtReg); + void defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, + bool LookAtPhysRegUses = false); + void useVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg); + void reload(MachineBasicBlock::iterator Before, unsigned VirtReg, + MCPhysReg PhsReg); + void reloadAtBegin(MachineBasicBlock &MBB); + void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); + + bool mayLiveOut(unsigned VirtReg); + unsigned traceCopies(unsigned VirtReg) const; + unsigned traceCopyChain(unsigned Reg) const; void dumpState(); }; @@ -228,8 +261,7 @@ /// This allocates space for the specified virtual register to be held on the /// stack. -int RegAllocFast::getStackSpaceFor(unsigned VirtReg, - const TargetRegisterClass &RC) { +int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { // Find the location Reg would belong... int SS = StackSlotForVirtReg[VirtReg]; // Already has space allocated? @@ -237,6 +269,7 @@ return SS; // Allocate a new stack object for this spill location... + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); unsigned Size = TRI->getSpillSize(RC); unsigned Align = TRI->getSpillAlignment(RC); int FrameIdx = MFI->CreateSpillStackObject(Size, Align); @@ -246,845 +279,1012 @@ return FrameIdx; } -/// Return true if MO is the only remaining reference to its virtual register, -/// and it is guaranteed to be a block-local register. -bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const { - // If the register has ever been spilled or reloaded, we conservatively assume - // it is a global register used in multiple blocks. - if (StackSlotForVirtReg[MO.getReg()] != -1) - return false; - - // Check that the use/def chain has exactly one operand - MO. - MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); - if (&*I != &MO) - return false; - return ++I == MRI->reg_nodbg_end(); -} +void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator Before, + unsigned VirtReg, MCPhysReg AssignedReg, + bool Kill) { + LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) + << " in " << printReg(AssignedReg, TRI)); + int FI = getStackSpaceFor(VirtReg); + LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n"); -/// Set kill flags on last use of a virtual register. -void RegAllocFast::addKillFlag(const LiveReg &LR) { - if (!LR.LastUse) return; - MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); - if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { - if (MO.getReg() == LR.PhysReg) - MO.setIsKill(); - // else, don't do anything we are problably redefining a - // subreg of this register and given we don't track which - // lanes are actually dead, we cannot insert a kill flag here. - // Otherwise we may end up in a situation like this: - // ... = (MO) physreg:sub1, implicit killed physreg - // ... <== Here we would allow later pass to reuse physreg:sub1 - // which is potentially wrong. - // LR:sub0 = ... - // ... 
= LR.sub1 <== This is going to use physreg:sub1 + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); + ++NumStores; + + // When we spill a virtual register, we will have spill instructions behind + // every definition of it, meaning we can switch all the DBG_VALUEs over + // to just reference the stack slot. + SmallVectorImpl &LRIDbgValues = LiveDbgValueMap[VirtReg]; + for (MachineInstr *DBG : LRIDbgValues) { + MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI); + assert(NewDV->getParent() == MBB && "dangling parent pointer"); + (void)NewDV; + LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV); + // Rewrite unassigned dbg_values to use the stack slot. + MachineOperand &MO = DBG->getOperand(0); + if (MO.isReg() && MO.getReg() == 0) + updateDbgValueForSpill(*DBG, FI); } + // Now this register is spilled there is should not be any DBG_VALUE + // pointing to this register because they are all pointing to spilled value + // now. + LRIDbgValues.clear(); } -/// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) { - addKillFlag(*LRI); - assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && - "Broken RegState mapping"); - PhysRegState[LRI->PhysReg] = regFree; - // Erase from LiveVirtRegs unless we're spilling in bulk. - if (!isBulkSpilling) - LiveVirtRegs.erase(LRI); -} - -/// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "killVirtReg needs a virtual register"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - if (LRI != LiveVirtRegs.end()) - killVirtReg(LRI); -} - -/// This method spills the value specified by VirtReg into the corresponding -/// stack slot if needed. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Spilling a physical register is illegal!"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - spillVirtReg(MI, LRI); +/// Get basic block begin insertion point. +/// This is not just MBB.begin() because surprisingly we have EH_LABEL +/// instructions marking the begin of a basic block. This means we must insert +/// new instructions after such labels... +static MachineBasicBlock::iterator +getMBBBeginInsertionPoint(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.begin(); + while (I != MBB.end() && I->isLabel()) + ++I; + return I; } -/// Do the actual work of spilling. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - LiveRegMap::iterator LRI) { - LiveReg &LR = *LRI; - assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); - - if (LR.Dirty) { - // If this physreg is used by the instruction, we want to kill it on the - // instruction, not on the spill. 
- bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; - LR.Dirty = false; - LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in " - << printReg(LR.PhysReg, TRI)); - const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg); - int FI = getStackSpaceFor(LRI->VirtReg, RC); - LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n"); - TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI); - ++NumStores; // Update statistics - - // If this register is used by DBG_VALUE then insert new DBG_VALUE to - // identify spilled location as the place to find corresponding variable's - // value. - SmallVectorImpl &LRIDbgValues = - LiveDbgValueMap[LRI->VirtReg]; - for (MachineInstr *DBG : LRIDbgValues) { - MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI); - assert(NewDV->getParent() == MBB && "dangling parent pointer"); - (void)NewDV; - LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:" - << "\n" - << *NewDV); - } - // Now this register is spilled there is should not be any DBG_VALUE - // pointing to this register because they are all pointing to spilled value - // now. - LRIDbgValues.clear(); - if (SpillKill) - LR.LastUse = nullptr; // Don't kill register again +/// Reload all currently assigned virtual registers. +void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) { + for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) { + MCPhysReg Reg = P.PhysReg; + // Set state to live-in. This possibly overrides mappings to virtual + // registers but we don't care anymore at this point. + setPhysRegState(Reg, regLiveIn); } - killVirtReg(LRI); -} -/// Spill all dirty virtregs without killing them. -void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) { - if (LiveVirtRegs.empty()) return; - isBulkSpilling = true; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. - for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end(); - I != E; ++I) - spillVirtReg(MI, I); - LiveVirtRegs.clear(); - isBulkSpilling = false; + MachineBasicBlock::iterator InsertBefore = getMBBBeginInsertionPoint(MBB); + for (const LiveReg &LR : LiveVirtRegs) { + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg == 0) + continue; + + unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + if (RegUnitState[FirstUnit] == regLiveIn) + continue; + + assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) && + "no reload in start block. Missing vreg def?"); + reload(InsertBefore, LR.VirtReg, PhysReg); + } } /// Handle the direct use of a physical register. Check that the register is /// not used by a virtreg. Kill the physreg, marking it free. This may add /// implicit kills to MO->getParent() and invalidate MO. -void RegAllocFast::usePhysReg(MachineOperand &MO) { - // Ignore undef uses. 
- if (MO.isUndef()) - return; +bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected physreg"); + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + markRegUsedInInstr(Reg); + return displacedAny; +} - unsigned PhysReg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Bad usePhysReg operand"); +bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) { + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + return displacedAny; +} - markRegUsedInInstr(PhysReg); - switch (PhysRegState[PhysReg]) { - case regDisabled: - break; - case regReserved: - PhysRegState[PhysReg] = regFree; - LLVM_FALLTHROUGH; - case regFree: - MO.setIsKill(); - return; - default: - // The physreg was allocated to a virtual register. That means the value we - // wanted has been clobbered. - llvm_unreachable("Instruction uses an allocated register"); - } +/// Mark PhysReg as reserved or free after spilling any virtregs. This is very +/// similar to defineVirtReg except the physreg is reserved instead of +/// allocated. +bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) { + bool displacedAny = false; - // Maybe a superregister is reserved? - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (PhysRegState[Alias]) { - case regDisabled: + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + unsigned Unit = *UI; + switch (unsigned VirtReg = RegUnitState[Unit]) { + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "datastructures in sync"); + MachineBasicBlock::iterator ReloadBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + reload(ReloadBefore, VirtReg, LRI->PhysReg); + + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; + LRI->Reloaded = true; + displacedAny = true; + break; + } + case regPreAssigned: + RegUnitState[Unit] = regFree; + displacedAny = true; break; - case regReserved: - // Either PhysReg is a subregister of Alias and we mark the - // whole register as free, or PhysReg is the superregister of - // Alias and we mark all the aliases as disabled before freeing - // PhysReg. - // In the latter case, since PhysReg was disabled, this means that - // its value is defined only by physical sub-registers. This check - // is performed by the assert of the default case in this loop. - // Note: The value of the superregister may only be partial - // defined, that is why regDisabled is a valid state for aliases. - assert((TRI->isSuperRegister(PhysReg, Alias) || - TRI->isSuperRegister(Alias, PhysReg)) && - "Instruction is not using a subregister of a reserved register"); - LLVM_FALLTHROUGH; case regFree: - if (TRI->isSuperRegister(PhysReg, Alias)) { - // Leave the superregister in the working set. - PhysRegState[Alias] = regFree; - MO.getParent()->addRegisterKilled(Alias, TRI, true); - return; - } - // Some other alias was in the working set - clear it. - PhysRegState[Alias] = regDisabled; break; - default: - llvm_unreachable("Instruction uses an alias of an allocated register"); } } + return displacedAny; +} - // All aliases are disabled, bring register into working set. 
- PhysRegState[PhysReg] = regFree; - MO.setIsKill(); +void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) + RegUnitState[*UI] = NewState; } -/// Mark PhysReg as reserved or free after spilling any virtregs. This is very -/// similar to defineVirtReg except the physreg is reserved instead of -/// allocated. -void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, RegState NewState) { - markRegUsedInInstr(PhysReg); - switch (unsigned VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - default: - spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; +void RegAllocFast::freePhysReg(MCPhysReg PhysReg) { + LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':'); + + unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + switch (unsigned VirtReg = RegUnitState[FirstUnit]) { case regFree: - case regReserved: - PhysRegState[PhysReg] = NewState; + LLVM_DEBUG(dbgs() << '\n'); return; - } - - // This is a disabled register, disable all aliases. - PhysRegState[PhysReg] = NewState; - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (unsigned VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; - default: - spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; - case regFree: - case regReserved: - PhysRegState[Alias] = regDisabled; - if (TRI->isSuperRegister(PhysReg, Alias)) - return; - break; + case regPreAssigned: + LLVM_DEBUG(dbgs() << '\n'); + setPhysRegState(PhysReg, regFree); + return; + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end()); + LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n'); + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; } + return; } } -/// Return the cost of spilling clearing out PhysReg and aliases so it is -/// free for allocation. Returns 0 when PhysReg is free or disabled with all -/// aliases disabled - it can be allocated directly. +/// Return the cost of spilling clearing out PhysReg and aliases so it is free +/// for allocation. Returns 0 when PhysReg is free or disabled with all aliases +/// disabled - it can be allocated directly. /// \returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { - if (isRegUsedInInstr(PhysReg)) { - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) - << " is already used in instr.\n"); - return spillImpossible; - } - switch (unsigned VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - case regFree: - return 0; - case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); - return spillImpossible; - default: { - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - return I->Dirty ? spillDirty : spillClean; - } - } - - // This is a disabled register, add up cost of aliases. 
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); - unsigned Cost = 0; - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (unsigned VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (unsigned VirtReg = RegUnitState[*UI]) { case regFree: - ++Cost; break; - case regReserved: + case regPreAssigned: + LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned " + << printReg(PhysReg, TRI) << '\n'); return spillImpossible; default: { - LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - Cost += I->Dirty ? spillDirty : spillClean; - break; + // TODO: it's not correct to return here, we may have additional + // virtregs assigned to other units or even have a preassigned bit in + // another unit... + bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 || + findLiveVirtReg(VirtReg)->LiveOut; + return SureSpill ? spillClean : spillDirty; + } } + } + return 0; +} + +void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition, + unsigned VirtReg, MCPhysReg Reg) { + auto UDBGValIter = DanglingDbgValues.find(VirtReg); + if (UDBGValIter == DanglingDbgValues.end()) + return; + + SmallVectorImpl &Dangling = UDBGValIter->second; + for (MachineInstr *DbgValue : Dangling) { + assert(DbgValue->isDebugValue()); + MachineOperand &MO = DbgValue->getOperand(0); + if (!MO.isReg()) + continue; + + // Test whether the physreg survives from the definition to the DBG_VALUE. + MCPhysReg SetToReg = Reg; + unsigned Limit = 20; + for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()), + E = DbgValue->getIterator(); I != E; ++I) { + if (I->modifiesRegister(Reg, TRI) || --Limit == 0) { + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + SetToReg = 0; + break; + } } + MO.setReg(SetToReg); + if (SetToReg != 0) + MO.setIsRenamable(); } - return Cost; + Dangling.clear(); } /// This method updates local state so that we know that PhysReg is the /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. 
-void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { - LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to " +void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR, + MCPhysReg PhysReg) { + unsigned VirtReg = LR.VirtReg; + LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " << printReg(PhysReg, TRI) << "\n"); - PhysRegState[PhysReg] = LR.VirtReg; - assert(!LR.PhysReg && "Already assigned a physreg"); + assert(LR.PhysReg == 0 && "Already assigned a physreg"); + assert(PhysReg != 0 && "Trying to assign no register"); LR.PhysReg = PhysReg; + setPhysRegState(PhysReg, VirtReg); + + assignDanglingDebugValues(AtMI, VirtReg, PhysReg); } -RegAllocFast::LiveRegMap::iterator -RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) { - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; +static bool isCoalescable(const MachineInstr &MI) { + return MI.isCopy() && MI.getOperand(0).getSubReg() == 0 && + MI.getOperand(1).getSubReg() == 0; +} + +unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { + static const unsigned ChainLengthLimit = 3; + unsigned C = 0; + do { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + + MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg); + if (VRegDef == nullptr || !isCoalescable(*VRegDef)) + return 0; + Reg = VRegDef->getOperand(1).getReg(); + } while(++C <= ChainLengthLimit); + return 0; +} + +/// Check if any of \p VirtReg's definitions is a copy. If it is follow the +/// chain of copies to check whether we reach a physical register we can +/// coalesce with. +unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { + static const unsigned DefLimit = 3; + unsigned C = 0; + for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) { + if (isCoalescable(MI)) { + unsigned Reg = MI.getOperand(1).getReg(); + Reg = traceCopyChain(Reg); + if (Reg != 0) + return Reg; + } + if (++C >= DefLimit) + break; + } + return 0; } +bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitState[*UI] != regFree) + return false; + } + return true; +} + + /// Allocates a physical register for VirtReg. -RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI, - LiveRegMap::iterator LRI, unsigned Hint) { +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveRegMap::iterator LRI, + unsigned Hint1, bool LookAtPhysRegUses) { const unsigned VirtReg = LRI->VirtReg; + assert(LRI->PhysReg == 0); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Can only allocate virtual registers"); + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg) + << " in class " << TRI->getRegClassName(&RC) << "\n"); // Take hint when possible. - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - if (TargetRegisterInfo::isPhysicalRegister(Hint) && - MRI->isAllocatable(Hint) && RC.contains(Hint)) { - // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint); - if (Cost < spillDirty) { - if (Cost) - definePhysReg(MI, Hint, regFree); - // definePhysReg may kill virtual registers and modify LiveVirtRegs. - // That invalidates LRI, so run a new lookup for VirtReg. 
- return assignVirtToPhysReg(VirtReg, Hint); + unsigned Hint0 = traceCopies(VirtReg); + if (TargetRegisterInfo::isPhysicalRegister(Hint0) && + MRI->isAllocatable(Hint0) && RC.contains(Hint0) && + !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) { + // Take hint if the register is currently free. + if (isPhysRegFree(Hint0)) { + LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI) + << '\n'); + assignVirtToPhysReg(MI, *LRI, Hint0); + return; + } else { + LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI) + << "occupied\n"); } + } else { + Hint0 = 0; } - // First try to find a completely free register. - ArrayRef AO = RegClassInfo.getOrder(&RC); - for (MCPhysReg PhysReg : AO) { - if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; + // Try first hint. + if (TargetRegisterInfo::isPhysicalRegister(Hint1) && + MRI->isAllocatable(Hint1) && RC.contains(Hint1) && + !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) { + // Take hint if the register is currently free. + if (isPhysRegFree(Hint1)) { + LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI) + << '\n'); + assignVirtToPhysReg(MI, *LRI, Hint1); + return; + } else { + LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI) + << "occupied\n"); } } - LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from " - << TRI->getRegClassName(&RC) << "\n"); - - unsigned BestReg = 0; + MCPhysReg BestReg = 0; unsigned BestCost = spillImpossible; - for (MCPhysReg PhysReg : AO) { + ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); + for (MCPhysReg PhysReg : AllocationOrder) { + LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' '); + if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) { + LLVM_DEBUG(dbgs() << "already used in instr.\n"); + continue; + } + unsigned Cost = calcSpillCost(PhysReg); - LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n"); - LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n"); - LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); - // Cost is 0 when all aliases are already disabled. + LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << "\n"); + // Immediate take a register with cost 0. if (Cost == 0) { - assignVirtToPhysReg(*LRI, PhysReg); - return LRI; + assignVirtToPhysReg(MI, *LRI, PhysReg); + return; + } + if (PhysReg == Hint0 || PhysReg == Hint1) + Cost -= spillPrefBonus; + if (Cost < BestCost) { + BestReg = PhysReg; + BestCost = Cost; } - if (Cost < BestCost) - BestReg = PhysReg, BestCost = Cost; } - if (BestReg) { - definePhysReg(MI, BestReg, regFree); - // definePhysReg may kill virtual registers and modify LiveVirtRegs. - // That invalidates LRI, so run a new lookup for VirtReg. - return assignVirtToPhysReg(VirtReg, BestReg); + if (!BestReg) { + // Nothing we can do: Report an error and keep going with an invalid + // allocation. 
+ if (MI.isInlineAsm()) + MI.emitError("inline assembly requires more registers than available"); + else + MI.emitError("ran out of registers during register allocation"); + + LRI->Error = true; + LRI->PhysReg = 0; + return; + } + + displacePhysReg(MI, BestReg); + assignVirtToPhysReg(MI, *LRI, BestReg); +} + +void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { + assert(MO.isUndef() && "expected undef use"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg"); + + LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + MCPhysReg PhysReg; + if (LRI != LiveVirtRegs.end() && LRI->PhysReg != 0) { + PhysReg = LRI->PhysReg; + } else { + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); + assert(!AllocationOrder.empty() && "Allocation order must not be empty"); + PhysReg = AllocationOrder[0]; + } + + unsigned SubRegIdx = MO.getSubReg(); + if (SubRegIdx != 0) { + PhysReg = TRI->getSubReg(PhysReg, SubRegIdx); + MO.setSubReg(0); } + MO.setReg(PhysReg); + MO.setIsRenamable(true); +} - // Nothing we can do. Report an error and keep going with a bad allocation. - if (MI.isInlineAsm()) - MI.emitError("inline assembly requires more registers than available"); - else - MI.emitError("ran out of registers during register allocation"); - definePhysReg(MI, *AO.begin(), regFree); - return assignVirtToPhysReg(VirtReg, *AO.begin()); +/// Heuristic to identify virtual registers not living out of current block. +bool RegAllocFast::mayLiveOut(unsigned VirtReg) { + unsigned C = 0; + if (MayLiveAccrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) + goto MayLO; + + static const unsigned Limit = 8; + for (const MachineOperand &Use : MRI->reg_nodbg_operands(VirtReg)) { + if (Use.getParent()->getParent() != MBB || ++C >= Limit) { + MayLiveAccrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + goto MayLO; + } + } + return false; + +MayLO: + // No vregs live out without successors (such as the return block). + return !MBB->succ_empty(); } -/// Allocates a register for VirtReg and mark it as dirty. -RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +/// Variation of defineVirtReg() with special handling for livethrough regs +/// (tied or earlyclobber) that may interfere with preassigned uses. +void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + unsigned VirtReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + if (LRI != LiveVirtRegs.end()) { + MCPhysReg PrevReg = LRI->PhysReg; + if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) { + LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI) + << " (tied/earlyclobber resolution)\n"); + freePhysReg(PrevReg); + LRI->PhysReg = 0; + allocVirtReg(MI, LRI, 0, true); + MachineBasicBlock::iterator InsertBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to " + << printReg(PrevReg, TRI) << "\n"); + BuildMI(*MBB, InsertBefore, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), PrevReg) + .addReg(LRI->PhysReg, llvm::RegState::Kill); + } + MachineOperand &MO = MI.getOperand(OpNum); + if (MO.getSubReg() && !MO.isUndef()) { + LRI->LastUse = &MI; + } + } + return defineVirtReg(MI, OpNum, VirtReg, true); +} + +/// Allocates a register for VirtReg definition. 
Typically the register is +/// already assigned from a use of the virtreg, however we still need to +/// perform an allocation if: +/// - It is a dead definition without any uses. +/// - The value is live out and all uses are in different basic blocks. +void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, + unsigned VirtReg, bool LookAtPhysRegUses) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (New) { - // If there is no hint, peek at the only use of this register. - if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && - MRI->hasOneNonDBGUse(VirtReg)) { - const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); - // It's a copy, use the destination register as a hint. - if (UseMI.isCopyLike()) - Hint = UseMI.getOperand(0).getReg(); + // Note that we have to prime the LiveOut cache, even if a dead flags is + // set. + bool MayLO = mayLiveOut(VirtReg); + if (!MO.isDead()) { + if (MayLO) + LRI->LiveOut = true; + else { + // It is a dead def without the dead flag; add the flag now. + MO.setIsDead(true); + } } - LRI = allocVirtReg(MI, LRI, Hint); - } else if (LRI->LastUse) { - // Redefining a live register - kill at the last use, unless it is this - // instruction defining VirtReg multiple times. - if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) - addKillFlag(*LRI); } - assert(LRI->PhysReg && "Register not assigned"); - LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; - LRI->Dirty = true; - markRegUsedInInstr(LRI->PhysReg); - return LRI; + if (LRI->PhysReg == 0) + allocVirtReg(MI, LRI, 0, LookAtPhysRegUses); + else { + assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) && + "TODO: preassign mismatch"); + LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI) + << " use existing assignment to " + << printReg(LRI->PhysReg, TRI) << '\n'); + } + + MCPhysReg PhysReg = LRI->PhysReg; + assert(PhysReg != 0 && "Register not assigned"); + if (LRI->Reloaded || LRI->LiveOut) { + if (!MI.isImplicitDef()) { + MachineBasicBlock::iterator SpillBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: " + << LRI->Reloaded << '\n'); + bool Kill = LRI->LastUse == nullptr; + spillVirtReg(SpillBefore, VirtReg, PhysReg, Kill); + LRI->LastUse = nullptr; + } + LRI->LiveOut = false; + LRI->Reloaded = false; + } + markRegUsedInInstr(PhysReg); + setPhysReg(MI, MO, PhysReg); } -/// Make sure VirtReg is available in a physreg and return it. -RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, - unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { +/// Allocates a register for a VirtReg use. 
+void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, + unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - MachineOperand &MO = MI.getOperand(OpNum); if (New) { - LRI = allocVirtReg(MI, LRI, Hint); - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " - << printReg(LRI->PhysReg, TRI) << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI); - ++NumLoads; - } else if (LRI->Dirty) { - if (isLastUseOfLocalReg(MO)) { - LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n"); - if (MO.isUse()) - MO.setIsKill(); - else - MO.setIsDead(); - } else if (MO.isKill()) { - LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); - MO.setIsDead(false); + MachineOperand &MO = MI.getOperand(OpNum); + // Prime the LiveOut cache. Note that we need to do this even if a kill + // flag is set, since we may query it later in other situations. + bool MayLO = mayLiveOut(VirtReg); + if (!MO.isKill()) { + if (MayLO) { + LRI->LiveOut = true; + } else { + // It is a last (killing) use without the kill flag; add the flag now. + MO.setIsKill(true); + } } - } else if (MO.isKill()) { - // We must remove kill flags from uses of reloaded registers because the - // register would be killed immediately, and there might be a second use: - // %foo = OR killed %x, %x - // This would cause a second reload of %x into a different register. - LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); - MO.setIsDead(false); + } else { + assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag"); } - assert(LRI->PhysReg && "Register not assigned"); + + // If necessary allocate a register. + if (LRI->PhysReg == 0) { + assert(!MO.isTied() && "tied op should be allocated"); + unsigned Hint = 0; + if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) { + Hint = MI.getOperand(0).getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Hint) && + "Copy destination should already be assigned"); + } + allocVirtReg(MI, LRI, Hint, false); + if (LRI->Error) { + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); + setPhysReg(MI, MO, *AllocationOrder.begin()); + return; + } + } + LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); - return LRI; + setPhysReg(MI, MO, LRI->PhysReg); +} + +void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg, + MCPhysReg PhysReg) { + LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " + << printReg(PhysReg, TRI) << "\n"); + int FI = getStackSpaceFor(VirtReg); + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI); + ++NumLoads; } /// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This /// may invalidate any operand pointers. Return true if the operand kills its /// register. 
-bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum, +void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg) { - MachineOperand &MO = MI.getOperand(OpNum); - bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); MO.setIsRenamable(true); - return MO.isKill() || Dead; + return; } // Handle subregister index. MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); MO.setIsRenamable(true); - MO.setSubReg(0); + // Note: We leave the subreg number around a little longer in case of defs. + // This is so that the register freeing logic in allocateInstruction can still + // recognize this as subregister defs. The code there will clear the number. + if (!MO.isDef()) + MO.setSubReg(0); // A kill flag implies killing the full register. Add corresponding super // register kill. if (MO.isKill()) { MI.addRegisterKilled(PhysReg, TRI, true); - return true; + return; } // A of a sub-register requires an implicit def of the full // register. - if (MO.isDef() && MO.isUndef()) - MI.addRegisterDefined(PhysReg, TRI); - - return Dead; -} - -// Handles special instruction operand like early clobbers and tied ops when -// there are additional physreg defines. -void RegAllocFast::handleThroughOperands(MachineInstr &MI, - SmallVectorImpl &VirtDead) { - LLVM_DEBUG(dbgs() << "Scanning for through registers:"); - SmallSet ThroughRegs; - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || - (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { - if (ThroughRegs.insert(Reg).second) - LLVM_DEBUG(dbgs() << ' ' << printReg(Reg)); - } - } - - // If any physreg defines collide with preallocated through registers, - // we must spill and reallocate. - LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - markRegUsedInInstr(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - if (ThroughRegs.count(PhysRegState[*AI])) - definePhysReg(MI, *AI, regFree); - } - } - - SmallVector PartialDefs; - LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (MO.isUse()) { - if (!MO.isTied()) continue; - LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO - << ") is tied to operand " << MI.findTiedOperandIdx(I) - << ".\n"); - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LRI->PhysReg; - setPhysReg(MI, I, PhysReg); - // Note: we don't update the def operand yet. That would cause the normal - // def-scan to attempt spilling. - } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { - LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); - // Reload the register, but don't assign to the operand just yet. - // That would confuse the later phys-def processing pass. 
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); - PartialDefs.push_back(LRI->PhysReg); - } + if (MO.isDef() && MO.isUndef()) { + if (MO.isDead()) + MI.addRegisterDead(PhysReg, TRI, true); + else + MI.addRegisterDefined(PhysReg, TRI); } - - LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (!MO.isEarlyClobber()) - continue; - // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LRI->PhysReg; - if (setPhysReg(MI, I, PhysReg)) - VirtDead.push_back(Reg); - } - - // Restore UsedInInstr to a state usable for allocating normal virtual uses. - UsedInInstr.clear(); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) - << " as used in instr\n"); - markRegUsedInInstr(Reg); - } - - // Also mark PartialDefs as used to avoid reallocation. - for (unsigned PartialDef : PartialDefs) - markRegUsedInInstr(PartialDef); } #ifndef NDEBUG void RegAllocFast::dumpState() { - for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { - if (PhysRegState[Reg] == regDisabled) continue; - dbgs() << " " << printReg(Reg, TRI); - switch(PhysRegState[Reg]) { + for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE; + ++Unit) { + switch(unsigned VirtReg = RegUnitState[Unit]) { case regFree: break; - case regReserved: - dbgs() << "*"; + case regPreAssigned: + dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; break; + case regLiveIn: + llvm_unreachable("Should not have regLiveIn in map"); default: { - dbgs() << '=' << printReg(PhysRegState[Reg]); - LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); - assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); - if (I->Dirty) - dbgs() << "*"; - assert(I->PhysReg == Reg && "Bad inverse map"); + dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); + LiveRegMap::iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); + if (I->LiveOut || I->Reloaded) { + dbgs() << '['; + if (I->LiveOut) dbgs() << 'O'; + if (I->Reloaded) dbgs() << 'R'; + dbgs() << ']'; + } + assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); break; } } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. 
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(), - e = LiveVirtRegs.end(); i != e; ++i) { - assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && + for (const LiveReg &LR : LiveVirtRegs) { + unsigned VirtReg = LR.VirtReg; + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && - "Bad map value"); - assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg != 0) { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "mapped to physreg"); + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) + assert(RegUnitState[*UI] == VirtReg && "inverse map valid"); + } } } #endif -void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { - this->MBB = &MBB; - LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); - - PhysRegState.assign(TRI->getNumRegs(), regDisabled); - assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - - MachineBasicBlock::iterator MII = MBB.begin(); - - // Add live-in registers as live. - for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins()) - if (MRI->isAllocatable(LI.PhysReg)) - definePhysReg(MII, LI.PhysReg, regReserved); +void RegAllocFast::allocateInstruction(MachineInstr &MI) { + // The basic algorithm here is: + // 1. Mark registers of def operands as free + // 2. Allocate registers to use operands and place reload instructions for + // registers displaced by the allocation. + // + // However we need to handle some corner cases: + // - pre-assigned defs and uses need to be handled before the other def/use + // operands are processed to avoid the allocation heuristics clashing with + // the pre-assignment. + // - The "free def operands" step has to come last instead of first for tied + // operands and early-clobbers. - VirtDead.clear(); - Coalesced.clear(); - - // Otherwise, sequentially allocate each instruction in the MBB. - for (MachineInstr &MI : MBB) { - const MCInstrDesc &MCID = MI.getDesc(); - LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState()); - - // Debug values are not allowed to change codegen in any way. - if (MI.isDebugValue()) { - MachineInstr *DebugMI = &MI; - MachineOperand &MO = DebugMI->getOperand(0); + UsedInInstr.clear(); - // Ignore DBG_VALUEs that aren't based on virtual registers. These are - // mostly constants and frame indices. - if (!MO.isReg()) - continue; + // Scan for special cases; Apply pre-assigned register defs to state. + bool HasPhysRegUse = false; + bool HasRegMask = false; + bool HasVRegDef = false; + bool HasDef = false; + bool HasEarlyClobber = false; + bool NeedToAssignLiveThroughs = false; + for (MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // See if this virtual register has already been allocated to a physical - // register or spilled to a stack slot. - LiveRegMap::iterator LRI = findLiveVirtReg(Reg); - if (LRI != LiveVirtRegs.end()) - setPhysReg(*DebugMI, 0, LRI->PhysReg); - else { - int SS = StackSlotForVirtReg[Reg]; - if (SS != -1) { - // Modify DBG_VALUE now that the value is in a spill slot. 
- updateDbgValueForSpill(*DebugMI, SS); - LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" - << "\t" << *DebugMI); - continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MO.isDef()) { + HasDef = true; + HasVRegDef = true; + if (MO.isEarlyClobber()) { + HasEarlyClobber = true; + NeedToAssignLiveThroughs = true; + } + if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef())) + NeedToAssignLiveThroughs = true; + } + } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!MRI->isReserved(Reg)) { + if (MO.isDef()) { + HasDef = true; + bool displacedAny = definePhysReg(MI, Reg); + if (MO.isEarlyClobber()) + HasEarlyClobber = true; + if (!displacedAny) + MO.setIsDead(true); + } + if (MO.readsReg()) + HasPhysRegUse = true; } - - // We can't allocate a physreg for a DebugValue, sorry! - LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); } - - // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so - // that future spills of Reg will have DBG_VALUEs. - LiveDbgValueMap[Reg].push_back(DebugMI); - continue; + } else if (MO.isRegMask()) { + HasRegMask = true; } + } - if (MI.isDebugLabel()) - continue; + // Allocate virtreg defs. + if (HasDef) { + if (HasVRegDef) { + // Special handling for early clobbers, tied operands or subregister defs: + // Compared to "normal" defs these: + // - Must not use a register that is pre-assigned for a use operand. + // - In order to solve tricky inline assembly constraints we change the + // heuristic to figure out a good operand order before doing + // assignments. + if (NeedToAssignLiveThroughs) { + DefOperandIndexes.clear(); + PhysRegUses.clear(); + unsigned NumRegDefOperands = 0; + LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (MO.readsReg()) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) + << '\n'); + markPhysRegUsedInInstr(Reg); + } + } + if (MO.isDef()) { + ++NumRegDefOperands; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + DefOperandIndexes.push_back(I); + } + } - // If this is a copy, we may be able to coalesce. - unsigned CopySrcReg = 0; - unsigned CopyDstReg = 0; - unsigned CopySrcSub = 0; - unsigned CopyDstSub = 0; - if (MI.isCopy()) { - CopyDstReg = MI.getOperand(0).getReg(); - CopySrcReg = MI.getOperand(1).getReg(); - CopyDstSub = MI.getOperand(0).getSubReg(); - CopySrcSub = MI.getOperand(1).getSubReg(); + llvm::sort(DefOperandIndexes.begin(), DefOperandIndexes.end(), + [&](uint16_t I0, uint16_t I1) { + const MachineOperand &MO0 = MI.getOperand(I0); + const MachineOperand &MO1 = MI.getOperand(I1); + unsigned Reg0 = MO0.getReg(); + unsigned Reg1 = MO1.getReg(); + const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0); + const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1); + + // Identify regclass that are easy to use up completely just in + // this instruction. + unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size(); + unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size(); + bool SmallClass0 = ClassSize0 < NumRegDefOperands; + bool SmallClass1 = ClassSize1 < NumRegDefOperands; + if (SmallClass0 > SmallClass1) + return true; + if (SmallClass0 < SmallClass1) + return false; + + // Allocate early clobbers and livethrough operands first. 
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() || + (MO0.getSubReg() == 0 && !MO0.isUndef()); + bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() || + (MO1.getSubReg() == 0 && !MO1.isUndef()); + if (Livethrough0 > Livethrough1) + return true; + if (Livethrough0 < Livethrough1) + return false; + + // Tie-break rule: operand index. + return I0 < I1; + }); + + for (uint16_t OpIdx : DefOperandIndexes) { + MachineOperand &MO = MI.getOperand(OpIdx); + LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); + unsigned Reg = MO.getReg(); + if (MO.isEarlyClobber() || MO.isTied() || + (MO.getSubReg() && !MO.isUndef())) { + defineLiveThroughVirtReg(MI, OpIdx, Reg); + } else { + defineVirtReg(MI, OpIdx, Reg); + } + } + } else { + // Assign virtual register defs. + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + defineVirtReg(MI, I, Reg); + } + } } - // Track registers used by instruction. - UsedInInstr.clear(); - - // First scan. - // Mark physreg uses and early clobbers as used. - // Find the end of the virtreg operands - unsigned VirtOpEnd = 0; - bool hasTiedOps = false; - bool hasEarlyClobbers = false; - bool hasPartialRedefs = false; - bool hasPhysDefs = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - // Make sure MRI knows about registers clobbered by regmasks. - if (MO.isRegMask()) { - MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + // Free registers occupied by defs. + // Iterate operands in reverse order, so we see the implicit super register + // defs first (we added them earlier in case of ). + for (unsigned I = MI.getNumOperands(); I-- > 0;) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + // Do not free tied operands and early clobbers. + if (MO.isTied() || MO.isEarlyClobber()) + continue; + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. + if (MO.getSubReg() != 0) { + MO.setSubReg(0); continue; } - if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - VirtOpEnd = i+1; - if (MO.isUse()) { - hasTiedOps = hasTiedOps || - MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; - } else { - if (MO.isEarlyClobber()) - hasEarlyClobbers = true; - if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) - hasPartialRedefs = true; - } + if (!Reg) continue; - } - if (!MRI->isAllocatable(Reg)) continue; - if (MO.isUse()) { - usePhysReg(MO); - } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, - (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); - hasEarlyClobbers = true; - } else - hasPhysDefs = true; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (MRI->isReserved(Reg)) + continue; + freePhysReg(Reg); + unmarkRegUsedInInstr(Reg); } + } - // The instruction may have virtual register operands that must be allocated - // the same register at use-time and def-time: early clobbers and tied - // operands. If there are also physical defs, these registers must avoid - // both physical defs and uses, making them more constrained than normal - // operands. - // Similarly, if there are multiple defs and tied operands, we must make - // sure the same register is allocated to uses and defs. 
- // We didn't detect inline asm tied operands above, so just make this extra - // pass for all inline asm. - if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { - handleThroughOperands(MI, VirtDead); - // Don't attempt coalescing when we have funny stuff going on. - CopyDstReg = 0; - // Pretend we have early clobbers so the use operands get marked below. - // This is not necessary for the common case of a single tied use. - hasEarlyClobbers = true; - } + // Displace clobbered registers. + if (HasRegMask) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + // MRI bookkeeping. + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - // Second scan. - // Allocate virtreg uses. - for (unsigned I = 0; I != VirtOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (MO.isUse()) { - LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg); - MCPhysReg PhysReg = LRI->PhysReg; - CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0; - if (setPhysReg(MI, I, PhysReg)) - killVirtReg(LRI); + // Displace clobbered registers. + const uint32_t *Mask = MO.getRegMask(); + for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(), + LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) { + MCPhysReg PhysReg = LRI->PhysReg; + if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg)) + displacePhysReg(MI, PhysReg); + } } } + } - // Track registers defined by instruction - early clobbers and tied uses at - // this point. - UsedInInstr.clear(); - if (hasEarlyClobbers) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - // Look for physreg defs and tied uses. - if (!MO.isDef() && !MO.isTied()) continue; - markRegUsedInInstr(Reg); - } + // Apply pre-assigned register uses to state. + if (HasPhysRegUse) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MRI->isReserved(Reg)) + continue; + bool displacedAny = usePhysReg(MI, Reg); + if (!displacedAny && !MRI->isReserved(Reg)) + MO.setIsKill(true); } + } + + // Allocate virtreg uses and insert reloads as necessary. + bool HasUndefUse = false; + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; - unsigned DefOpEnd = MI.getNumOperands(); - if (MI.isCall()) { - // Spill all virtregs before a call. This serves one purpose: If an - // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots. - // Note: although this is appealing to just consider all definitions - // as call-clobbered, this is not correct because some of those - // definitions may be used later on and we do not want to reuse - // those for virtual registers in between. - LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MI); + if (MO.isUndef()) { + HasUndefUse = true; + continue; } - // Third scan. - // Allocate defs and collect dead defs. 
- for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) + assert(!MO.isInternalRead() && "Bundles not supported"); + assert(MO.readsReg() && "reading use"); + useVirtReg(MI, I, Reg); + } + + // Allocate undef operands. This is a separate step because in a situation + // like ` = OP undef %X, %X` both operands need the same register assigned, + // so we should perform the normal assignment first. + if (HasUndefUse) { + for (MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + assert(MO.isUndef() && "Should only have undef uses left"); + allocVirtRegUndef(MO); + } + } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!MRI->isAllocatable(Reg)) continue; - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); + // Free early clobbers. + if (HasEarlyClobber) { + for (unsigned I = MI.getNumOperands(); I-- > 0; ) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber()) + continue; + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. + if (MO.getSubReg() != 0) { + MO.setSubReg(0); continue; } - LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg); - MCPhysReg PhysReg = LRI->PhysReg; - if (setPhysReg(MI, I, PhysReg)) { - VirtDead.push_back(Reg); - CopyDstReg = 0; // cancel coalescing; - } else - CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "should have register assigned"); + + // We sometimes get odd situations like: + // early-clobber %x0 = INSTRUCTION %x0 + // which is semantically questionable as the early-clobber should + // apply before the use. But in practice we consider the use to + // happen before the early clobber now. Don't free the early clobber + // register in this case. + if (MI.readsRegister(Reg, TRI)) + continue; + + freePhysReg(Reg); } + } - // Kill dead defs after the scan to ensure that multiple defs of the same - // register are allocated identically. We didn't need to do this for uses - // because we are crerating our own kill flags, and they are always at the - // last use. - for (unsigned VirtReg : VirtDead) - killVirtReg(VirtReg); - VirtDead.clear(); - - if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { - LLVM_DEBUG(dbgs() << "-- coalescing: " << MI); - Coalesced.push_back(&MI); - } else { - LLVM_DEBUG(dbgs() << "<< " << MI); + LLVM_DEBUG(dbgs() << "<< " << MI); + if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && + MI.getNumOperands() == 2) { + LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n"); + Coalesced.push_back(&MI); + ++NumCoalesced; + } +} + +void RegAllocFast::handleDebugValue(MachineInstr &MI) { + MachineOperand &MO = MI.getOperand(0); + + // Ignore DBG_VALUEs that aren't based on virtual registers. These are + // mostly constants and frame indices. + if (!MO.isReg()) + return; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + // Already spilled to a stackslot? + int SS = StackSlotForVirtReg[Reg]; + if (SS != -1) { + // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS); + LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI); + return; + } + + // See if this virtual register has already been allocated to a physical + // register or spilled to a stack slot. + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); + if (LRI != LiveVirtRegs.end() && LRI->PhysReg != 0) { + setPhysReg(MI, MO, LRI->PhysReg); + } else { + DanglingDbgValues[Reg].push_back(&MI); + } + + // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so + // that future spills of Reg will have DBG_VALUEs. + LiveDbgValueMap[Reg].push_back(&MI); +} + +void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { + this->MBB = &MBB; + LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); + + RegUnitState.assign(TRI->getNumRegUnits(), regFree); + assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); + + Coalesced.clear(); + + // Traverse block in reverse order allocating instructions one by one. + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + LLVM_DEBUG( + dbgs() << "\n>> " << MI << "Regs:"; + dumpState() + ); + + // Special handling for debug values. Note that they are not allowed to + // affect codegen of the other instructions in any way. + if (MI.isDebugValue()) { + handleDebugValue(MI); + continue; } + + allocateInstruction(MI); } + LLVM_DEBUG( + dbgs() << "Begin Regs:"; + dumpState() + ); + // Spill all physical registers holding virtual registers now. - LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n"); - spillAll(MBB.getFirstTerminator()); + LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n"); + reloadAtBegin(MBB); + + LiveVirtRegs.clear(); // Erase all the coalesced copies. We are delaying it until now because // LiveVirtRegs might refer to the instrs. for (MachineInstr *MI : Coalesced) MBB.erase(MI); - NumCopies += Coalesced.size(); + + for (auto &UDBGPair : DanglingDbgValues) { + for (MachineInstr *DbgValue : UDBGPair.second) { + assert(DbgValue->isDebugValue() && "expected DBG_VALUE"); + MachineOperand &MO = DbgValue->getOperand(0); + // Nothing to do if the vreg was spilled in the meantime. + if (!MO.isReg()) + continue; + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + MO.setReg(0); + } + } + DanglingDbgValues.clear(); LLVM_DEBUG(MBB.dump()); } -/// Allocates registers for a function. 
bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" << "********** Function: " << MF.getName() << '\n'); @@ -1095,14 +1295,19 @@ MFI = &MF.getFrameInfo(); MRI->freezeReservedRegs(MF); RegClassInfo.runOnMachineFunction(MF); + unsigned NumRegUnits = TRI->getNumRegUnits(); UsedInInstr.clear(); - UsedInInstr.setUniverse(TRI->getNumRegUnits()); + UsedInInstr.setUniverse(NumRegUnits); + PhysRegUses.clear(); + PhysRegUses.setUniverse(NumRegUnits); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers unsigned NumVirtRegs = MRI->getNumVirtRegs(); StackSlotForVirtReg.resize(NumVirtRegs); LiveVirtRegs.setUniverse(NumVirtRegs); + MayLiveAccrossBlocks.clear(); + MayLiveAccrossBlocks.resize(NumVirtRegs); // Loop over all of the basic blocks, eliminating virtual register references for (MachineBasicBlock &MBB : MF) Index: test/CodeGen/AArch64/arm64-fast-isel-br.ll =================================================================== --- test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,7 +94,7 @@ store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: tbz w0, #0, LBB4_2 +; CHECK: tbz {{w[0-9]+}}, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end @@ -132,9 +132,8 @@ ; rdar://15174028 define i32 @trunc64(i64 %foo) nounwind { ; CHECK: trunc64 -; CHECK: and [[REG1:x[0-9]+]], x0, #0x1 -; CHECK: mov x[[REG2:[0-9]+]], [[REG1]] -; CHECK: tbz w[[REG2]], #0, LBB5_2 +; CHECK: and x[[REG1:[0-9]+]], x0, #0x1 +; CHECK: tbz w[[REG1]], #0, LBB5_2 %a = and i64 %foo, 1 %b = trunc i64 %a to i1 br i1 %b, label %if.then, label %if.else Index: test/CodeGen/AArch64/arm64-fast-isel-call.ll =================================================================== --- test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -79,8 +79,7 @@ define i32 @t2() { entry: ; CHECK-LABEL: t2 -; CHECK: mov [[REG1:x[0-9]+]], xzr -; CHECK: mov x0, [[REG1]] +; CHECK: mov x0, xzr ; CHECK: orr w1, wzr, #0xfffffff8 ; CHECK: orr [[REG2:w[0-9]+]], wzr, #0x3ff ; CHECK: uxth w2, [[REG2]] Index: test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll =================================================================== --- test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -4,9 +4,8 @@ define i32 @fptosi_wh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptosi_wh -; CHECK: fcvt s1, h0 -; CHECK: fcvtzs [[REG:w[0-9]+]], s1 -; CHECK: mov w0, [[REG]] +; CHECK: fcvt [[REG:s[0-9]+]], h0 +; CHECK: fcvtzs w0, [[REG]] %conv = fptosi half %a to i32 ret i32 %conv } @@ -15,9 +14,8 @@ define i32 @fptoui_swh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptoui_swh -; CHECK: fcvt s1, h0 -; CHECK: fcvtzu [[REG:w[0-9]+]], s1 -; CHECK: mov w0, [[REG]] +; CHECK: fcvt [[REG:s[0-9]+]], h0 +; CHECK: fcvtzu w0, [[REG]] %conv = fptoui half %a to i32 ret i32 %conv } @@ -26,8 +24,8 @@ define half @sitofp_hw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i1 -; CHECK: sbfx w0, w0, #0, #1 -; CHECK: scvtf s0, w0 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i1 %a to half ret half %conv @@ -37,8 +35,8 @@ define half @sitofp_hw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i8 -; CHECK: sxtb w0, w0 -; CHECK: scvtf s0, w0 +; 
CHECK: sxtb [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i8 %a to half ret half %conv @@ -48,8 +46,8 @@ define half @sitofp_hw_i16(i16 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i16 -; CHECK: sxth w0, w0 -; CHECK: scvtf s0, w0 +; CHECK: sxth [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i16 %a to half ret half %conv @@ -79,8 +77,8 @@ define half @uitofp_hw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i1 -; CHECK: and w0, w0, #0x1 -; CHECK: ucvtf s0, w0 +; CHECK: and [[REG:w[0-9]+]], w0, #0x1 +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i1 %a to half ret half %conv @@ -90,8 +88,8 @@ define half @uitofp_hw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i8 -; CHECK: and w0, w0, #0xff -; CHECK: ucvtf s0, w0 +; CHECK: and [[REG:w[0-9]+]], w0, #0xff +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i8 %a to half ret half %conv @@ -101,8 +99,8 @@ define half @uitofp_hw_i16(i16 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i16 -; CHECK: and w0, w0, #0xffff -; CHECK: ucvtf s0, w0 +; CHECK: and [[REG:w[0-9]+]], w0, #0xffff +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i16 %a to half ret half %conv Index: test/CodeGen/AArch64/arm64-fast-isel-conversion.ll =================================================================== --- test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -9,13 +9,12 @@ ; CHECK: strh w1, [sp, #12] ; CHECK: str w2, [sp, #8] ; CHECK: str x3, [sp] -; CHECK: ldr x3, [sp] -; CHECK: mov x0, x3 -; CHECK: str w0, [sp, #8] -; CHECK: ldr w0, [sp, #8] -; CHECK: strh w0, [sp, #12] -; CHECK: ldrh w0, [sp, #12] -; CHECK: strb w0, [sp, #15] +; CHECK: ldr x[[REG:[0-9]+]], [sp] +; CHECK: str w[[REG]], [sp, #8] +; CHECK: ldr [[REG2:w[0-9]+]], [sp, #8] +; CHECK: strh [[REG2]], [sp, #12] +; CHECK: ldrh [[REG3:w[0-9]+]], [sp, #12] +; CHECK: strb [[REG3]], [sp, #15] ; CHECK: ldrb w0, [sp, #15] ; CHECK: add sp, sp, #16 ; CHECK: ret @@ -49,13 +48,12 @@ ; CHECK: strh w1, [sp, #12] ; CHECK: str w2, [sp, #8] ; CHECK: str x3, [sp] -; CHECK: ldrb w0, [sp, #15] -; CHECK: strh w0, [sp, #12] -; CHECK: ldrh w0, [sp, #12] -; CHECK: str w0, [sp, #8] -; CHECK: ldr w0, [sp, #8] -; CHECK: mov x3, x0 -; CHECK: str x3, [sp] +; CHECK: ldrb [[REG0:w[0-9]+]], [sp, #15] +; CHECK: strh [[REG0]], [sp, #12] +; CHECK: ldrh [[REG1:w[0-9]+]], [sp, #12] +; CHECK: str [[REG1]], [sp, #8] +; CHECK: ldr w[[REG2:[0-9]+]], [sp, #8] +; CHECK: str x[[REG2]], [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -105,12 +103,12 @@ ; CHECK: strh w1, [sp, #12] ; CHECK: str w2, [sp, #8] ; CHECK: str x3, [sp] -; CHECK: ldrsb w0, [sp, #15] -; CHECK: strh w0, [sp, #12] -; CHECK: ldrsh w0, [sp, #12] -; CHECK: str w0, [sp, #8] -; CHECK: ldrsw x3, [sp, #8] -; CHECK: str x3, [sp] +; CHECK: ldrsb [[REG0:w[0-9]+]], [sp, #15] +; CHECK: strh [[REG0]], [sp, #12] +; CHECK: ldrsh [[REG1:w[0-9]+]], [sp, #12] +; CHECK: str [[REG1]], [sp, #8] +; CHECK: ldrsw [[REG2:x[0-9]+]], [sp, #8] +; CHECK: str [[REG2]], [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -166,7 +164,8 @@ define signext i16 @sext_i1_i16(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sext_i1_i16 -; CHECK: sbfx w0, w0, #0, #1 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: sxth w0, [[REG]] %conv = sext i1 %a to i16 ret i16 %conv } @@ -175,7 +174,8 @@ define signext i8 @sext_i1_i8(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sext_i1_i8 
-; CHECK: sbfx w0, w0, #0, #1 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: sxtb w0, [[REG]] %conv = sext i1 %a to i8 ret i8 %conv } @@ -238,8 +238,8 @@ define float @sitofp_sw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_sw_i1 -; CHECK: sbfx w0, w0, #0, #1 -; CHECK: scvtf s0, w0 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: scvtf s0, [[REG]] %conv = sitofp i1 %a to float ret float %conv } @@ -248,8 +248,8 @@ define float @sitofp_sw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_sw_i8 -; CHECK: sxtb w0, w0 -; CHECK: scvtf s0, w0 +; CHECK: sxtb [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] %conv = sitofp i8 %a to float ret float %conv } @@ -302,8 +302,8 @@ define float @uitofp_sw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_sw_i1 -; CHECK: and w0, w0, #0x1 -; CHECK: ucvtf s0, w0 +; CHECK: and [[REG:w[0-9]+]], w0, #0x1 +; CHECK: ucvtf s0, [[REG]] %conv = uitofp i1 %a to float ret float %conv } @@ -363,7 +363,7 @@ define i32 @i64_trunc_i32(i64 %a) nounwind ssp { entry: ; CHECK-LABEL: i64_trunc_i32 -; CHECK: mov x1, x0 +; CHECK: ret %conv = trunc i64 %a to i32 ret i32 %conv } @@ -402,9 +402,8 @@ define void @stack_trunc() nounwind { ; CHECK-LABEL: stack_trunc ; CHECK: sub sp, sp, #16 -; CHECK: ldr [[REG:x[0-9]+]], [sp] -; CHECK: mov x[[REG2:[0-9]+]], [[REG]] -; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0xff +; CHECK: ldr x[[REG:[0-9]+]], [sp] +; CHECK: and [[REG3:w[0-9]+]], w[[REG]], #0xff ; CHECK: strb [[REG3]], [sp, #15] ; CHECK: add sp, sp, #16 %a = alloca i8, align 1 Index: test/CodeGen/AArch64/br-cond-not-merge.ll =================================================================== --- test/CodeGen/AArch64/br-cond-not-merge.ll +++ test/CodeGen/AArch64/br-cond-not-merge.ll @@ -64,9 +64,9 @@ ; OPT: b.gt [[L:\.LBB[0-9_]+]] ; OPT: tbz w1, #0, [[L]] ; +; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]] ; NOOPT: subs w{{[0-9]+}}, w{{[0-9]+}}, #0 ; NOOPT: cset [[R1:w[0-9]+]], gt -; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]] ; NOOPT: str [[R1]], [sp, #[[SLOT1:[0-9]+]]] ; NOOPT: b .LBB ; NOOPT: ldr [[R2:w[0-9]+]], [sp, #[[SLOT1]]] Index: test/CodeGen/AArch64/cmpxchg-O0.ll =================================================================== --- test/CodeGen/AArch64/cmpxchg-O0.ll +++ test/CodeGen/AArch64/cmpxchg-O0.ll @@ -2,15 +2,15 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxrb [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1, uxtb ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrb [[STATUS]], w2, [x0] +; CHECK: stlxrb [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 +; CHECK: subs {{w[0-9]+}}, [[OLD]], w1, uxtb ; CHECK: cset {{w[0-9]+}}, eq %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic ret { i8, i1 } %res @@ -18,12 +18,12 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_16: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxrh [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1, uxth ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrh [[STATUS:w[3-9]]], w2, [x0] +; CHECK: stlxrh [[STATUS:w[3-9]]], w2, {{\[}}[[ADDR]]{{\]}} ; 
CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -34,12 +34,12 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_32: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxr [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS]], w2, [x0] +; CHECK: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -50,12 +50,12 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxr [[OLD:x[0-9]+]], [x0] +; CHECK: ldaxr [[OLD:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], x1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS]], x2, [x0] +; CHECK: stlxr [[STATUS:w[0-9]+]], x2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{x[0-9]+}}, [[OLD]], x1 @@ -66,14 +66,15 @@ define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_128: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD_LO]], x2 ; CHECK: cset [[CMP_TMP:w[0-9]+]], ne ; CHECK: cmp [[OLD_HI]], x3 ; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne ; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0] +; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic @@ -86,19 +87,21 @@ @var128 = global i128 0 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) { ; CHECK-LABEL: test_cmpxchg_128_unsplit: -; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 +; CHECK: mov [[ADDR:x[0-9]+]], x0 +; CHECK: adrp [[R0:x[0-9]+]], var128 +; CHECK: add x[[VAR128:[0-9]+]], [[R0]], :lo12:var128 ; CHECK: ldr [[DESIRED_HI:x[0-9]+]], [x[[VAR128]], #8] ; CHECK: ldr [[DESIRED_LO:x[0-9]+]], [x[[VAR128]]] ; CHECK: ldr [[NEW_HI:x[0-9]+]], [x[[VAR128]], #8] ; CHECK: ldr [[NEW_LO:x[0-9]+]], [x[[VAR128]]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD_LO]], [[DESIRED_LO]] ; CHECK: cset [[CMP_TMP:w[0-9]+]], ne ; CHECK: cmp [[OLD_HI]], [[DESIRED_HI]] ; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne ; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], [x0] +; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: Index: test/CodeGen/AArch64/fast-isel-cmpxchg.ll =================================================================== --- test/CodeGen/AArch64/fast-isel-cmpxchg.ll +++ test/CodeGen/AArch64/fast-isel-cmpxchg.ll @@ -1,20 +1,19 @@ ; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: 
cmpxchg_monotonic_32: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], w1 +; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], w2, [x0] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], w1 -; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: cset [[STATUS]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov w0, [[OLD]] define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 { %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic %tmp1 = extractvalue { i32, i1 } %tmp0, 0 @@ -26,21 +25,20 @@ ; CHECK-LABEL: cmpxchg_acq_rel_32_load: ; CHECK: // %bb.0: -; CHECK: ldr [[NEW:w[0-9]+]], [x2] +; CHECK: mov [[ADDR:x[0-9]+]], x0 +; CHECK: ldr [[NEW:w[0-9]+]], [x2] ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], w1 +; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], w1 -; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: cset [[STATUS]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov w0, [[OLD]] define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 { %new = load i32, i32* %pnew %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire @@ -52,20 +50,19 @@ } ; CHECK-LABEL: cmpxchg_seq_cst_64: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], x1 +; CHECK-NEXT: ldaxr x0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], x2, [x0] +; CHECK-NEXT: stlxr [[STATUS]], x2, {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], x1 +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov x0, [[OLD]] define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 { %tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst %tmp1 = extractvalue { i64, i1 } %tmp0, 0 Index: test/CodeGen/AArch64/fast-isel-sp-adjust.ll =================================================================== --- test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -14,8 +14,7 @@ ; CHECK-LABEL: foo: ; CHECK-DAG: mov x[[SP:[0-9]+]], sp -; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 -; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] +; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104 ; CHECK: strb w0, [x[[SP]], x[[OFFSET]]] define void @foo(i8 %in) { Index: test/CodeGen/AArch64/regallocfast.mir 
=================================================================== --- /dev/null +++ test/CodeGen/AArch64/regallocfast.mir @@ -0,0 +1,18 @@ +# RUN: llc -o - %s -mtriple=aarch64-- -run-pass=regallocfast | FileCheck %s +--- +# CHECK-LABEL: name: func0 +name: func0 +tracksRegLiveness: true +body: | + bb.0: + %0:gpr64 = MOVi64imm 42 + %1:gpr64 = MOVi64imm 13 + ; Make sure the undef operand vreg gets the same register assigned as the + ; corresponding non-undef one. + %2:gpr64 = EORXrr undef %1, %1 + %3:gpr64 = EORXrr undef %3, %0 + ; CHECK: renamable [[R0:\$x[0-9]+]] = MOVi64imm 42 + ; CHECK: renamable [[R1:\$x[0-9]+]] = MOVi64imm 13 + ; CHECK: dead renamable [[R2:\$x[0-9]+]] = EORXrr undef renamable [[R1]], killed renamable [[R1]] + ; CHECK: dead renamable [[R3:\$x[0-9]+]] = EORXrr undef renamable [[R3]], killed renamable [[R0]] +... Index: test/CodeGen/AArch64/swift-return.ll =================================================================== --- test/CodeGen/AArch64/swift-return.ll +++ test/CodeGen/AArch64/swift-return.ll @@ -201,10 +201,10 @@ ; CHECK-DAG: mov w3, w0 ; CHECK: ret ; CHECK-O0-LABEL: _gen7 -; CHECK-O0: str w0, [sp, #12] -; CHECK-O0: ldr w1, [sp, #12] -; CHECK-O0: ldr w2, [sp, #12] -; CHECK-O0: ldr w3, [sp, #12] +; CHECK-O0: mov w3, w0 +; CHECK-O0: mov w0, w3 +; CHECK-O0: mov w1, w3 +; CHECK-O0: mov w2, w3 define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1 @@ -219,10 +219,10 @@ ; CHECK: mov w3, w0 ; CHECK: ret ; CHECK-O0-LABEL: _gen9 -; CHECK-O0: str w0, [sp, #12] -; CHECK-O0: ldr w1, [sp, #12] -; CHECK-O0: ldr w2, [sp, #12] -; CHECK-O0: ldr w3, [sp, #12] +; CHECK-O0: mov w3, w0 +; CHECK-O0: mov w0, w3 +; CHECK-O0: mov w1, w3 +; CHECK-O0: mov w2, w3 define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) { %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 Index: test/CodeGen/AArch64/swifterror.ll =================================================================== --- test/CodeGen/AArch64/swifterror.ll +++ test/CodeGen/AArch64/swifterror.ll @@ -19,11 +19,10 @@ ; CHECK-O0-LABEL: foo: ; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; CHECK-O0: malloc -; CHECK-O0: mov x21, x0 -; CHECK-O0-NOT: x21 -; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1 -; CHECK-O0-NOT: x21 -; CHECK-O0: strb [[ID]], [x0, #8] +; CHECK-O0: mov [[MALLOCRES:x[0-9]+]], x0 +; CHECK-O0-DAG: orr [[ONE:w[0-9]+]], wzr, #0x1 +; CHECK-O0-DAG: strb [[ONE]], {{\[}}[[MALLOCRES]], #8] +; CHECK-O0-DAG: mov x21, x0 ; CHECK-O0-NOT: x21 entry: %call = call i8* @malloc(i64 16) @@ -133,14 +132,13 @@ ; CHECK-O0: cbz w0 ; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; CHECK-O0: malloc -; CHECK-O0: mov [[ID:x[0-9]+]], x0 +; CHECK-O0: mov [[MALLOCRES:x[0-9]+]], x0 ; CHECK-O0: orr [[ID2:w[0-9]+]], wzr, #0x1 -; CHECK-O0: strb [[ID2]], [x0, #8] -; CHECK-O0: mov x21, [[ID]] +; CHECK-O0: strb [[ID2]], {{\[}}[[MALLOCRES]], #8] +; CHECK-O0: mov x21, x0 ; CHECK-O0: ret ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]] -; CHECK-O0: mov x21, [[ID3]] +; CHECK-O0: ldr x21, [sp, [[SLOT]]] ; CHECK-O0: ret entry: %cond = icmp ne i32 %cc, 0 @@ -174,11 +172,11 @@ ; CHECK-O0-LABEL: foo_loop: ; spill x21 -; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]] +; CHECK-O0: stur x21, [x29, [[SLOT:#-?[0-9]+]]] ; CHECK-O0: b [[BB1:[A-Za-z0-9_]*]] ; CHECK-O0: [[BB1]]: -; CHECK-O0: ldr x0, [sp, [[SLOT]]] -; CHECK-O0: str x0, [sp, [[SLOT2:#[0-9]+]]] +; CHECK-O0: ldur [[REG:x[0-9]+]], [x29, [[SLOT]]] +; 
CHECK-O0: str [[REG]], [sp, [[SLOT2:#[0-9]+]]] ; CHECK-O0: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] ; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; CHECK-O0: malloc @@ -188,12 +186,11 @@ ; CHECK-O0: str x0, [sp, [[SLOT2]]] ; CHECK-O0:[[BB2]]: ; CHECK-O0: ldr x0, [sp, [[SLOT2]]] +; CHECK-O0: str x0, [sp] ; CHECK-O0: fcmp -; CHECK-O0: str x0, [sp, [[SLOT3:#[0-9]+]] ; CHECK-O0: b.le [[BB1]] ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT3]]] -; CHECK-O0: mov x21, [[ID3]] +; CHECK-O0: ldr x21, [sp] ; CHECK-O0: ret entry: br label %bb_loop @@ -233,18 +230,17 @@ ; CHECK-APPLE-NOT: x21 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; spill x8 -; CHECK-O0-DAG: str x8 -; spill x21 -; CHECK-O0-DAG: str x21 +; CHECK-O0-DAG: str x8, [sp] +; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; CHECK-O0: malloc +; CHECK-O0: ldr [[SRET:x[0-9]+]], [sp] +; CHECK-O0: mov [[MALLOCRES:x[0-9]+]], x0 ; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1 -; CHECK-O0: strb [[ID]], [x0, #8] +; CHECK-O0: strb [[ID]], {{\[}}[[MALLOCRES]], #8] ; reload from stack -; CHECK-O0: ldr [[SRET:x[0-9]+]] ; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4] -; CHECK-O0: mov x21 +; CHECK-O0: mov x21, x0 ; CHECK-O0-NOT: x21 entry: %call = call i8* @malloc(i64 16) @@ -272,15 +268,15 @@ ; CHECK-O0-LABEL: caller3: ; spill x0 -; CHECK-O0: str x0 +; CHECK-O0: str x0, [sp, [[OFFSET:#[0-9]+]]] ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:x[0-9]+]], x21 ; CHECK-O0: cbnz x21 ; Access part of the error object and save it to error_ref ; reload from stack +; CHECK-O0: ldr [[ID:x[0-9]+]], [sp, [[OFFSET]]] ; CHECK-O0: ldrb [[CODE:w[0-9]+]] -; CHECK-O0: ldr [[ID:x[0-9]+]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; CHECK-O0: bl {{.*}}free entry: @@ -601,8 +597,7 @@ ; Make sure we properly assign registers during fast-isel. 
; CHECK-O0-LABEL: testAssign -; CHECK-O0: mov [[TMP:x.*]], xzr -; CHECK-O0: mov x21, [[TMP]] +; CHECK-O0: mov x21, xzr ; CHECK-O0: bl _foo2 ; CHECK-O0: str x21, [s[[STK:.*]]] ; CHECK-O0: ldr x0, [s[[STK]]] Index: test/CodeGen/AMDGPU/control-flow-fastregalloc.ll =================================================================== --- test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -17,31 +17,32 @@ ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] +; Spill load +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill + ; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec -; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] - -; Spill saved exec -; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] -; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] - ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:8 ; 4-byte Folded Spill -; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; Spill saved exec +; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] +; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] + +; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] + ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if +; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] -; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -51,12 +52,13 @@ ; VMEM: [[ENDIF]]: +; Restore val +; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload + ; Reload and restore exec mask ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] - - ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] @@ -67,9 +69,6 @@ ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} -; Restore val -; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload - ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] define amdgpu_kernel void @divergent_if_endif(i32 addrspace(1)* %out) #0 { entry: @@ -99,22 +98,21 @@ ; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 +; Spill load +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 
offset:[[LOAD_OFFSET:[0-9]+]] ; 4-byte Folded Spill + ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec -; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] -; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill +; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] +; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] +; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] - -; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill -; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] -; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill - +; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} ; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]] @@ -122,7 +120,7 @@ ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: -; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:{{[0-9]+}} ; 4-byte Folded Reload ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] ; GCN: v_cmp_ne_u32_e32 vcc, ; GCN: s_and_b64 vcc, exec, vcc @@ -131,19 +129,20 @@ ; GCN: [[END]]: +; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload + ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET]] ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} -; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 { @@ -174,6 +173,9 @@ ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] +; Spill load +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill + ; 
GCN: s_mov_b32 [[ZERO:s[0-9]+]], 0 ; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], [[ZERO]], v0 @@ -181,9 +183,6 @@ ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} -; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill - ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] @@ -215,10 +214,6 @@ ; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} -; Regular spill value restored after exec modification -; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload - - ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] @@ -229,7 +224,7 @@ ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_HI:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]] ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; GCN: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN-NEXT: s_cbranch_execz [[ENDIF]] @@ -237,22 +232,23 @@ ; GCN: BB{{[0-9]+}}_2: ; %if ; GCN: ds_read_b32 -; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload -; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] +; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, [[LOAD0]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ELSE]]: ; %else -; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN-NEXT: s_branch [[FLOW]] ; GCN: [[ENDIF]]: + +; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload + ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] - ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] @@ -263,7 +259,6 @@ ; GCN: s_or_b64 exec, exec, 
s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} -; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 { entry: Index: test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll @@ -42,6 +42,8 @@ ; VI-OPT: s_mov_b32 ; VI-OPT: s_mov_b32 ; VI-NOOPT: s_waitcnt +; VI-NOOPT: s_waitcnt +; VI-NOOPT: s_waitcnt ; VI-NOOPT-NEXT: s_nop 0 ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; VI-OPT: s_nop 1 Index: test/CodeGen/AMDGPU/mubuf-legalize-operands.ll =================================================================== --- test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -129,6 +129,7 @@ ; CHECK-O0-DAG: buffer_store_dword [[IDX_V]], off, s[0:3], s5 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill ; CHECK-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: +; CHECK-O0: buffer_load_dword ; CHECK-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], {{.*}} ; 4-byte Folded Reload ; CHECK-O0: s_waitcnt vmcnt(0) ; CHECK-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], {{.*}} ; 4-byte Folded Reload @@ -141,23 +142,22 @@ ; CHECK-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] ; CHECK-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] ; CHECK-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP3:[0-9]+]], v[[VRSRC3]] -; CHECK-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]] +; CHECK-O0-DAG: ; kill: def $sgpr[[SRSRCTMP0]] ; CHECK-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] ; CHECK-O0-DAG: s_mov_b32 s[[SRSRC2:[0-9]+]], s[[SRSRCTMP2]] ; CHECK-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] -; CHECK-O0: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} +; CHECK-O0: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRCTMP0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} ; CHECK-O0: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}} ; CHECK-O0: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] ; CHECK-O0: s_and_saveexec_b64 [[CMP]], [[CMP]] -; CHECK-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s[0:3], s5 offset:[[IDX_OFF]] ; 4-byte Folded Reload -; CHECK-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, {{.*}} idxen +; CHECK-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX_V]], s{{\[}}[[SRSRCTMP0]]:[[SRSRC3]]{{\]}}, {{.*}} idxen ; CHECK-O0: s_waitcnt vmcnt(0) ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill ; CHECK-O0: s_xor_b64 exec, exec, [[CMP]] ; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB0]] -; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]] ; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload +; CHECK-O0: s_mov_b64 exec, s{{\[[0-9]+:[0-9]+\]}} ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill ; CHECK-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]] @@ -168,6 +168,7 @@ ; CHECK-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]] ; CHECK-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: +; CHECK-O0: 
buffer_load_dword ; CHECK-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], {{.*}} ; 4-byte Folded Reload ; CHECK-O0: s_waitcnt vmcnt(0) ; CHECK-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], {{.*}} ; 4-byte Folded Reload @@ -180,25 +181,24 @@ ; CHECK-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] ; CHECK-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] ; CHECK-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP3:[0-9]+]], v[[VRSRC3]] -; CHECK-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]] +; CHECK-O0-DAG: ; kill: def $sgpr[[SRSRCTMP0]] ; CHECK-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] ; CHECK-O0-DAG: s_mov_b32 s[[SRSRC2:[0-9]+]], s[[SRSRCTMP2]] ; CHECK-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] -; CHECK-O0: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} +; CHECK-O0: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRCTMP0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} ; CHECK-O0: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}} ; CHECK-O0: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] ; CHECK-O0: s_and_saveexec_b64 [[CMP]], [[CMP]] -; CHECK-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s[0:3], s5 offset:[[IDX_OFF]] ; 4-byte Folded Reload -; CHECK-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, {{.*}} idxen +; CHECK-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX_V]], s{{\[}}[[SRSRCTMP0]]:[[SRSRC3]]{{\]}}, {{.*}} idxen ; CHECK-O0: s_waitcnt vmcnt(0) ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill ; CHECK-O0: s_xor_b64 exec, exec, [[CMP]] ; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB1]] +; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload ; CHECK-O0: v_readlane_b32 s[[SAVEEXEC0:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX0]] ; CHECK-O0: v_readlane_b32 s[[SAVEEXEC1:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX1]] ; CHECK-O0: s_mov_b64 exec, s{{\[}}[[SAVEEXEC0]]:[[SAVEEXEC1]]{{\]}} -; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF]] ; 4-byte Folded Spill ; CHECK-O0: [[TERMBB]]: Index: test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll =================================================================== --- test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -11,17 +11,6 @@ ; GCN-LABEL: {{^}}spill_sgprs_to_multiple_vgprs: ; GCN: def s[4:11] -; GCN: def s[12:19] -; GCN: def s[20:27] -; GCN: def s[28:35] -; GCN: def s[36:43] -; GCN: def s[44:51] -; GCN: def s[52:59] -; GCN: def s[60:67] -; GCN: def s[68:75] -; GCN: def s[76:83] -; GCN: def s[84:91] - ; GCN: v_writelane_b32 v0, s4, 0 ; GCN-NEXT: v_writelane_b32 v0, s5, 1 ; GCN-NEXT: v_writelane_b32 v0, s6, 2 @@ -91,199 +80,196 @@ ; GCN-NEXT: v_writelane_b32 v0, s10, 54 ; GCN-NEXT: v_writelane_b32 v0, s[[TMP_HI]], 55 -; GCN-NEXT: v_writelane_b32 v0, s84, 56 -; GCN-NEXT: v_writelane_b32 v0, s85, 57 -; GCN-NEXT: v_writelane_b32 v0, s86, 58 -; GCN-NEXT: v_writelane_b32 v0, s87, 59 -; GCN-NEXT: v_writelane_b32 v0, s88, 60 -; GCN-NEXT: v_writelane_b32 v0, s89, 61 -; GCN-NEXT: v_writelane_b32 v0, s90, 62 -; GCN-NEXT: v_writelane_b32 v0, s91, 63 -; GCN-NEXT: v_writelane_b32 v1, s12, 0 -; GCN-NEXT: v_writelane_b32 v1, s13, 1 -; GCN-NEXT: 
v_writelane_b32 v1, s14, 2 -; GCN-NEXT: v_writelane_b32 v1, s15, 3 -; GCN-NEXT: v_writelane_b32 v1, s16, 4 -; GCN-NEXT: v_writelane_b32 v1, s17, 5 -; GCN-NEXT: v_writelane_b32 v1, s18, 6 -; GCN-NEXT: v_writelane_b32 v1, s19, 7 -; GCN-NEXT: v_writelane_b32 v1, s20, 8 -; GCN-NEXT: v_writelane_b32 v1, s21, 9 -; GCN-NEXT: v_writelane_b32 v1, s22, 10 -; GCN-NEXT: v_writelane_b32 v1, s23, 11 -; GCN-NEXT: v_writelane_b32 v1, s24, 12 -; GCN-NEXT: v_writelane_b32 v1, s25, 13 -; GCN-NEXT: v_writelane_b32 v1, s26, 14 -; GCN-NEXT: v_writelane_b32 v1, s27, 15 -; GCN-NEXT: v_writelane_b32 v1, s28, 16 -; GCN-NEXT: v_writelane_b32 v1, s29, 17 -; GCN-NEXT: v_writelane_b32 v1, s30, 18 -; GCN-NEXT: v_writelane_b32 v1, s31, 19 -; GCN-NEXT: v_writelane_b32 v1, s32, 20 -; GCN-NEXT: v_writelane_b32 v1, s33, 21 -; GCN-NEXT: v_writelane_b32 v1, s34, 22 -; GCN-NEXT: v_writelane_b32 v1, s35, 23 -; GCN-NEXT: v_writelane_b32 v1, s36, 24 -; GCN-NEXT: v_writelane_b32 v1, s37, 25 -; GCN-NEXT: v_writelane_b32 v1, s38, 26 -; GCN-NEXT: v_writelane_b32 v1, s39, 27 -; GCN-NEXT: v_writelane_b32 v1, s40, 28 -; GCN-NEXT: v_writelane_b32 v1, s41, 29 -; GCN-NEXT: v_writelane_b32 v1, s42, 30 -; GCN-NEXT: v_writelane_b32 v1, s43, 31 -; GCN-NEXT: v_writelane_b32 v1, s44, 32 -; GCN-NEXT: v_writelane_b32 v1, s45, 33 -; GCN-NEXT: v_writelane_b32 v1, s46, 34 -; GCN-NEXT: v_writelane_b32 v1, s47, 35 -; GCN-NEXT: v_writelane_b32 v1, s48, 36 -; GCN-NEXT: v_writelane_b32 v1, s49, 37 -; GCN-NEXT: v_writelane_b32 v1, s50, 38 -; GCN-NEXT: v_writelane_b32 v1, s51, 39 -; GCN-NEXT: v_writelane_b32 v1, s52, 40 -; GCN-NEXT: v_writelane_b32 v1, s53, 41 -; GCN-NEXT: v_writelane_b32 v1, s54, 42 -; GCN-NEXT: v_writelane_b32 v1, s55, 43 -; GCN-NEXT: v_writelane_b32 v1, s56, 44 -; GCN-NEXT: v_writelane_b32 v1, s57, 45 -; GCN-NEXT: v_writelane_b32 v1, s58, 46 -; GCN-NEXT: v_writelane_b32 v1, s59, 47 -; GCN-NEXT: v_writelane_b32 v1, s60, 48 -; GCN-NEXT: v_writelane_b32 v1, s61, 49 -; GCN-NEXT: v_writelane_b32 v1, s62, 50 -; GCN-NEXT: v_writelane_b32 v1, s63, 51 -; GCN-NEXT: v_writelane_b32 v1, s64, 52 -; GCN-NEXT: v_writelane_b32 v1, s65, 53 -; GCN-NEXT: v_writelane_b32 v1, s66, 54 -; GCN-NEXT: v_writelane_b32 v1, s67, 55 -; GCN-NEXT: v_writelane_b32 v1, s68, 56 -; GCN-NEXT: v_writelane_b32 v1, s69, 57 -; GCN-NEXT: v_writelane_b32 v1, s70, 58 -; GCN-NEXT: v_writelane_b32 v1, s71, 59 -; GCN-NEXT: v_writelane_b32 v1, s72, 60 -; GCN-NEXT: v_writelane_b32 v1, s73, 61 -; GCN-NEXT: v_writelane_b32 v1, s74, 62 -; GCN-NEXT: v_writelane_b32 v1, s75, 63 -; GCN-NEXT: v_writelane_b32 v2, s76, 0 -; GCN-NEXT: v_writelane_b32 v2, s77, 1 -; GCN-NEXT: v_writelane_b32 v2, s78, 2 -; GCN-NEXT: v_writelane_b32 v2, s79, 3 -; GCN-NEXT: v_writelane_b32 v2, s80, 4 -; GCN-NEXT: v_writelane_b32 v2, s81, 5 -; GCN-NEXT: v_writelane_b32 v2, s82, 6 -; GCN-NEXT: v_writelane_b32 v2, s83, 7 -; GCN: s_cbranch_scc1 - - -; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v0, 0 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 1 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 2 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 3 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 4 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 5 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 6 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v0, 7 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} - +; GCN: def s[4:11] +; GCN: v_writelane_b32 v0, s4, 56 +; GCN-NEXT: v_writelane_b32 v0, s5, 57 +; GCN-NEXT: v_writelane_b32 v0, s6, 58 +; GCN-NEXT: v_writelane_b32 v0, s7, 59 +; GCN-NEXT: v_writelane_b32 v0, s8, 60 +; GCN-NEXT: 
v_writelane_b32 v0, s9, 61 +; GCN-NEXT: v_writelane_b32 v0, s10, 62 +; GCN-NEXT: v_writelane_b32 v0, s11, 63 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 0 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 1 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 2 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 3 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 4 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 5 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 6 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 7 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 8 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 9 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 10 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 11 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 12 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 13 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 14 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 15 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 8 +; GCN-NEXT: v_writelane_b32 v1, s5, 9 +; GCN-NEXT: v_writelane_b32 v1, s6, 10 +; GCN-NEXT: v_writelane_b32 v1, s7, 11 +; GCN-NEXT: v_writelane_b32 v1, s8, 12 +; GCN-NEXT: v_writelane_b32 v1, s9, 13 +; GCN-NEXT: v_writelane_b32 v1, s10, 14 +; GCN-NEXT: v_writelane_b32 v1, s11, 15 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 16 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 17 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 18 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 19 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 20 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 21 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 22 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 23 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 16 +; GCN-NEXT: v_writelane_b32 v1, s5, 17 +; GCN-NEXT: v_writelane_b32 v1, s6, 18 +; GCN-NEXT: v_writelane_b32 v1, s7, 19 +; GCN-NEXT: v_writelane_b32 v1, s8, 20 +; GCN-NEXT: v_writelane_b32 v1, s9, 21 +; GCN-NEXT: v_writelane_b32 v1, s10, 22 +; GCN-NEXT: v_writelane_b32 v1, s11, 23 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 24 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 25 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 26 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 27 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 28 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 29 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 30 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 31 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 24 +; GCN-NEXT: v_writelane_b32 v1, s5, 25 +; GCN-NEXT: v_writelane_b32 v1, s6, 26 +; GCN-NEXT: v_writelane_b32 v1, s7, 27 +; GCN-NEXT: v_writelane_b32 v1, s8, 28 +; GCN-NEXT: v_writelane_b32 v1, s9, 29 +; GCN-NEXT: v_writelane_b32 v1, s10, 30 +; GCN-NEXT: v_writelane_b32 v1, s11, 31 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 32 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 33 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 34 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 35 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 36 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 37 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 38 -; 
GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 39 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 32 +; GCN-NEXT: v_writelane_b32 v1, s5, 33 +; GCN-NEXT: v_writelane_b32 v1, s6, 34 +; GCN-NEXT: v_writelane_b32 v1, s7, 35 +; GCN-NEXT: v_writelane_b32 v1, s8, 36 +; GCN-NEXT: v_writelane_b32 v1, s9, 37 +; GCN-NEXT: v_writelane_b32 v1, s10, 38 +; GCN-NEXT: v_writelane_b32 v1, s11, 39 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 40 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 41 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 42 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 43 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 44 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 45 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 46 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 47 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 40 +; GCN-NEXT: v_writelane_b32 v1, s5, 41 +; GCN-NEXT: v_writelane_b32 v1, s6, 42 +; GCN-NEXT: v_writelane_b32 v1, s7, 43 +; GCN-NEXT: v_writelane_b32 v1, s8, 44 +; GCN-NEXT: v_writelane_b32 v1, s9, 45 +; GCN-NEXT: v_writelane_b32 v1, s10, 46 +; GCN-NEXT: v_writelane_b32 v1, s11, 47 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 48 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 49 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 50 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 51 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 52 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 53 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 54 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 55 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 48 +; GCN-NEXT: v_writelane_b32 v1, s5, 49 +; GCN-NEXT: v_writelane_b32 v1, s6, 50 +; GCN-NEXT: v_writelane_b32 v1, s7, 51 +; GCN-NEXT: v_writelane_b32 v1, s8, 52 +; GCN-NEXT: v_writelane_b32 v1, s9, 53 +; GCN-NEXT: v_writelane_b32 v1, s10, 54 +; GCN-NEXT: v_writelane_b32 v1, s11, 55 -; GCN: v_readlane_b32 s[[USE_TMP_LO]], v1, 56 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 57 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 58 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 59 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 60 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 61 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v1, 62 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI]], v1, 63 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 56 +; GCN-NEXT: v_writelane_b32 v1, s5, 57 +; GCN-NEXT: v_writelane_b32 v1, s6, 58 +; GCN-NEXT: v_writelane_b32 v1, s7, 59 +; GCN-NEXT: v_writelane_b32 v1, s8, 60 +; GCN-NEXT: v_writelane_b32 v1, s9, 61 +; GCN-NEXT: v_writelane_b32 v1, s10, 62 +; GCN-NEXT: v_writelane_b32 v1, s11, 63 -; GCN: v_readlane_b32 s{{[0-9]+}}, v2, 0 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 1 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 2 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 3 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 4 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 5 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 6 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 7 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: def s[4:11] +; GCN: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 +; GCN: 
s_cbranch_scc1 -; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 56 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 57 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 58 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 59 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 60 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 61 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 62 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 63 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: v_readlane_b32 s12, v1, 56 +; GCN-NEXT: v_readlane_b32 s13, v1, 57 +; GCN-NEXT: v_readlane_b32 s14, v1, 58 +; GCN-NEXT: v_readlane_b32 s15, v1, 59 +; GCN-NEXT: v_readlane_b32 s16, v1, 60 +; GCN-NEXT: v_readlane_b32 s17, v1, 61 +; GCN-NEXT: v_readlane_b32 s18, v1, 62 +; GCN-NEXT: v_readlane_b32 s19, v1, 63 +; GCN-NEXT: v_readlane_b32 s20, v1, 48 +; GCN-NEXT: v_readlane_b32 s21, v1, 49 +; GCN-NEXT: v_readlane_b32 s22, v1, 50 +; GCN-NEXT: v_readlane_b32 s23, v1, 51 +; GCN-NEXT: v_readlane_b32 s24, v1, 52 +; GCN-NEXT: v_readlane_b32 s25, v1, 53 +; GCN-NEXT: v_readlane_b32 s26, v1, 54 +; GCN-NEXT: v_readlane_b32 s27, v1, 55 +; GCN-NEXT: v_readlane_b32 s28, v1, 40 +; GCN-NEXT: v_readlane_b32 s29, v1, 41 +; GCN-NEXT: v_readlane_b32 s30, v1, 42 +; GCN-NEXT: v_readlane_b32 s31, v1, 43 +; GCN-NEXT: v_readlane_b32 s32, v1, 44 +; GCN-NEXT: v_readlane_b32 s33, v1, 45 +; GCN-NEXT: v_readlane_b32 s34, v1, 46 +; GCN-NEXT: v_readlane_b32 s35, v1, 47 +; GCN-NEXT: v_readlane_b32 s36, v1, 32 +; GCN-NEXT: v_readlane_b32 s37, v1, 33 +; GCN-NEXT: v_readlane_b32 s38, v1, 34 +; GCN-NEXT: v_readlane_b32 s39, v1, 35 +; GCN-NEXT: v_readlane_b32 s40, v1, 36 +; GCN-NEXT: v_readlane_b32 s41, v1, 37 +; GCN-NEXT: v_readlane_b32 s42, v1, 38 +; GCN-NEXT: v_readlane_b32 s43, v1, 39 +; GCN-NEXT: v_readlane_b32 s44, v1, 24 +; GCN-NEXT: v_readlane_b32 s45, v1, 25 +; GCN-NEXT: v_readlane_b32 s46, v1, 26 +; GCN-NEXT: v_readlane_b32 s47, v1, 27 +; GCN-NEXT: v_readlane_b32 s48, v1, 28 +; GCN-NEXT: v_readlane_b32 s49, v1, 29 +; GCN-NEXT: v_readlane_b32 s50, v1, 30 +; GCN-NEXT: v_readlane_b32 s51, v1, 31 +; GCN-NEXT: v_readlane_b32 s52, v1, 16 +; GCN-NEXT: v_readlane_b32 s53, v1, 17 +; GCN-NEXT: v_readlane_b32 s54, v1, 18 +; GCN-NEXT: v_readlane_b32 s55, v1, 19 +; GCN-NEXT: v_readlane_b32 s56, v1, 20 +; GCN-NEXT: v_readlane_b32 s57, v1, 21 +; GCN-NEXT: v_readlane_b32 s58, v1, 22 +; GCN-NEXT: v_readlane_b32 s59, v1, 23 +; GCN-NEXT: v_readlane_b32 s60, v1, 8 +; GCN-NEXT: v_readlane_b32 s61, v1, 9 +; GCN-NEXT: v_readlane_b32 s62, v1, 10 +; GCN-NEXT: v_readlane_b32 s63, v1, 11 +; GCN-NEXT: v_readlane_b32 s64, v1, 12 +; GCN-NEXT: v_readlane_b32 s65, v1, 13 +; GCN-NEXT: v_readlane_b32 s66, v1, 14 +; GCN-NEXT: v_readlane_b32 s67, v1, 15 +; GCN-NEXT: v_readlane_b32 s68, v1, 0 +; GCN-NEXT: v_readlane_b32 s69, v1, 1 +; GCN-NEXT: v_readlane_b32 s70, v1, 2 +; GCN-NEXT: v_readlane_b32 s71, v1, 3 +; GCN-NEXT: v_readlane_b32 s72, v1, 4 +; GCN-NEXT: v_readlane_b32 s73, v1, 5 +; GCN-NEXT: v_readlane_b32 s74, v1, 6 +; GCN-NEXT: v_readlane_b32 s75, v1, 7 +; GCN-NEXT: v_readlane_b32 s76, v0, 56 +; GCN-NEXT: v_readlane_b32 s77, v0, 57 +; GCN-NEXT: v_readlane_b32 s78, v0, 58 +; GCN-NEXT: v_readlane_b32 s79, v0, 59 +; GCN-NEXT: v_readlane_b32 s80, v0, 60 +; GCN-NEXT: v_readlane_b32 s81, v0, 61 +; GCN-NEXT: v_readlane_b32 s82, v0, 62 +; GCN-NEXT: v_readlane_b32 s83, v0, 63 +; GCN-NEXT: v_readlane_b32 s84, v0, 48 +; GCN-NEXT: v_readlane_b32 s85, v0, 49 +; GCN-NEXT: v_readlane_b32 s86, v0, 50 +; GCN-NEXT: v_readlane_b32 s87, v0, 51 +; GCN-NEXT: v_readlane_b32 s88, v0, 52 +; GCN-NEXT: v_readlane_b32 s89, v0, 
53 +; GCN-NEXT: v_readlane_b32 s90, v0, 54 +; GCN-NEXT: v_readlane_b32 s91, v0, 55 +; GCN-NEXT: v_readlane_b32 s4, v0, 0 +; GCN-NEXT: v_readlane_b32 s5, v0, 1 +; GCN-NEXT: v_readlane_b32 s6, v0, 2 +; GCN-NEXT: v_readlane_b32 s7, v0, 3 +; GCN-NEXT: v_readlane_b32 s8, v0, 4 +; GCN-NEXT: v_readlane_b32 s9, v0, 5 +; GCN-NEXT: v_readlane_b32 s10, v0, 6 +; GCN-NEXT: v_readlane_b32 s11, v0, 7 +; GCN: ; use s{{\[}}[[USE_TMP_LO:[0-9]+]]:[[USE_TMP_HI:[0-9]+]]{{\]}} ; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 8 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 9 @@ -335,15 +321,25 @@ ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 47 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} -; GCN: v_readlane_b32 s{{[0-9]+}}, v0, 48 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 49 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 50 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 51 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 52 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 53 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 54 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v0, 55 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} +; GCN: v_readlane_b32 s{{[0-9]+}}, v2, 0 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 1 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 2 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 3 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 4 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 5 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 6 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v2, 7 +; GCN: use s[84:91] +; GCN: use s[76:83] +; GCN: use s[68:75] +; GCN: use s[60:67] +; GCN: use s[52:59] +; GCN: use s[44:51] +; GCN: use s[36:43] +; GCN: use s[28:35] +; GCN: use s[20:27] +; GCN: use s[12:19] +; GCN: use s[4:11] define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -394,41 +390,46 @@ ; GCN-LABEL: {{^}}split_sgpr_spill_2_vgprs: ; GCN: def s[4:19] -; GCN: def s[20:35] - -; GCN: v_writelane_b32 v0, s4, 50 -; GCN-NEXT: v_writelane_b32 v0, s5, 51 -; GCN-NEXT: v_writelane_b32 v0, s6, 52 -; GCN-NEXT: v_writelane_b32 v0, s7, 53 -; GCN-NEXT: v_writelane_b32 v0, s8, 54 -; GCN-NEXT: v_writelane_b32 v0, s9, 55 -; GCN-NEXT: v_writelane_b32 v0, s10, 56 -; GCN-NEXT: v_writelane_b32 v0, s11, 57 -; GCN-NEXT: v_writelane_b32 v0, s12, 58 -; GCN-NEXT: v_writelane_b32 v0, s13, 59 -; GCN-NEXT: v_writelane_b32 v0, s14, 60 -; GCN-NEXT: v_writelane_b32 v0, s15, 61 -; GCN-NEXT: v_writelane_b32 v0, s16, 62 -; GCN-NEXT: v_writelane_b32 v0, s17, 63 -; GCN-NEXT: v_writelane_b32 v1, s18, 0 -; GCN-NEXT: v_writelane_b32 v1, s19, 1 - -; GCN: v_readlane_b32 s4, v0, 50 -; GCN-NEXT: v_readlane_b32 s5, v0, 51 -; GCN-NEXT: v_readlane_b32 s6, v0, 52 -; GCN-NEXT: v_readlane_b32 s7, v0, 53 -; GCN-NEXT: v_readlane_b32 s8, v0, 54 -; GCN-NEXT: v_readlane_b32 s9, v0, 55 -; GCN-NEXT: v_readlane_b32 s10, v0, 56 -; GCN-NEXT: v_readlane_b32 s11, v0, 57 -; GCN-NEXT: v_readlane_b32 s12, v0, 58 -; GCN-NEXT: v_readlane_b32 s13, v0, 59 -; GCN-NEXT: v_readlane_b32 s14, v0, 60 -; GCN-NEXT: v_readlane_b32 s15, v0, 61 -; GCN-NEXT: v_readlane_b32 s16, v0, 62 -; GCN-NEXT: v_readlane_b32 s17, v0, 63 -; GCN-NEXT: v_readlane_b32 s18, v1, 0 -; GCN-NEXT: v_readlane_b32 s19, v1, 1 +; GCN: def s[4:19] +; GCN: def s[4:19] +; GCN: def s[4:19] + +; GCN: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: 
v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; GCN-NEXT: v_writelane_b32 v0, s19, 63 + +; GCN: def s[4:11] +; GCN: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 + +; GCN: v_readlane_b32 s6, v0, 50 +; GCN-NEXT: v_readlane_b32 s7, v0, 51 +; GCN-NEXT: v_readlane_b32 s8, v0, 52 +; GCN-NEXT: v_readlane_b32 s9, v0, 53 +; GCN-NEXT: v_readlane_b32 s10, v0, 54 +; GCN-NEXT: v_readlane_b32 s11, v0, 55 +; GCN-NEXT: v_readlane_b32 s12, v0, 56 +; GCN-NEXT: v_readlane_b32 s13, v0, 57 +; GCN-NEXT: v_readlane_b32 s14, v0, 58 +; GCN-NEXT: v_readlane_b32 s15, v0, 59 +; GCN-NEXT: v_readlane_b32 s16, v0, 60 +; GCN-NEXT: v_readlane_b32 s17, v0, 61 +; GCN-NEXT: v_readlane_b32 s18, v0, 62 +; GCN-NEXT: v_readlane_b32 s19, v0, 63 +; GCN: use s[20:27] +; GCN: use s[0:1] +; GCN: use s[4:19] define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 { %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -492,10 +493,9 @@ ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 30 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 31 -; GCN: def s[0:1] -; GCN: v_writelane_b32 v23, s0, 32 -; GCN-NEXT: v_writelane_b32 v23, s1, 33 - +; GCN: def s[4:19] +; GCN: v_writelane_b32 v23, s4, 32 +; GCN-NEXT: v_writelane_b32 v23, s5, 33 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 34 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 35 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 36 @@ -510,28 +510,51 @@ ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 45 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 46 ; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 47 -; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 48 -; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 49 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +; GCN: def s[4:19] +; GCN: v_writelane_b32 v23, s{{[[0-9]+}}, 48 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 49 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 50 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 51 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 52 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 53 +; GCN-NEXT: v_writelane_b32 
v23, s{{[[0-9]+}}, 54 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 55 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 56 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 57 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 58 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 59 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 60 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 61 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 62 +; GCN-NEXT: v_writelane_b32 v23, s{{[[0-9]+}}, 63 + +; GCN: def s[4:5] ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; GCN: s_cbranch_scc1 +; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} + +; GCN: v_readlane_b32 s20, v23, 32 +; GCN-NEXT: v_readlane_b32 s21, v23, 33 +; GCN-NEXT: v_readlane_b32 s22, v23, 34 +; GCN-NEXT: v_readlane_b32 s23, v23, 35 +; GCN-NEXT: v_readlane_b32 s24, v23, 36 +; GCN-NEXT: v_readlane_b32 s25, v23, 37 +; GCN-NEXT: v_readlane_b32 s26, v23, 38 +; GCN-NEXT: v_readlane_b32 s27, v23, 39 +; GCN-NEXT: v_readlane_b32 s28, v23, 40 +; GCN-NEXT: v_readlane_b32 s29, v23, 41 +; GCN-NEXT: v_readlane_b32 s30, v23, 42 +; GCN-NEXT: v_readlane_b32 s31, v23, 43 +; GCN-NEXT: v_readlane_b32 s32, v23, 44 +; GCN-NEXT: v_readlane_b32 s33, v23, 45 +; GCN-NEXT: v_readlane_b32 s34, v23, 46 +; GCN-NEXT: v_readlane_b32 s35, v23, 47 + ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 0 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 1 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 2 @@ -550,25 +573,6 @@ ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 15 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} - -; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 34 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 35 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 36 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 37 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 38 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 39 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 40 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 41 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 42 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 43 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 44 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 45 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 46 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 47 -; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 48 -; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 49 -; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} - ; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 16 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 17 ; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 18 @@ -587,26 +591,23 @@ ; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 31 ; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, 
s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} - -; GCN: v_readlane_b32 s0, v23, 32 -; GCN: v_readlane_b32 s1, v23, 33 -; GCN: ;;#ASMSTART +; GCN: v_readlane_b32 s[[USE_TMP_LO:[0-9]+]], v23, 48 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 49 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 50 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 51 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 52 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 53 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 54 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 55 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 56 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 57 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 58 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 59 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 60 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 61 +; GCN-NEXT: v_readlane_b32 s{{[0-9]+}}, v23, 62 +; GCN-NEXT: v_readlane_b32 s[[USE_TMP_HI:[0-9]+]], v23, 63 +; GCN: ; use s{{\[}}[[USE_TMP_LO]]:[[USE_TMP_HI]]{{\]}} ; GCN: ; use s[0:1] define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { call void asm sideeffect "", "~{v[0:7]}" () #0 Index: test/CodeGen/AMDGPU/spill-m0.ll =================================================================== --- test/CodeGen/AMDGPU/spill-m0.ll +++ test/CodeGen/AMDGPU/spill-m0.ll @@ -10,10 +10,8 @@ ; TOSMEM: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0 ; TOSMEM: s_mov_b32 s[[HI:[0-9]+]], 0xe80000 -; GCN-DAG: s_cmp_lg_u32 - ; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0 +; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], [[M0_LANE:[0-9]+]] ; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 ; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]] @@ -24,10 +22,12 @@ ; TOSMEM-NOT: [[M0_COPY]] ; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill +; GCN-DAG: s_cmp_lg_u32 + ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ENDIF]]: -; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0 +; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], [[M0_LANE]] ; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]] ; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Reload @@ -61,20 +61,21 @@ ; m0 is killed, so it isn't necessary during the entry block spill to preserve it ; GCN-LABEL: {{^}}spill_kill_m0_lds: -; GCN: s_mov_b32 m0, s6 -; GCN: v_interp_mov_f32 ; TOSMEM-NOT: s_m0 -; TOSMEM: s_add_u32 m0, s7, 0x100 +; TOSMEM: s_add_u32 m0, s7, {{0x[0-9]+}} ; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill ; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it ; FIXME-TOSMEM-NOT: m0 ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s7, 0x300 -; 
TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 4-byte Folded Spill +; TOSMEM: s_add_u32 m0, s7, {{0x[0-9]+}} +; TOSMEM: s_buffer_store_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 4-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 +; GCN: s_mov_b32 m0, s6 +; GCN: v_interp_mov_f32 +; ; TOSMEM: s_mov_b64 exec, ; TOSMEM: s_cbranch_execz ; TOSMEM: s_branch @@ -112,10 +113,11 @@ ; Force save and restore of m0 during SMEM spill ; GCN-LABEL: {{^}}m0_unavailable_spill: +; GCN: s_load_dword [[REG0:s[0-9]+]], s[0:1], {{0x[0-9]+}} ; GCN: ; def m0, 1 -; GCN: s_mov_b32 m0, s2 +; GCN: s_mov_b32 m0, [[REG0]] ; GCN: v_interp_mov_f32 ; GCN: ; clobber m0 @@ -157,34 +159,33 @@ } ; GCN-LABEL: {{^}}restore_m0_lds: -; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] -; TOSMEM: s_cmp_eq_u32 ; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill +; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}} +; TOSMEM: s_buffer_store_dword s1, s[88:91], m0 ; 4-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x300 -; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill +; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] +; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}} +; TOSMEM: s_waitcnt lgkmcnt(0) +; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_cmp_eq_u32 ; TOSMEM: s_cbranch_scc1 -; TOSMEM: s_mov_b32 m0, -1 +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}} +; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload +; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_mov_b32 s0, m0 +; TOSMEM: s_mov_b32 [[REG1:s[0-9]+]], m0 ; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload -; TOSMEM: s_mov_b32 m0, s0 -; TOSMEM: s_waitcnt lgkmcnt(0) - -; TOSMEM: ds_write_b64 +; TOSMEM: s_mov_b32 m0, [[REG1]] +; TOSMEM: s_mov_b32 m0, -1 -; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x300 -; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload -; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_waitcnt lgkmcnt(0) ; TOSMEM-NOT: m0 +; TOSMEM: ds_write_b64 ; TOSMEM: s_mov_b32 m0, s0 ; TOSMEM: ; use m0 Index: test/CodeGen/ARM/Windows/alloca.ll =================================================================== --- test/CodeGen/ARM/Windows/alloca.ll +++ test/CodeGen/ARM/Windows/alloca.ll @@ -17,10 +17,11 @@ ; CHECK: bl num_entries ; Any register is actually valid here, but turns out we use lr, ; because we do not have the kill flag on R0. 
-; CHECK: mov.w [[R1:lr]], #7 -; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2 -; CHECK: bic [[R0]], [[R0]], #4 -; CHECK: lsrs r4, [[R0]], #2 +; CHECK: mov [[R0:r[0-9]+]], r0 +; CHECK: movs [[R1:r[0-9]+]], #7 +; CHECK: add.w [[R2:r[0-9]+]], [[R1]], [[R0]], lsl #2 +; CHECK: bic [[R2]], [[R2]], #4 +; CHECK: lsrs r4, [[R2]], #2 ; CHECK: bl __chkstk ; CHECK: sub.w sp, sp, r4 Index: test/CodeGen/ARM/cmpxchg-O0-be.ll =================================================================== --- test/CodeGen/ARM/cmpxchg-O0-be.ll +++ test/CodeGen/ARM/cmpxchg-O0-be.ll @@ -7,12 +7,10 @@ ; CHECK_LABEL: main: ; CHECK: ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}} ; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4] -; CHECK: mov [[R4:r[0-9]+]], [[R2]] -; CHECK-NEXT: mov [[R5:r[0-9]+]], [[R1]] -; CHECK: ldr [[R2]], {{\[}}[[R1]]{{\]}} -; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4] -; CHECK: mov [[R6:r[0-9]+]], [[R2]] -; CHECK-NEXT: mov [[R7:r[0-9]+]], [[R1]] +; CHECK: mov [[R4:r[0-9]+]], [[R1]] +; CHECK: ldr [[R5:r[0-9]+]], {{\[}}[[R1]]{{\]}} +; CHECK-NEXT: ldr [[R6:r[0-9]+]], {{\[}}[[R1]], #4] +; CHECK: mov [[R7:r[0-9]+]], [[R6]] define arm_aapcs_vfpcc i32 @main() #0 { entry: Index: test/CodeGen/ARM/cmpxchg-O0.ll =================================================================== --- test/CodeGen/ARM/cmpxchg-O0.ll +++ test/CodeGen/ARM/cmpxchg-O0.ll @@ -7,13 +7,15 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexb [[OLD:r[0-9]+]], [r0] +; CHECK: ldrexb [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexb [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -29,13 +31,15 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_16: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexh [[OLD:r[0-9]+]], [r0] +; CHECK: ldrexh [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexh [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -51,13 +55,15 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_32: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrex [[OLD:r[0-9]+]], [r0] +; CHECK: ldrex [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strex [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -72,14 +78,15 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: +; CHECK: mov [[ADDR:r[0-9]+]], r0 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: 
[[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0] +; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLDLO]], r6 ; CHECK: cmpeq [[OLDHI]], r7 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexd [[STATUS:r[0-9]+]], r4, r5, [r0] +; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -90,14 +97,15 @@ define { i64, i1 } @test_nontrivial_args(i64* %addr, i64 %desired, i64 %new) { ; CHECK-LABEL: test_nontrivial_args: +; CHECK: mov [[ADDR:r[0-9]+]], r0 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0] +; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLDLO]], {{r[0-9]+}} ; CHECK: cmpeq [[OLDHI]], {{r[0-9]+}} ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r0] +; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: Index: test/CodeGen/ARM/crash-greedy-v6.ll =================================================================== --- test/CodeGen/ARM/crash-greedy-v6.ll +++ test/CodeGen/ARM/crash-greedy-v6.ll @@ -14,21 +14,21 @@ for.body: ; preds = %for.body, %for.body.lr.ph ; SOURCE-SCHED: ldr ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr ; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: ldr ; SOURCE-SCHED: str ; SOURCE-SCHED: str ; SOURCE-SCHED: str ; SOURCE-SCHED: str -; SOURCE-SCHED: ldr ; SOURCE-SCHED: bl -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr +; SOURCE-SCHED: add ; SOURCE-SCHED: cmp ; SOURCE-SCHED: bne %i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ] Index: test/CodeGen/ARM/debug-info-blocks.ll =================================================================== --- test/CodeGen/ARM/debug-info-blocks.ll +++ test/CodeGen/ARM/debug-info-blocks.ll @@ -6,8 +6,7 @@ ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18) ; CHECK-NEXT: DW_AT_name {{.*}} "mydata" ; Radar 9331779 Index: test/CodeGen/ARM/fast-isel-call.ll =================================================================== --- test/CodeGen/ARM/fast-isel-call.ll +++ test/CodeGen/ARM/fast-isel-call.ll @@ -41,38 +41,31 @@ ; ARM: foo ; THUMB: foo ;; Materialize i1 1 -; ARM: movw r2, #1 +; ARM: movw [[REG0:r[0-9]+]], #1 +; THUMB: movs [[REG0:r[0-9]+]], #1 ;; zero-ext -; ARM: and r2, r2, #1 -; THUMB: and r2, r2, #1 +; ARM: and [[REG1:r[0-9]+]], [[REG0]], #1 +; THUMB: and [[REG1:r[0-9]+]], [[REG0]], #1 %1 = call i32 @t0(i1 zeroext 1) -; ARM: sxtb r2, r1 -; ARM: mov r0, r2 -; THUMB: sxtb r2, r1 -; THUMB: mov r0, r2 +; ARM: sxtb r0, {{r[0-9]+}} +; THUMB: sxtb r0, {{r[0-9]+}} %2 = call i32 @t1(i8 signext %a) -; ARM: and r2, r1, #255 -; ARM: mov r0, r2 -; THUMB: and r2, r1, #255 -; THUMB: mov r0, r2 +; ARM: and r0, {{r[0-9]+}}, #255 +; THUMB: and r0, {{r[0-9]+}}, #255 %3 = call i32 
@t2(i8 zeroext %a) -; ARM: sxth r2, r1 -; ARM: mov r0, r2 -; THUMB: sxth r2, r1 -; THUMB: mov r0, r2 +; ARM: sxth r0, {{r[0-9]+}} +; THUMB: sxth r0, {{r[0-9]+}} %4 = call i32 @t3(i16 signext %b) -; ARM: uxth r2, r1 -; ARM: mov r0, r2 -; THUMB: uxth r2, r1 -; THUMB: mov r0, r2 +; ARM: uxth r0, {{r[0-9]+}} +; THUMB: uxth r0, {{r[0-9]+}} %5 = call i32 @t4(i16 zeroext %b) ;; A few test to check materialization ;; Note: i1 1 was materialized with t1 call -; ARM: movw r1, #255 +; ARM: movw {{r[0-9]+}}, #255 %6 = call i32 @t2(i8 zeroext 255) -; ARM: movw r1, #65535 -; THUMB: movw r1, #65535 +; ARM: movw {{r[0-9]+}}, #65535 +; THUMB: movw {{r[0-9]+}}, #65535 %7 = call i32 @t4(i16 zeroext 65535) ret void } @@ -112,10 +105,9 @@ ; ARM: bl {{_?}}bar ; ARM-LONG-LABEL: @t10 -; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} -; ARM-LONG-MACHO: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} -; ARM-LONG-MACHO: str [[R]], [r7, [[SLOT:#[-0-9]+]]] @ 4-byte Spill -; ARM-LONG-MACHO: ldr [[R:l?r[0-9]*]], [r7, [[SLOT]]] @ 4-byte Reload +; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; ARM-LONG-MACHO: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}} +; ARM-LONG-MACHO: ldr [[R:r[0-9]+]], {{\[}}[[R1]]] ; ARM-LONG-ELF: movw [[R:l?r[0-9]*]], :lower16:bar ; ARM-LONG-ELF: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} @@ -138,11 +130,9 @@ ; THUMB-DAG: str.w [[R4]], [sp, #4] ; THUMB: bl {{_?}}bar ; THUMB-LONG-LABEL: @t10 -; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} -; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} -; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}} -; THUMB-LONG: str [[R]], [sp, [[SLOT:#[-0-9]+]]] @ 4-byte Spill -; THUMB-LONG: ldr.w [[R:l?r[0-9]*]], [sp, [[SLOT]]] @ 4-byte Reload +; THUMB-LONG: {{(movw)|(ldr.n)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; THUMB-LONG: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}} +; THUMB-LONG: ldr{{(.w)?}} [[R:r[0-9]+]], {{\[}}[[R1]]{{\]}} ; THUMB-LONG: blx [[R]] %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 Index: test/CodeGen/ARM/fast-isel-intrinsic.ll =================================================================== --- test/CodeGen/ARM/fast-isel-intrinsic.ll +++ test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -55,16 +55,13 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG1:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG1:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG1]], :upper16:temp -; ARM: add r1, r0, #4 -; ARM: add r0, r0, #16 -; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; ARM: mov r0, r1 -; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; ARM: add r0, [[REG1]], #4 +; ARM: add r1, [[REG1]], #16 ; ARM: movw r2, #17 ; ARM: bl {{_?}}memcpy ; ARM-LONG-LABEL: t2: @@ -80,12 +77,9 @@ ; THUMB-LABEL: t2: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: adds r1, r0, #4 -; THUMB: adds r0, #16 -; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; THUMB: mov r0, r1 -; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: adds r0, [[REG1]], #4 +; THUMB: adds r1, #16 ; 
THUMB: movs r2, #17 ; THUMB: bl {{_?}}memcpy ; THUMB-LONG-LABEL: t2: @@ -104,15 +98,14 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp -; ARM: add r1, r0, #4 -; ARM: add r0, r0, #16 -; ARM: mov r0, r1 +; ARM: add r0, [[REG0]], #4 +; ARM: add r1, [[REG0]], #16 ; ARM: movw r2, #10 ; ARM: bl {{_?}}memmove ; ARM-LONG-LABEL: t3: @@ -128,12 +121,9 @@ ; THUMB-LABEL: t3: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: adds r1, r0, #4 -; THUMB: adds r0, #16 -; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; THUMB: mov r0, r1 -; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: adds r0, [[REG1]], #4 +; THUMB: adds r1, #16 ; THUMB: movs r2, #10 ; THUMB: bl {{_?}}memmove ; THUMB-LONG-LABEL: t3: @@ -150,28 +140,28 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp -; ARM: ldr r1, [r0, #16] -; ARM: str r1, [r0, #4] -; ARM: ldr r1, [r0, #20] -; ARM: str r1, [r0, #8] -; ARM: ldrh r1, [r0, #24] -; ARM: strh r1, [r0, #12] +; ARM: ldr [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: str [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldr [[REG2:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: str [[REG2]], {{\[}}[[REG0]], #8] +; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strh [[REG3]], {{\[}}[[REG0]], #12] ; ARM: bx lr ; THUMB-LABEL: t4: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldr r1, [r0, #16] -; THUMB: str r1, [r0, #4] -; THUMB: ldr r1, [r0, #20] -; THUMB: str r1, [r0, #8] -; THUMB: ldrh r1, [r0, #24] -; THUMB: strh r1, [r0, #12] +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: ldr [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16] +; THUMB: str [[REG2]], {{\[}}[[REG1]], #4] +; THUMB: ldr [[REG3:r[0-9]+]], {{\[}}[[REG1]], #20] +; THUMB: str [[REG3]], {{\[}}[[REG1]], #8] +; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #24] +; THUMB: strh [[REG4]], {{\[}}[[REG1]], #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void @@ -184,36 +174,36 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] - -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp - -; ARM: ldrh r1, [r0, #16] -; ARM: strh r1, [r0, #4] -; ARM: ldrh r1, [r0, #18] -; ARM: strh r1, [r0, #6] -; ARM: ldrh r1, [r0, #20] -; ARM: strh r1, [r0, #8] -; ARM: ldrh r1, [r0, #22] -; ARM: strh r1, [r0, #10] -; ARM: ldrh r1, [r0, #24] -; ARM: strh r1, [r0, #12] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] + +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], 
:upper16:temp + +; ARM: ldrh [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: strh [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG0]], #18] +; ARM: strh [[REG2]], {{\[}}[[REG0]], #6] +; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: strh [[REG3]], {{\[}}[[REG0]], #8] +; ARM: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG0]], #22] +; ARM: strh [[REG4]], {{\[}}[[REG0]], #10] +; ARM: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strh [[REG5]], {{\[}}[[REG0]], #12] ; ARM: bx lr ; THUMB-LABEL: t5: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldrh r1, [r0, #16] -; THUMB: strh r1, [r0, #4] -; THUMB: ldrh r1, [r0, #18] -; THUMB: strh r1, [r0, #6] -; THUMB: ldrh r1, [r0, #20] -; THUMB: strh r1, [r0, #8] -; THUMB: ldrh r1, [r0, #22] -; THUMB: strh r1, [r0, #10] -; THUMB: ldrh r1, [r0, #24] -; THUMB: strh r1, [r0, #12] +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16] +; THUMB: strh [[REG2]], {{\[}}[[REG1]], #4] +; THUMB: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG1]], #18] +; THUMB: strh [[REG3]], {{\[}}[[REG1]], #6] +; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #20] +; THUMB: strh [[REG4]], {{\[}}[[REG1]], #8] +; THUMB: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG1]], #22] +; THUMB: strh [[REG5]], {{\[}}[[REG1]], #10] +; THUMB: ldrh [[REG6:r[0-9]+]], {{\[}}[[REG1]], #24] +; THUMB: strh [[REG6]], {{\[}}[[REG1]], #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void @@ -224,56 +214,56 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] - -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp - -; ARM: ldrb r1, [r0, #16] -; ARM: strb r1, [r0, #4] -; ARM: ldrb r1, [r0, #17] -; ARM: strb r1, [r0, #5] -; ARM: ldrb r1, [r0, #18] -; ARM: strb r1, [r0, #6] -; ARM: ldrb r1, [r0, #19] -; ARM: strb r1, [r0, #7] -; ARM: ldrb r1, [r0, #20] -; ARM: strb r1, [r0, #8] -; ARM: ldrb r1, [r0, #21] -; ARM: strb r1, [r0, #9] -; ARM: ldrb r1, [r0, #22] -; ARM: strb r1, [r0, #10] -; ARM: ldrb r1, [r0, #23] -; ARM: strb r1, [r0, #11] -; ARM: ldrb r1, [r0, #24] -; ARM: strb r1, [r0, #12] -; ARM: ldrb r1, [r0, #25] -; ARM: strb r1, [r0, #13] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] + +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp + +; ARM: ldrb [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: strb [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #17] +; ARM: strb [[REG2]], {{\[}}[[REG0]], #5] +; ARM: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #18] +; ARM: strb [[REG3]], {{\[}}[[REG0]], #6] +; ARM: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #19] +; ARM: strb [[REG4]], {{\[}}[[REG0]], #7] +; ARM: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: strb [[REG5]], {{\[}}[[REG0]], #8] +; ARM: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #21] +; ARM: strb [[REG6]], {{\[}}[[REG0]], #9] +; ARM: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #22] +; ARM: strb [[REG7]], {{\[}}[[REG0]], #10] +; ARM: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #23] +; ARM: strb [[REG8]], {{\[}}[[REG0]], #11] +; ARM: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strb [[REG9]], {{\[}}[[REG0]], #12] +; ARM: ldrb [[REG10:r[0-9]+]], 
{{\[}}[[REG0]], #25] +; ARM: strb [[REG10]], {{\[}}[[REG0]], #13] ; ARM: bx lr ; THUMB-LABEL: t6: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldrb r1, [r0, #16] -; THUMB: strb r1, [r0, #4] -; THUMB: ldrb r1, [r0, #17] -; THUMB: strb r1, [r0, #5] -; THUMB: ldrb r1, [r0, #18] -; THUMB: strb r1, [r0, #6] -; THUMB: ldrb r1, [r0, #19] -; THUMB: strb r1, [r0, #7] -; THUMB: ldrb r1, [r0, #20] -; THUMB: strb r1, [r0, #8] -; THUMB: ldrb r1, [r0, #21] -; THUMB: strb r1, [r0, #9] -; THUMB: ldrb r1, [r0, #22] -; THUMB: strb r1, [r0, #10] -; THUMB: ldrb r1, [r0, #23] -; THUMB: strb r1, [r0, #11] -; THUMB: ldrb r1, [r0, #24] -; THUMB: strb r1, [r0, #12] -; THUMB: ldrb r1, [r0, #25] -; THUMB: strb r1, [r0, #13] +; THUMB: ldr [[REG0:r[0-9]+]], [r0] +; THUMB: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #16] +; THUMB: strb [[REG2]], {{\[}}[[REG0]], #4] +; THUMB: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #17] +; THUMB: strb [[REG3]], {{\[}}[[REG0]], #5] +; THUMB: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #18] +; THUMB: strb [[REG4]], {{\[}}[[REG0]], #6] +; THUMB: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #19] +; THUMB: strb [[REG5]], {{\[}}[[REG0]], #7] +; THUMB: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #20] +; THUMB: strb [[REG6]], {{\[}}[[REG0]], #8] +; THUMB: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #21] +; THUMB: strb [[REG7]], {{\[}}[[REG0]], #9] +; THUMB: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #22] +; THUMB: strb [[REG8]], {{\[}}[[REG0]], #10] +; THUMB: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #23] +; THUMB: strb [[REG9]], {{\[}}[[REG0]], #11] +; THUMB: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #24] +; THUMB: strb [[REG10]], {{\[}}[[REG0]], #12] +; THUMB: ldrb [[REG11:r[0-9]+]], {{\[}}[[REG0]], #25] +; THUMB: strb [[REG11]], {{\[}}[[REG0]], #13] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void Index: test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll =================================================================== --- test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll +++ test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -2,7 +2,7 @@ define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t1 +; THUMB-LABEL: t1: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-4] @@ -11,7 +11,7 @@ define i32 @t2(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t2 +; THUMB-LABEL: t2: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-252] @@ -20,7 +20,7 @@ define i32 @t3(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t3 +; THUMB-LABEL: t3: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0] @@ -29,7 +29,7 @@ define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t4 +; THUMB-LABEL: t4: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2] @@ -38,7 +38,7 @@ define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t5 +; THUMB-LABEL: t5: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: 
ldrh r{{[0-9]}}, [r0, #-254] @@ -47,7 +47,7 @@ define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t6 +; THUMB-LABEL: t6: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0] @@ -56,7 +56,7 @@ define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t7 +; THUMB-LABEL: t7: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1] @@ -65,7 +65,7 @@ define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t8 +; THUMB-LABEL: t8: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255] @@ -74,7 +74,7 @@ define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t9 +; THUMB-LABEL: t9: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0] @@ -83,81 +83,96 @@ define void @t10(i32* nocapture %ptr) nounwind { entry: -; THUMB: t10 +; THUMB-LABEL: t10: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0, #-4] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-4] ret void } define void @t11(i32* nocapture %ptr) nounwind { entry: -; THUMB: t11 +; THUMB-LABEL: t11: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0, #-252] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-252] ret void } define void @t12(i32* nocapture %ptr) nounwind { entry: -; THUMB: t12 +; THUMB-LABEL: t12: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]]] ret void } define void @t13(i16* nocapture %ptr) nounwind { entry: -; THUMB: t13 +; THUMB-LABEL: t13: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0, #-2] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-2] ret void } define void @t14(i16* nocapture %ptr) nounwind { entry: -; THUMB: t14 +; THUMB-LABEL: t14: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0, #-254] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-254] ret void } define void @t15(i16* nocapture %ptr) nounwind { entry: -; THUMB: t15 +; THUMB-LABEL: t15: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]]] ret void } define void @t16(i8* nocapture %ptr) nounwind { entry: -; THUMB: t16 +; THUMB-LABEL: t16: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0, #-1] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-1] ret void } define void @t17(i8* nocapture %ptr) nounwind { entry: -; THUMB: t17 +; THUMB-LABEL: t17: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0, 
#-255] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-255] ret void } define void @t18(i8* nocapture %ptr) nounwind { entry: -; THUMB: t18 +; THUMB-LABEL: t18: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]]] ret void } Index: test/CodeGen/ARM/fast-isel-select.ll =================================================================== --- test/CodeGen/ARM/fast-isel-select.ll +++ test/CodeGen/ARM/fast-isel-select.ll @@ -21,14 +21,12 @@ define i32 @t2(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t2 -; ARM: tst r0, #1 -; ARM: moveq r{{[1-9]}}, #20 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: moveq {{r[0-9]+}}, #20 ; THUMB-LABEL: t2 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: moveq r{{[1-9]}}, #20 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: moveq {{r[0-9]+}}, #20 %0 = select i1 %c, i32 %a, i32 20 ret i32 %0 } @@ -43,7 +41,7 @@ ; THUMB: tst.w r0, #1 ; THUMB: it ne ; THUMB: movne r2, r1 -; THUMB: add.w r0, r2, r1 +; THUMB: adds r0, r2, r1 %0 = select i1 %c, i32 %a, i32 %b %1 = add i32 %0, %a ret i32 %1 @@ -67,14 +65,12 @@ define i32 @t5(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t5 -; ARM: tst r0, #1 -; ARM: mvneq r{{[1-9]}}, #1 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: mvneq {{r[0-9]+}}, #1 ; THUMB: t5 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: mvneq r{{[1-9]}}, #1 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: mvneq {{r[0-9]+}}, #1 %0 = select i1 %c, i32 %a, i32 -2 ret i32 %0 } @@ -83,14 +79,12 @@ define i32 @t6(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t6 -; ARM: tst r0, #1 -; ARM: mvneq r{{[1-9]}}, #978944 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: mvneq {{r[0-9]+}}, #978944 ; THUMB: t6 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: mvneq r{{[1-9]}}, #978944 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: mvneq {{r[0-9]+}}, #978944 %0 = select i1 %c, i32 %a, i32 -978945 ret i32 %0 } Index: test/CodeGen/ARM/fast-isel-vararg.ll =================================================================== --- test/CodeGen/ARM/fast-isel-vararg.ll +++ test/CodeGen/ARM/fast-isel-vararg.ll @@ -17,26 +17,24 @@ %4 = load i32, i32* %n, align 4 ; ARM: VarArg ; ARM: mov [[FP:r[0-9]+]], sp -; ARM: sub sp, sp, #{{(36|40)}} +; ARM: sub sp, sp, #32 ; ARM: ldr r1, {{\[}}[[FP]], #-4] ; ARM: ldr r2, {{\[}}[[FP]], #-8] ; ARM: ldr r3, {{\[}}[[FP]], #-12] -; ARM: ldr [[Ra:r[0-9]+]], {{\[}}[[FP]], #-16] -; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #{{(16|20)}}] -; ARM: movw [[Rc:[lr]+[0-9]*]], #5 -; Ra got spilled -; ARM: mov r0, [[Rc]] -; ARM: str {{.*}}, [sp] +; ARM: ldr [[Ra:r[0-9]+|lr]], [sp, #16] +; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12] +; ARM: movw r0, #5 +; ARM: str [[Ra]], [sp] ; ARM: str [[Rb]], [sp, #4] ; ARM: bl {{_?CallVariadic}} -; THUMB: sub sp, #{{36}} -; THUMB: ldr r1, [sp, #32] -; THUMB: ldr r2, [sp, #28] -; THUMB: ldr r3, [sp, #24] -; THUMB: ldr {{[a-z0-9]+}}, [sp, #20] -; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16] -; THUMB: str.w {{[a-z0-9]+}}, [sp] -; THUMB: str.w {{[a-z0-9]+}}, [sp, #4] +; THUMB: sub sp, #32 +; THUMB: ldr r1, [sp, #28] +; THUMB: ldr r2, [sp, #24] +; THUMB: ldr r3, [sp, #20] +; THUMB: ldr.w [[Ra:r[0-9]+]], [sp, #16] +; THUMB: ldr.w [[Rb:r[0-9]+]], [sp, #12] +; THUMB: str.w [[Ra]], [sp] +; THUMB: str.w [[Rb]], 
[sp, #4] ; THUMB: bl {{_?}}CallVariadic %call = call i32 (i32, ...) @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) store i32 %call, i32* %tmp, align 4 Index: test/CodeGen/ARM/ldrd.ll =================================================================== --- test/CodeGen/ARM/ldrd.ll +++ test/CodeGen/ARM/ldrd.ll @@ -81,11 +81,12 @@ ; CHECK-LABEL: Func1: define void @Func1() nounwind ssp "no-frame-pointer-elim"="true" { entry: -; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} -; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}} +; A8: movw [[BASER:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} +; A8: movt [[BASER]], :upper16:{{.*}}TestVar{{.*}} +; A8: ldr [[BASE:r[0-9]+]], {{\[}}[[BASER]]] ; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4] -; A8-NEXT: add [[FIELD1]], [[FIELD2]] -; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}} +; A8-NEXT: add [[FIELD2]], [[FIELD1]] +; A8-NEXT: str [[FIELD2]], {{\[}}[[BASE]]{{\]}} ; CONSERVATIVE-NOT: ldrd %orig_blocks = alloca [256 x i16], align 2 %0 = bitcast [256 x i16]* %orig_blocks to i8* call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind Index: test/CodeGen/ARM/swifterror.ll =================================================================== --- test/CodeGen/ARM/swifterror.ll +++ test/CodeGen/ARM/swifterror.ll @@ -21,9 +21,9 @@ ; CHECK-O0: mov r{{.*}}, #16 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID2:r[0-9]+]], r0 -; CHECK-O0: mov [[ID:r[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [r0, #8] ; CHECK-O0: mov r8, [[ID2]] +; CHECK-O0: mov [[ID:r[0-9]+]], #1 +; CHECK-O0: strb [[ID]], {{\[}}[[ID2]], #8] entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -49,16 +49,16 @@ ; CHECK-O0-LABEL: caller: ; spill r0 ; CHECK-O0-DAG: mov r8, #0 -; CHECK-O0-DAG: str r0, [sp, [[SLOT:#[0-9]+]] +; CHECK-O0-DAG: str r0, [sp[[SLOT:(, #[0-9]+)?]]] ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov [[TMP:r[0-9]+]], r8 -; CHECK-O0: str [[TMP]], [sp] +; CHECK-O0: str [[TMP]], [sp[[SLOT2:(, #[0-9]+)?]]] ; CHECK-O0: bne +; CHECK-O0: ldr [[ID:r[0-9]+]], [sp[[SLOT]]] ; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] -; CHECK-O0: ldr [[ID:r[0-9]+]], [sp, [[SLOT]]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; reload r0 -; CHECK-O0: ldr r0, [sp] +; CHECK-O0: ldr r0, [sp[[SLOT2]]] ; CHECK-O0: free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -98,14 +98,14 @@ ; CHECK-O0-DAG: mov r8, #0 ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov r{{.*}}, r8 -; CHECK-O0: str r0, [sp] +; CHECK-O0: str r0, [sp{{(, #[0-9]+)?}}] ; CHECK-O0: bne ; CHECK-O0: ble -; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] ; reload r0 ; CHECK-O0: ldr [[ID:r[0-9]+]], +; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-O0: ldr r0, [sp] +; CHECK-O0: ldr r0, [sp{{(, #[0-9]+)?}}] ; CHECK-O0: free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -143,16 +143,15 @@ ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8] ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: cmp r0, #0 ; spill to stack ; CHECK-O0: str r8 +; CHECK-O0: cmp r0, #0 ; CHECK-O0: beq ; CHECK-O0: mov r0, #16 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID:r[0-9]+]], r0 ; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1 -; CHECK-O0: strb [[ID2]], [r0, #8] -; CHECK-O0: mov r8, [[ID]] +; CHECK-O0: strb [[ID2]], {{\[}}[[ID]], #8] ; reload from stack ; CHECK-O0: ldr r8 entry: @@ -185,7 +184,6 @@ ; CHECK-APPLE: ble ; CHECK-O0-LABEL: foo_loop: -; CHECK-O0: mov r{{.*}}, r8 ; CHECK-O0: cmp r{{.*}}, #0 ; CHECK-O0: beq ; CHECK-O0: mov r0, #16 @@ -234,18 +232,18 @@ ; CHECK-APPLE-DAG: str r{{.*}}, 
[{{.*}}[[SRET]], #4] ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: mov r{{.*}}, #16 +; CHECK-O0-DAG: mov r{{.*}}, #16 ; spill to stack: sret and val1 ; CHECK-O0-DAG: str r0 ; CHECK-O0-DAG: str r1 ; CHECK-O0: malloc -; CHECK-O0: mov [[ID:r[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [r0, #8] ; reload from stack: sret and val1 ; CHECK-O0: ldr ; CHECK-O0: ldr -; CHECK-O0: str r{{.*}}, [{{.*}}, #4] -; CHECK-O0: mov r8 +; CHECK-O0-DAG: mov r8 +; CHECK-O0-DAG: mov [[ID:r[0-9]+]], #1 +; CHECK-O0-DAG: strb [[ID]], [{{r[0-9]+}}, #8] +; CHECK-O0-DAG: str r{{.*}}, [{{.*}}, #4] entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -272,16 +270,15 @@ ; CHECK-O0-LABEL: caller3: ; CHECK-O0-DAG: mov r8, #0 -; CHECK-O0-DAG: mov r0 ; CHECK-O0-DAG: mov r1 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:r[0-9]+]], r8 -; CHECK-O0: cmp r8 ; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]] +; CHECK-O0: cmp r8 ; CHECK-O0: bne ; Access part of the error object and save it to error_ref -; CHECK-O0: ldrb [[CODE:r[0-9]+]] ; CHECK-O0: ldr [[ID:r[0-9]+]] +; CHECK-O0: ldrb [[CODE:r[0-9]+]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; CHECK-O0: ldr r0, [sp[[SLOT]] ; CHECK-O0: bl {{.*}}free Index: test/CodeGen/Hexagon/vect/vect-load-v4i16.ll =================================================================== --- test/CodeGen/Hexagon/vect/vect-load-v4i16.ll +++ test/CodeGen/Hexagon/vect/vect-load-v4i16.ll @@ -1,13 +1,13 @@ ; RUN: llc -march=hexagon -O0 -hexagon-align-loads=0 < %s | FileCheck %s ; CHECK-LABEL: danny: -; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0) -; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2) -; CHECK: [[T0]] |= asl([[T1]],#16) -; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4) -; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6) -; CHECK: [[T2]] |= asl([[T3]],#16) -; CHECK: combine([[T2]],[[T0]]) +; CHECK: r1 = r0 +; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r1+#0) +; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r1+#2) +; CHECK: r2 |= asl([[T1]],#16) +; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r1+#4) +; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r1+#6) +; CHECK: r1 |= asl([[T3]],#16) define <4 x i16> @danny(<4 x i16>* %p) { %t0 = load <4 x i16>, <4 x i16>* %p, align 2 ret <4 x i16> %t0 @@ -15,8 +15,8 @@ ; CHECK-LABEL: sammy: ; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0) -; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4) -; CHECK: combine([[T1]],[[T0]]) +; CHECK-DAG: r1 = memw(r0+#4) +; CHECK: r0 = [[T0]] define <4 x i16> @sammy(<4 x i16>* %p) { %t0 = load <4 x i16>, <4 x i16>* %p, align 4 ret <4 x i16> %t0 Index: test/CodeGen/Mips/Fast-ISel/memtest1.ll =================================================================== --- test/CodeGen/Mips/Fast-ISel/memtest1.ll +++ test/CodeGen/Mips/Fast-ISel/memtest1.ll @@ -17,15 +17,11 @@ define void @cpy(i8* %src, i32 %i) { ; ALL-LABEL: cpy: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 24($sp) - ; ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: sw $5, 20($sp) - ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) - ; ALL-DAG: move $5, $[[T1]] - ; ALL-DAG: lw $6, 20($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}}) - ; ALL: jalr $[[T2]] + ; ALL-DAG: move $6, $5 + ; ALL-DAG: move $5, $4 + ; ALL-DAG: lw $4, %got(dest)(${{[0-9]+}}) + ; ALL-DAG: lw $[[T0:[0-9]+]], %got(memcpy)($1) + ; ALL: jalr $[[T0]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false) @@ -36,15 +32,11 @@ ; ALL-LABEL: mov: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 24($sp) - ; 
ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: sw $5, 20($sp) - ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) - ; ALL-DAG: move $5, $[[T1]] - ; ALL-DAG: lw $6, 20($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}}) - ; ALL: jalr $[[T2]] + ; ALL-DAG: move $6, $5 + ; ALL-DAG: move $5, $4 + ; ALL-DAG: lw $4, %got(dest)(${{[0-9]+}}) + ; ALL-DAG: lw $[[T0:[0-9]+]], %got(memmove)(${{[0-9]+}}) + ; ALL: jalr $[[T0]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false) @@ -54,14 +46,12 @@ define void @clear(i32 %i) { ; ALL-LABEL: clear: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 16($sp) - ; ALL-DAG: move $4, $[[T0]] + ; ALL-DAG: lw $4, %got(dest)(${{[0-9]+}}) + ; ALL-DAG: move $6, $4 ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 42 ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T1]], 24 ; 32R1-DAG: sra $5, $[[T2]], 24 ; 32R2-DAG: seb $5, $[[T1]] - ; ALL-DAG: lw $6, 16($sp) ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}}) ; ALL: jalr $[[T2]] ; ALL-NEXT: nop Index: test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll =================================================================== --- test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll +++ test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll @@ -14,9 +14,9 @@ define signext i8 @add_i8_sext(i8 signext %a, i8 signext %b) { ; MIPS32-LABEL: add_i8_sext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addu $4, $5, $4 -; MIPS32-NEXT: sll $4, $4, 24 -; MIPS32-NEXT: sra $2, $4, 24 +; MIPS32-NEXT: addu $1, $5, $4 +; MIPS32-NEXT: sll $1, $1, 24 +; MIPS32-NEXT: sra $2, $1, 24 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -27,10 +27,10 @@ define zeroext i8 @add_i8_zext(i8 zeroext %a, i8 zeroext %b) { ; MIPS32-LABEL: add_i8_zext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addu $4, $5, $4 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 255 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: addu $1, $5, $4 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 255 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -52,9 +52,9 @@ define signext i16 @add_i16_sext(i16 signext %a, i16 signext %b) { ; MIPS32-LABEL: add_i16_sext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addu $4, $5, $4 -; MIPS32-NEXT: sll $4, $4, 16 -; MIPS32-NEXT: sra $2, $4, 16 +; MIPS32-NEXT: addu $1, $5, $4 +; MIPS32-NEXT: sll $1, $1, 16 +; MIPS32-NEXT: sra $2, $1, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -65,10 +65,10 @@ define zeroext i16 @add_i16_zext(i16 zeroext %a, i16 zeroext %b) { ; MIPS32-LABEL: add_i16_zext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addu $4, $5, $4 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 65535 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: addu $1, $5, $4 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 65535 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -90,14 +90,13 @@ define i64 @add_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: add_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addu $5, $7, $5 -; MIPS32-NEXT: addu $4, $6, $4 -; MIPS32-NEXT: sltu $6, $4, $6 -; MIPS32-NEXT: addu $3, $5, $6 -; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: addu $1, $7, $5 +; MIPS32-NEXT: addu $2, $6, $4 +; MIPS32-NEXT: sltu $3, $2, $6 +; MIPS32-NEXT: addu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %add = add i64 %b, %a ret i64 %add -} \ No newline at end of file +} Index: test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll 
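A note on the MIPS GlobalISel churn above: at -O0 the rewritten allocator now keeps scratch values in $1/$2 ($at/$v0) instead of recycling the argument registers $4-$7, and the trailing `move $2, ...` copy into the return register disappears when the result is computed there directly. These check lines follow the update_llc_test_checks.py format, so one way to reproduce such a diff locally is sketched below; the exact RUN invocation is an assumption (the RUN lines of these tests sit outside the hunks shown), modeled on the -O0 llc runs used elsewhere in this patch:

  $ llc -O0 -mtriple=mipsel-unknown-linux-gnu -global-isel -verify-machineinstrs \
      test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll -o -
  $ utils/update_llc_test_checks.py test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll

Index: test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll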
=================================================================== --- test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll +++ test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll @@ -15,12 +15,11 @@ ; MIPS32-NEXT: lui $1, 18838 ; MIPS32-NEXT: ori $5, $1, 722 ; MIPS32-NEXT: lui $1, 0 -; MIPS32-NEXT: ori $2, $1, 0 -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $1, 0 +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jal printf ; MIPS32-NEXT: nop -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra Index: test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll =================================================================== --- test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll +++ test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll @@ -4,11 +4,11 @@ define i32 @eq(i32 %a, i32 %b){ ; MIPS32-LABEL: eq: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: xor $4, $4, $5 -; MIPS32-NEXT: sltiu $4, $4, 1 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: xor $1, $4, $5 +; MIPS32-NEXT: sltiu $1, $1, 1 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -20,11 +20,11 @@ define i32 @ne(i32 %a, i32 %b) { ; MIPS32-LABEL: ne: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: xor $4, $4, $5 -; MIPS32-NEXT: sltu $4, $zero, $4 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: xor $1, $4, $5 +; MIPS32-NEXT: sltu $1, $zero, $1 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -36,10 +36,10 @@ define i32 @sgt(i32 %a, i32 %b) { ; MIPS32-LABEL: sgt: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $4, $5, $4 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: slt $1, $5, $4 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -51,11 +51,11 @@ define i32 @sge(i32 %a, i32 %b) { ; MIPS32-LABEL: sge: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $4, $4, $5 -; MIPS32-NEXT: xori $4, $4, 1 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: slt $1, $4, $5 +; MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -67,10 +67,10 @@ define i32 @slt(i32 %a, i32 %b) { ; MIPS32-LABEL: slt: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $4, $4, $5 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: slt $1, $4, $5 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -82,11 +82,11 @@ define i32 @sle(i32 %a, i32 %b) { ; MIPS32-LABEL: sle: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $4, $5, $4 -; MIPS32-NEXT: xori $4, $4, 1 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: slt $1, $5, $4 +; MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -98,10 
+98,10 @@ define i32 @ugt(i32 %a, i32 %b) { ; MIPS32-LABEL: ugt: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $4, $5, $4 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: sltu $1, $5, $4 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -113,11 +113,11 @@ define i32 @uge(i32 %a, i32 %b) { ; MIPS32-LABEL: uge: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $4, $4, $5 -; MIPS32-NEXT: xori $4, $4, 1 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: sltu $1, $4, $5 +; MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -129,10 +129,10 @@ define i32 @ult(i32 %a, i32 %b) { ; MIPS32-LABEL: ult: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $4, $4, $5 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: sltu $1, $4, $5 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -144,11 +144,11 @@ define i32 @ule(i32 %a, i32 %b) { ; MIPS32-LABEL: ule: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $4, $5, $4 -; MIPS32-NEXT: xori $4, $4, 1 -; MIPS32-NEXT: lui $5, 0 -; MIPS32-NEXT: ori $5, $5, 1 -; MIPS32-NEXT: and $2, $4, $5 +; MIPS32-NEXT: sltu $1, $5, $4 +; MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: lui $2, 0 +; MIPS32-NEXT: ori $2, $2, 1 +; MIPS32-NEXT: and $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: test/CodeGen/Mips/atomic.ll =================================================================== --- test/CodeGen/Mips/atomic.ll +++ test/CodeGen/Mips/atomic.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s -rafast-ignore-missing-defs | \ ; RUN: FileCheck %s -check-prefix=MIPS32O0 ; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32R2 ; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32R6 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s -rafast-ignore-missing-defs | \ ; RUN: FileCheck %s -check-prefix=MIPS32R6O0 ; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS4 @@ -17,7 +17,7 @@ ; RUN: FileCheck %s -check-prefix=MIPS64R2 ; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ; 
RUN: FileCheck %s -check-prefix=MIPS64R6 -; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ +; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s -rafast-ignore-missing-defs | \ ; RUN: FileCheck %s -check-prefix=MIPS64R6O0 ; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MM32 @@ -56,17 +56,16 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(x)($2) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB0_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: addu $1, $25, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB0_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -107,20 +106,17 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB0_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: addu $3, $1, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB0_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB0_1 +; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicLoadAdd32: @@ -195,16 +191,16 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB0_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: addu $5, $3, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB0_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: AtomicLoadAdd32: @@ -317,17 +313,16 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(x)($2) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB1_1: # %entry 
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: subu $1, $25, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: subu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB1_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -368,20 +363,17 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB1_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: subu $3, $1, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB1_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: subu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB1_1 +; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicLoadSub32: @@ -456,16 +448,16 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB1_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: subu $5, $3, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB1_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: subu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: AtomicLoadSub32: @@ -578,17 +570,16 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(x)($2) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB2_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: xor $1, $25, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: xor $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB2_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -629,20 +620,17 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB2_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner 
Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: xor $3, $1, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB2_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: xor $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB2_1 +; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicLoadXor32: @@ -717,16 +705,16 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB2_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: xor $5, $3, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB2_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: AtomicLoadXor32: @@ -838,17 +826,16 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(x)($2) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB3_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: or $1, $25, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: or $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB3_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -889,20 +876,17 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB3_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: or $3, $1, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB3_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: or $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB3_1 +; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicLoadOr32: @@ -977,16 +961,16 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, 
%got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB3_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: or $5, $3, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB3_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: AtomicLoadOr32: @@ -1098,17 +1082,16 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(x)($2) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB4_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: and $1, $25, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB4_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -1149,20 +1132,17 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB4_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: and $3, $1, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB4_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB4_1 +; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicLoadAnd32: @@ -1237,16 +1217,16 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB4_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: and $5, $3, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB4_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: AtomicLoadAnd32: @@ -1359,18 +1339,17 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(x)($2) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB5_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: 
Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: and $1, $25, $4 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 ; MIPS32O0-NEXT: nor $1, $zero, $1 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB5_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -1413,21 +1392,18 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB5_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: and $3, $1, $4 -; MIPS32R6O0-NEXT: nor $3, $zero, $3 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB5_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: nor $1, $zero, $1 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB5_1 +; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicLoadNand32: @@ -1506,17 +1482,17 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB5_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: and $5, $3, $2 -; MIPS64R6O0-NEXT: nor $5, $zero, $5 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB5_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: AtomicLoadNand32: @@ -1637,19 +1613,18 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $2, $2, $25 +; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sw $4, 4($sp) ; MIPS32O0-NEXT: lw $4, 4($sp) -; MIPS32O0-NEXT: lw $2, %got(x)($2) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB6_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) ; MIPS32O0-NEXT: move $1, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB6_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -1697,20 +1672,18 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: 
# kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: sw $4, 4($sp) ; MIPS32R6O0-NEXT: lw $4, 4($sp) -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB6_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: move $3, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB6_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: move $1, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB6_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -1798,16 +1771,15 @@ ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ; MIPS64R6O0-NEXT: move $2, $4 ; MIPS64R6O0-NEXT: sw $2, 12($sp) -; MIPS64R6O0-NEXT: lw $2, 12($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: lw $4, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB6_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: move $5, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB6_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -1941,32 +1913,28 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addiu $sp, $sp, -16 +; MIPS32O0-NEXT: addiu $sp, $sp, -8 +; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: sw $5, 12($sp) -; MIPS32O0-NEXT: lw $5, 12($sp) -; MIPS32O0-NEXT: lw $2, %got(x)($2) -; MIPS32O0-NEXT: lw $25, 8($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: move $1, $4 +; MIPS32O0-NEXT: sw $5, 4($sp) +; MIPS32O0-NEXT: lw $6, 4($sp) +; MIPS32O0-NEXT: lw $3, %got(x)($2) +; MIPS32O0-NEXT: move $5, $4 ; MIPS32O0-NEXT: $BB7_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $3, 0($2) -; MIPS32O0-NEXT: bne $3, $1, $BB7_3 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: bne $2, $5, $BB7_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS32O0-NEXT: move $6, $5 -; MIPS32O0-NEXT: sc $6, 0($2) -; MIPS32O0-NEXT: beqz $6, $BB7_1 +; MIPS32O0-NEXT: move $1, $6 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB7_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB7_3: # %entry -; MIPS32O0-NEXT: xor $1, $3, $4 +; MIPS32O0-NEXT: xor $1, $2, $4 ; MIPS32O0-NEXT: sltiu $1, $1, 1 -; MIPS32O0-NEXT: move $2, $3 -; MIPS32O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: addiu $sp, $sp, 16 +; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -2019,30 +1987,25 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -24 +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $5 -; MIPS32R6O0-NEXT: move $1, $4 -; MIPS32R6O0-NEXT: sw $5, 
20($sp) -; MIPS32R6O0-NEXT: lw $5, 20($sp) -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) -; MIPS32R6O0-NEXT: lw $3, 16($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: # kill: def $v1 killed $a1 +; MIPS32R6O0-NEXT: # kill: def $v1 killed $a0 +; MIPS32R6O0-NEXT: sw $5, 4($sp) +; MIPS32R6O0-NEXT: lw $5, 4($sp) +; MIPS32R6O0-NEXT: lw $3, %got(x)($2) ; MIPS32R6O0-NEXT: $BB7_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $6, 0($2) -; MIPS32R6O0-NEXT: bnec $6, $4, $BB7_3 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: bnec $2, $4, $BB7_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS32R6O0-NEXT: move $7, $5 -; MIPS32R6O0-NEXT: sc $7, 0($2) -; MIPS32R6O0-NEXT: beqzc $7, $BB7_1 +; MIPS32R6O0-NEXT: move $1, $5 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB7_1 ; MIPS32R6O0-NEXT: $BB7_3: # %entry -; MIPS32R6O0-NEXT: move $2, $6 -; MIPS32R6O0-NEXT: sw $6, 16($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 24 +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicCmpSwap32: @@ -2139,28 +2102,25 @@ ; MIPS64R6O0-LABEL: AtomicCmpSwap32: ; MIPS64R6O0: # %bb.0: # %entry ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 -; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) -; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) -; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lui $2, %hi(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R6O0-NEXT: daddu $2, $2, $25 +; MIPS64R6O0-NEXT: daddiu $2, $2, %lo(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ; MIPS64R6O0-NEXT: move $3, $5 ; MIPS64R6O0-NEXT: sw $3, 12($sp) -; MIPS64R6O0-NEXT: lw $3, 12($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) -; MIPS64R6O0-NEXT: lw $6, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $5, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($2) ; MIPS64R6O0-NEXT: .LBB7_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($1) -; MIPS64R6O0-NEXT: bnec $7, $2, .LBB7_3 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS64R6O0-NEXT: move $8, $3 -; MIPS64R6O0-NEXT: sc $8, 0($1) -; MIPS64R6O0-NEXT: beqzc $8, .LBB7_1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ; MIPS64R6O0-NEXT: .LBB7_3: # %entry -; MIPS64R6O0-NEXT: move $2, $7 -; MIPS64R6O0-NEXT: sw $7, 8($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -2329,37 +2289,37 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(y)($2) -; MIPS32O0-NEXT: addiu $25, $zero, -4 -; MIPS32O0-NEXT: and $25, $2, $25 -; MIPS32O0-NEXT: andi $2, $2, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(y)($1) +; MIPS32O0-NEXT: addiu $2, $zero, -4 +; MIPS32O0-NEXT: and $5, $1, $2 +; MIPS32O0-NEXT: andi $1, $1, 3 +; MIPS32O0-NEXT: sll $9, 
$1, 3 ; MIPS32O0-NEXT: ori $1, $zero, 255 -; MIPS32O0-NEXT: sllv $1, $1, $2 -; MIPS32O0-NEXT: nor $3, $zero, $1 -; MIPS32O0-NEXT: sllv $4, $4, $2 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB8_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $6, 0($25) -; MIPS32O0-NEXT: addu $7, $6, $4 -; MIPS32O0-NEXT: and $7, $7, $1 -; MIPS32O0-NEXT: and $8, $6, $3 -; MIPS32O0-NEXT: or $8, $8, $7 -; MIPS32O0-NEXT: sc $8, 0($25) -; MIPS32O0-NEXT: beqz $8, $BB8_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: addu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB8_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $5, $6, $1 -; MIPS32O0-NEXT: srlv $5, $5, $2 -; MIPS32O0-NEXT: sll $5, $5, 24 -; MIPS32O0-NEXT: sra $5, $5, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -2432,35 +2392,34 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(y)($2) -; MIPS32R6O0-NEXT: addiu $1, $zero, -4 -; MIPS32R6O0-NEXT: and $1, $2, $1 -; MIPS32R6O0-NEXT: andi $2, $2, 3 -; MIPS32R6O0-NEXT: sll $2, $2, 3 -; MIPS32R6O0-NEXT: ori $3, $zero, 255 -; MIPS32R6O0-NEXT: sllv $3, $3, $2 -; MIPS32R6O0-NEXT: nor $5, $zero, $3 -; MIPS32R6O0-NEXT: sllv $4, $4, $2 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(y)($1) +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 +; MIPS32R6O0-NEXT: andi $1, $1, 3 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB8_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $7, 0($1) -; MIPS32R6O0-NEXT: addu $8, $7, $4 -; MIPS32R6O0-NEXT: and $8, $8, $3 -; MIPS32R6O0-NEXT: and $9, $7, $5 -; MIPS32R6O0-NEXT: or $9, $9, $8 -; MIPS32R6O0-NEXT: sc $9, 0($1) -; MIPS32R6O0-NEXT: beqzc $9, $BB8_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: addu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB8_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $6, $7, $3 -; MIPS32R6O0-NEXT: srlv $6, $6, $2 -; MIPS32R6O0-NEXT: seb $6, $6 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $6, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; 
MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra @@ -2601,33 +2560,33 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 -; MIPS64R6O0-NEXT: and $4, $1, $4 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $2, $2, $3 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB8_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($4) -; MIPS64R6O0-NEXT: addu $9, $8, $2 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($4) -; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB8_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -2846,37 +2805,37 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(y)($2) -; MIPS32O0-NEXT: addiu $25, $zero, -4 -; MIPS32O0-NEXT: and $25, $2, $25 -; MIPS32O0-NEXT: andi $2, $2, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(y)($1) +; MIPS32O0-NEXT: addiu $2, $zero, -4 +; MIPS32O0-NEXT: and $5, $1, $2 +; MIPS32O0-NEXT: andi $1, $1, 3 +; MIPS32O0-NEXT: sll $9, $1, 3 ; MIPS32O0-NEXT: ori $1, $zero, 255 -; MIPS32O0-NEXT: sllv $1, $1, $2 -; MIPS32O0-NEXT: nor $3, $zero, $1 -; MIPS32O0-NEXT: sllv $4, $4, $2 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB9_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $6, 0($25) -; MIPS32O0-NEXT: subu $7, $6, $4 -; MIPS32O0-NEXT: and $7, $7, $1 -; MIPS32O0-NEXT: and $8, $6, $3 -; MIPS32O0-NEXT: or $8, $8, $7 -; MIPS32O0-NEXT: sc $8, 0($25) -; 
MIPS32O0-NEXT: beqz $8, $BB9_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: subu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB9_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $5, $6, $1 -; MIPS32O0-NEXT: srlv $5, $5, $2 -; MIPS32O0-NEXT: sll $5, $5, 24 -; MIPS32O0-NEXT: sra $5, $5, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -2949,35 +2908,34 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(y)($2) -; MIPS32R6O0-NEXT: addiu $1, $zero, -4 -; MIPS32R6O0-NEXT: and $1, $2, $1 -; MIPS32R6O0-NEXT: andi $2, $2, 3 -; MIPS32R6O0-NEXT: sll $2, $2, 3 -; MIPS32R6O0-NEXT: ori $3, $zero, 255 -; MIPS32R6O0-NEXT: sllv $3, $3, $2 -; MIPS32R6O0-NEXT: nor $5, $zero, $3 -; MIPS32R6O0-NEXT: sllv $4, $4, $2 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(y)($1) +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 +; MIPS32R6O0-NEXT: andi $1, $1, 3 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB9_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $7, 0($1) -; MIPS32R6O0-NEXT: subu $8, $7, $4 -; MIPS32R6O0-NEXT: and $8, $8, $3 -; MIPS32R6O0-NEXT: and $9, $7, $5 -; MIPS32R6O0-NEXT: or $9, $9, $8 -; MIPS32R6O0-NEXT: sc $9, 0($1) -; MIPS32R6O0-NEXT: beqzc $9, $BB9_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: subu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB9_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $6, $7, $3 -; MIPS32R6O0-NEXT: srlv $6, $6, $2 -; MIPS32R6O0-NEXT: seb $6, $6 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $6, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra @@ -3118,33 +3076,33 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, 
%got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 -; MIPS64R6O0-NEXT: and $4, $1, $4 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $2, $2, $3 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB9_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($4) -; MIPS64R6O0-NEXT: subu $9, $8, $2 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($4) -; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: subu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB9_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3365,38 +3323,38 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(y)($2) -; MIPS32O0-NEXT: addiu $25, $zero, -4 -; MIPS32O0-NEXT: and $25, $2, $25 -; MIPS32O0-NEXT: andi $2, $2, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(y)($1) +; MIPS32O0-NEXT: addiu $2, $zero, -4 +; MIPS32O0-NEXT: and $5, $1, $2 +; MIPS32O0-NEXT: andi $1, $1, 3 +; MIPS32O0-NEXT: sll $9, $1, 3 ; MIPS32O0-NEXT: ori $1, $zero, 255 -; MIPS32O0-NEXT: sllv $1, $1, $2 -; MIPS32O0-NEXT: nor $3, $zero, $1 -; MIPS32O0-NEXT: sllv $4, $4, $2 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB10_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $6, 0($25) -; MIPS32O0-NEXT: and $7, $6, $4 -; MIPS32O0-NEXT: nor $7, $zero, $7 -; MIPS32O0-NEXT: and $7, $7, $1 -; MIPS32O0-NEXT: and $8, $6, $3 -; MIPS32O0-NEXT: or $8, $8, $7 -; MIPS32O0-NEXT: sc $8, 0($25) -; MIPS32O0-NEXT: beqz $8, $BB10_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: and $3, $2, $6 +; MIPS32O0-NEXT: nor $3, $zero, $3 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB10_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $5, $6, $1 -; MIPS32O0-NEXT: srlv $5, $5, $2 -; MIPS32O0-NEXT: sll $5, $5, 24 -; MIPS32O0-NEXT: sra $5, $5, 24 +; MIPS32O0-NEXT: 
and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -3471,36 +3429,35 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(y)($2) -; MIPS32R6O0-NEXT: addiu $1, $zero, -4 -; MIPS32R6O0-NEXT: and $1, $2, $1 -; MIPS32R6O0-NEXT: andi $2, $2, 3 -; MIPS32R6O0-NEXT: sll $2, $2, 3 -; MIPS32R6O0-NEXT: ori $3, $zero, 255 -; MIPS32R6O0-NEXT: sllv $3, $3, $2 -; MIPS32R6O0-NEXT: nor $5, $zero, $3 -; MIPS32R6O0-NEXT: sllv $4, $4, $2 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(y)($1) +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 +; MIPS32R6O0-NEXT: andi $1, $1, 3 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB10_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $7, 0($1) -; MIPS32R6O0-NEXT: and $8, $7, $4 -; MIPS32R6O0-NEXT: nor $8, $zero, $8 -; MIPS32R6O0-NEXT: and $8, $8, $3 -; MIPS32R6O0-NEXT: and $9, $7, $5 -; MIPS32R6O0-NEXT: or $9, $9, $8 -; MIPS32R6O0-NEXT: sc $9, 0($1) -; MIPS32R6O0-NEXT: beqzc $9, $BB10_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: and $3, $2, $6 +; MIPS32R6O0-NEXT: nor $3, $zero, $3 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB10_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $6, $7, $3 -; MIPS32R6O0-NEXT: srlv $6, $6, $2 -; MIPS32R6O0-NEXT: seb $6, $6 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $6, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra @@ -3645,34 +3602,34 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 -; MIPS64R6O0-NEXT: and $4, $1, $4 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $2, $2, $3 +; MIPS64R6O0-NEXT: daddiu $2, $1, 
%lo(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB10_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($4) -; MIPS64R6O0-NEXT: and $9, $8, $2 -; MIPS64R6O0-NEXT: nor $9, $zero, $9 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($4) -; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: and $3, $2, $6 +; MIPS64R6O0-NEXT: nor $3, $zero, $3 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB10_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3896,36 +3853,36 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(y)($2) -; MIPS32O0-NEXT: addiu $25, $zero, -4 -; MIPS32O0-NEXT: and $25, $2, $25 -; MIPS32O0-NEXT: andi $2, $2, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(y)($1) +; MIPS32O0-NEXT: addiu $2, $zero, -4 +; MIPS32O0-NEXT: and $5, $1, $2 +; MIPS32O0-NEXT: andi $1, $1, 3 +; MIPS32O0-NEXT: sll $9, $1, 3 ; MIPS32O0-NEXT: ori $1, $zero, 255 -; MIPS32O0-NEXT: sllv $1, $1, $2 -; MIPS32O0-NEXT: nor $3, $zero, $1 -; MIPS32O0-NEXT: sllv $4, $4, $2 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB11_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $6, 0($25) -; MIPS32O0-NEXT: and $7, $4, $1 -; MIPS32O0-NEXT: and $8, $6, $3 -; MIPS32O0-NEXT: or $8, $8, $7 -; MIPS32O0-NEXT: sc $8, 0($25) -; MIPS32O0-NEXT: beqz $8, $BB11_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: and $3, $6, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB11_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $5, $6, $1 -; MIPS32O0-NEXT: srlv $5, $5, $2 -; MIPS32O0-NEXT: sll $5, $5, 24 -; MIPS32O0-NEXT: sra $5, $5, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, 
$1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -3996,34 +3953,33 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(y)($2) -; MIPS32R6O0-NEXT: addiu $1, $zero, -4 -; MIPS32R6O0-NEXT: and $1, $2, $1 -; MIPS32R6O0-NEXT: andi $2, $2, 3 -; MIPS32R6O0-NEXT: sll $2, $2, 3 -; MIPS32R6O0-NEXT: ori $3, $zero, 255 -; MIPS32R6O0-NEXT: sllv $3, $3, $2 -; MIPS32R6O0-NEXT: nor $5, $zero, $3 -; MIPS32R6O0-NEXT: sllv $4, $4, $2 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(y)($1) +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 +; MIPS32R6O0-NEXT: andi $1, $1, 3 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB11_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $7, 0($1) -; MIPS32R6O0-NEXT: and $8, $4, $3 -; MIPS32R6O0-NEXT: and $9, $7, $5 -; MIPS32R6O0-NEXT: or $9, $9, $8 -; MIPS32R6O0-NEXT: sc $9, 0($1) -; MIPS32R6O0-NEXT: beqzc $9, $BB11_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: and $3, $6, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB11_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $6, $7, $3 -; MIPS32R6O0-NEXT: srlv $6, $6, $2 -; MIPS32R6O0-NEXT: seb $6, $6 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $6, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra @@ -4160,32 +4116,32 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 -; MIPS64R6O0-NEXT: and $4, $1, $4 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $2, $2, $3 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicSwap8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB11_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($4) -; MIPS64R6O0-NEXT: and $9, $2, $5 
-; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($4) -; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: and $3, $6, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB11_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4404,42 +4360,44 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(y)($2) -; MIPS32O0-NEXT: addiu $25, $zero, -4 -; MIPS32O0-NEXT: and $25, $2, $25 -; MIPS32O0-NEXT: andi $2, $2, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 -; MIPS32O0-NEXT: ori $1, $zero, 255 -; MIPS32O0-NEXT: sllv $1, $1, $2 -; MIPS32O0-NEXT: nor $3, $zero, $1 -; MIPS32O0-NEXT: andi $4, $4, 255 -; MIPS32O0-NEXT: sllv $4, $4, $2 -; MIPS32O0-NEXT: andi $5, $5, 255 -; MIPS32O0-NEXT: sllv $5, $5, $2 +; MIPS32O0-NEXT: addu $3, $2, $25 +; MIPS32O0-NEXT: move $2, $4 +; MIPS32O0-NEXT: move $1, $5 +; MIPS32O0-NEXT: lw $3, %got(y)($3) +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 +; MIPS32O0-NEXT: ori $3, $zero, 255 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 255 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 255 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB12_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($25) -; MIPS32O0-NEXT: and $8, $7, $1 -; MIPS32O0-NEXT: bne $8, $4, $BB12_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB12_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS32O0-NEXT: and $7, $7, $3 -; MIPS32O0-NEXT: or $7, $7, $5 -; MIPS32O0-NEXT: sc $7, 0($25) -; MIPS32O0-NEXT: beqz $7, $BB12_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB12_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB12_3: # %entry -; MIPS32O0-NEXT: srlv $6, $8, $2 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.4: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -4519,43 +4477,43 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -16 -; MIPS32R6O0-NEXT: addu 
$2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $5 -; MIPS32R6O0-NEXT: move $1, $4 -; MIPS32R6O0-NEXT: lw $2, %got(y)($2) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $2, $3 -; MIPS32R6O0-NEXT: andi $2, $2, 3 -; MIPS32R6O0-NEXT: sll $2, $2, 3 -; MIPS32R6O0-NEXT: ori $6, $zero, 255 -; MIPS32R6O0-NEXT: sllv $6, $6, $2 -; MIPS32R6O0-NEXT: nor $7, $zero, $6 -; MIPS32R6O0-NEXT: andi $4, $4, 255 -; MIPS32R6O0-NEXT: sllv $4, $4, $2 -; MIPS32R6O0-NEXT: andi $5, $5, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $2 +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: addu $3, $2, $25 +; MIPS32R6O0-NEXT: move $1, $5 +; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $a1 killed $at +; MIPS32R6O0-NEXT: # kill: def $a0 killed $v0 +; MIPS32R6O0-NEXT: lw $3, %got(y)($3) +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 255 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 255 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 255 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB12_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $9, 0($3) -; MIPS32R6O0-NEXT: and $10, $9, $6 -; MIPS32R6O0-NEXT: bnec $10, $4, $BB12_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB12_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS32R6O0-NEXT: and $9, $9, $7 -; MIPS32R6O0-NEXT: or $9, $9, $5 -; MIPS32R6O0-NEXT: sc $9, 0($3) -; MIPS32R6O0-NEXT: beqzc $9, $BB12_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB12_1 ; MIPS32R6O0-NEXT: $BB12_3: # %entry -; MIPS32R6O0-NEXT: srlv $8, $10, $2 -; MIPS32R6O0-NEXT: seb $8, $8 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry -; MIPS32R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: addiu $sp, $sp, 16 +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicCmpSwap8: @@ -4712,38 +4670,38 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) -; MIPS64R6O0-NEXT: move $2, $5 -; MIPS64R6O0-NEXT: move $3, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) +; MIPS64R6O0-NEXT: daddiu $3, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: ld $3, %got_disp(y)($3) ; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 -; MIPS64R6O0-NEXT: and $4, $1, $4 -; MIPS64R6O0-NEXT: andi $6, $1, 3 -; MIPS64R6O0-NEXT: xori $6, $6, 3 -; MIPS64R6O0-NEXT: sll $6, $6, 3 -; MIPS64R6O0-NEXT: ori $7, $zero, 255 -; MIPS64R6O0-NEXT: sllv $7, $7, $6 -; MIPS64R6O0-NEXT: nor $8, $zero, $7 -; MIPS64R6O0-NEXT: andi $3, $3, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $6 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 
+; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $9, $3, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 ; MIPS64R6O0-NEXT: andi $2, $2, 255 -; MIPS64R6O0-NEXT: sllv $2, $2, $6 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 +; MIPS64R6O0-NEXT: andi $1, $1, 255 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB12_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $10, 0($4) -; MIPS64R6O0-NEXT: and $11, $10, $7 -; MIPS64R6O0-NEXT: bnec $11, $3, .LBB12_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB12_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS64R6O0-NEXT: and $10, $10, $8 -; MIPS64R6O0-NEXT: or $10, $10, $2 -; MIPS64R6O0-NEXT: sc $10, 0($4) -; MIPS64R6O0-NEXT: beqzc $10, .LBB12_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB12_1 ; MIPS64R6O0-NEXT: .LBB12_3: # %entry -; MIPS64R6O0-NEXT: srlv $9, $11, $6 -; MIPS64R6O0-NEXT: seb $9, $9 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $9, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -4987,44 +4945,47 @@ ; MIPS32O0-LABEL: AtomicCmpSwapRes8: ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addiu $1, $zero, -4 -; MIPS32O0-NEXT: and $1, $4, $1 -; MIPS32O0-NEXT: andi $4, $4, 3 -; MIPS32O0-NEXT: sll $4, $4, 3 -; MIPS32O0-NEXT: ori $2, $zero, 255 -; MIPS32O0-NEXT: sllv $2, $2, $4 -; MIPS32O0-NEXT: nor $3, $zero, $2 -; MIPS32O0-NEXT: andi $7, $5, 255 -; MIPS32O0-NEXT: sllv $7, $7, $4 -; MIPS32O0-NEXT: andi $6, $6, 255 -; MIPS32O0-NEXT: sllv $6, $6, $4 +; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: move $2, $5 +; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: move $1, $6 +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 +; MIPS32O0-NEXT: ori $3, $zero, 255 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 255 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 255 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB13_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $9, 0($1) -; MIPS32O0-NEXT: and $10, $9, $2 -; MIPS32O0-NEXT: bne $10, $7, $BB13_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB13_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS32O0-NEXT: and $9, $9, $3 -; MIPS32O0-NEXT: or $9, $9, $6 -; MIPS32O0-NEXT: sc $9, 0($1) -; MIPS32O0-NEXT: beqz $9, $BB13_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB13_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB13_3: # %entry -; MIPS32O0-NEXT: srlv $8, $10, $4 -; MIPS32O0-NEXT: sll $8, $8, 24 -; MIPS32O0-NEXT: sra $8, $8, 24 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.4: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill 
-; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 +; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $2, $2, 24 ; MIPS32O0-NEXT: sra $2, $2, 24 -; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: xor $2, $3, $2 -; MIPS32O0-NEXT: sltiu $2, $2, 1 +; MIPS32O0-NEXT: xor $1, $1, $2 +; MIPS32O0-NEXT: sltiu $2, $1, 1 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -5097,47 +5058,47 @@ ; ; MIPS32R6O0-LABEL: AtomicCmpSwapRes8: ; MIPS32R6O0: # %bb.0: # %entry -; MIPS32R6O0-NEXT: addiu $sp, $sp, -24 +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: move $1, $6 ; MIPS32R6O0-NEXT: move $2, $5 +; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: move $3, $4 -; MIPS32R6O0-NEXT: addiu $7, $zero, -4 -; MIPS32R6O0-NEXT: and $7, $4, $7 -; MIPS32R6O0-NEXT: andi $4, $4, 3 -; MIPS32R6O0-NEXT: sll $4, $4, 3 -; MIPS32R6O0-NEXT: ori $8, $zero, 255 -; MIPS32R6O0-NEXT: sllv $8, $8, $4 -; MIPS32R6O0-NEXT: nor $9, $zero, $8 -; MIPS32R6O0-NEXT: andi $10, $5, 255 -; MIPS32R6O0-NEXT: sllv $10, $10, $4 -; MIPS32R6O0-NEXT: andi $6, $6, 255 -; MIPS32R6O0-NEXT: sllv $6, $6, $4 +; MIPS32R6O0-NEXT: # kill: def $a2 killed $at +; MIPS32R6O0-NEXT: # kill: def $a1 killed $v0 +; MIPS32R6O0-NEXT: # kill: def $a0 killed $v1 +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 255 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 255 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 255 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB13_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $12, 0($7) -; MIPS32R6O0-NEXT: and $13, $12, $8 -; MIPS32R6O0-NEXT: bnec $13, $10, $BB13_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB13_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS32R6O0-NEXT: and $12, $12, $9 -; MIPS32R6O0-NEXT: or $12, $12, $6 -; MIPS32R6O0-NEXT: sc $12, 0($7) -; MIPS32R6O0-NEXT: beqzc $12, $BB13_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB13_1 ; MIPS32R6O0-NEXT: $BB13_3: # %entry -; MIPS32R6O0-NEXT: srlv $11, $13, $4 -; MIPS32R6O0-NEXT: seb $11, $11 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $11, 20($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $5, 16($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $3, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $1, 8($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry -; MIPS32R6O0-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: xor $1, $1, $2 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 -; MIPS32R6O0-NEXT: addiu $sp, $sp, 24 +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; 
MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: AtomicCmpSwapRes8: @@ -5283,42 +5244,42 @@ ; MIPS64R6O0-LABEL: AtomicCmpSwapRes8: ; MIPS64R6O0: # %bb.0: # %entry ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: move $3, $4 ; MIPS64R6O0-NEXT: move $1, $6 ; MIPS64R6O0-NEXT: move $2, $5 -; MIPS64R6O0-NEXT: move $5, $4 -; MIPS64R6O0-NEXT: daddiu $6, $zero, -4 -; MIPS64R6O0-NEXT: and $6, $4, $6 -; MIPS64R6O0-NEXT: andi $3, $4, 3 +; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: # kill: def $a0_64 killed $v1_64 +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 ; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $7, $zero, 255 -; MIPS64R6O0-NEXT: sllv $7, $7, $3 -; MIPS64R6O0-NEXT: nor $8, $zero, $7 -; MIPS64R6O0-NEXT: andi $9, $2, 255 -; MIPS64R6O0-NEXT: sllv $9, $9, $3 +; MIPS64R6O0-NEXT: sll $9, $3, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 255 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 ; MIPS64R6O0-NEXT: andi $1, $1, 255 -; MIPS64R6O0-NEXT: sllv $1, $1, $3 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB13_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $11, 0($6) -; MIPS64R6O0-NEXT: and $12, $11, $7 -; MIPS64R6O0-NEXT: bnec $12, $9, .LBB13_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB13_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS64R6O0-NEXT: and $11, $11, $8 -; MIPS64R6O0-NEXT: or $11, $11, $1 -; MIPS64R6O0-NEXT: sc $11, 0($6) -; MIPS64R6O0-NEXT: beqzc $11, .LBB13_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB13_1 ; MIPS64R6O0-NEXT: .LBB13_3: # %entry -; MIPS64R6O0-NEXT: srlv $10, $12, $3 -; MIPS64R6O0-NEXT: seb $10, $10 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sd $5, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: sw $10, 4($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry -; MIPS64R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: xor $1, $1, $2 ; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -5557,37 +5518,37 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(z)($2) -; MIPS32O0-NEXT: addiu $25, $zero, -4 -; MIPS32O0-NEXT: and $25, $2, $25 -; MIPS32O0-NEXT: andi $2, $2, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(z)($1) +; MIPS32O0-NEXT: addiu $2, $zero, -4 +; MIPS32O0-NEXT: and $5, $1, $2 +; MIPS32O0-NEXT: andi $1, $1, 3 +; MIPS32O0-NEXT: sll $9, $1, 3 ; MIPS32O0-NEXT: ori $1, $zero, 65535 -; MIPS32O0-NEXT: sllv $1, $1, $2 -; MIPS32O0-NEXT: nor $3, $zero, $1 -; MIPS32O0-NEXT: sllv $4, $4, $2 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; 
MIPS32O0-NEXT: $BB14_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $6, 0($25) -; MIPS32O0-NEXT: addu $7, $6, $4 -; MIPS32O0-NEXT: and $7, $7, $1 -; MIPS32O0-NEXT: and $8, $6, $3 -; MIPS32O0-NEXT: or $8, $8, $7 -; MIPS32O0-NEXT: sc $8, 0($25) -; MIPS32O0-NEXT: beqz $8, $BB14_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: addu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB14_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $5, $6, $1 -; MIPS32O0-NEXT: srlv $5, $5, $2 -; MIPS32O0-NEXT: sll $5, $5, 16 -; MIPS32O0-NEXT: sra $5, $5, 16 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 16 -; MIPS32O0-NEXT: sra $2, $2, 16 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $2, $1, 16 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -5660,35 +5621,34 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(z)($2) -; MIPS32R6O0-NEXT: addiu $1, $zero, -4 -; MIPS32R6O0-NEXT: and $1, $2, $1 -; MIPS32R6O0-NEXT: andi $2, $2, 3 -; MIPS32R6O0-NEXT: sll $2, $2, 3 -; MIPS32R6O0-NEXT: ori $3, $zero, 65535 -; MIPS32R6O0-NEXT: sllv $3, $3, $2 -; MIPS32R6O0-NEXT: nor $5, $zero, $3 -; MIPS32R6O0-NEXT: sllv $4, $4, $2 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(z)($1) +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 +; MIPS32R6O0-NEXT: andi $1, $1, 3 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 65535 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB14_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $7, 0($1) -; MIPS32R6O0-NEXT: addu $8, $7, $4 -; MIPS32R6O0-NEXT: and $8, $8, $3 -; MIPS32R6O0-NEXT: and $9, $7, $5 -; MIPS32R6O0-NEXT: or $9, $9, $8 -; MIPS32R6O0-NEXT: sc $9, 0($1) -; MIPS32R6O0-NEXT: beqzc $9, $BB14_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: addu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB14_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $6, $7, $3 -; MIPS32R6O0-NEXT: srlv $6, $6, $2 -; MIPS32R6O0-NEXT: seh $6, $6 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seh $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $6, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seh $2, $1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc 
$ra @@ -5829,33 +5789,33 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) -; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 -; MIPS64R6O0-NEXT: and $4, $1, $4 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 2 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $2, $2, $3 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(z)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 2 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB14_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($4) -; MIPS64R6O0-NEXT: addu $9, $8, $2 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($4) -; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB14_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seh $7, $7 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seh $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -6084,46 +6044,47 @@ ; MIPS32O0: # %bb.0: ; MIPS32O0-NEXT: addiu $sp, $sp, -8 ; MIPS32O0-NEXT: .cfi_def_cfa_offset 8 -; MIPS32O0-NEXT: addu $5, $5, $6 +; MIPS32O0-NEXT: move $1, $7 +; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: addu $2, $5, $6 +; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: addiu $6, $zero, -4 -; MIPS32O0-NEXT: and $6, $4, $6 -; MIPS32O0-NEXT: andi $4, $4, 3 -; MIPS32O0-NEXT: sll $4, $4, 3 -; MIPS32O0-NEXT: ori $1, $zero, 65535 -; MIPS32O0-NEXT: sllv $1, $1, $4 -; MIPS32O0-NEXT: nor $2, $zero, $1 -; MIPS32O0-NEXT: andi $3, $5, 65535 -; MIPS32O0-NEXT: sllv $3, $3, $4 -; MIPS32O0-NEXT: andi $7, $7, 65535 -; MIPS32O0-NEXT: sllv $7, $7, $4 +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 +; MIPS32O0-NEXT: ori $3, $zero, 65535 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 65535 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 65535 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $9, 0($6) -; MIPS32O0-NEXT: and $10, $9, $1 -; MIPS32O0-NEXT: bne $10, $3, $BB15_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, 
$5 +; MIPS32O0-NEXT: bne $3, $6, $BB15_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS32O0-NEXT: and $9, $9, $2 -; MIPS32O0-NEXT: or $9, $9, $7 -; MIPS32O0-NEXT: sc $9, 0($6) -; MIPS32O0-NEXT: beqz $9, $BB15_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB15_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB15_3: -; MIPS32O0-NEXT: srlv $8, $10, $4 -; MIPS32O0-NEXT: sll $8, $8, 16 -; MIPS32O0-NEXT: sra $8, $8, 16 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 ; MIPS32O0-NEXT: # %bb.4: -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: -; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 16 -; MIPS32O0-NEXT: sra $2, $2, 16 -; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: xor $2, $3, $2 -; MIPS32O0-NEXT: sltiu $3, $2, 1 +; MIPS32O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 +; MIPS32O0-NEXT: xor $1, $2, $1 +; MIPS32O0-NEXT: sltiu $3, $1, 1 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -6201,53 +6162,50 @@ ; ; MIPS32R6O0-LABEL: foo: ; MIPS32R6O0: # %bb.0: -; MIPS32R6O0-NEXT: addiu $sp, $sp, -24 -; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 24 +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8 ; MIPS32R6O0-NEXT: move $1, $7 -; MIPS32R6O0-NEXT: move $2, $6 -; MIPS32R6O0-NEXT: move $3, $5 -; MIPS32R6O0-NEXT: move $8, $4 -; MIPS32R6O0-NEXT: addu $5, $5, $6 +; MIPS32R6O0-NEXT: move $3, $4 +; MIPS32R6O0-NEXT: # kill: def $a3 killed $at +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a2 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a1 +; MIPS32R6O0-NEXT: # kill: def $a0 killed $v1 +; MIPS32R6O0-NEXT: addu $2, $5, $6 +; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: addiu $6, $zero, -4 -; MIPS32R6O0-NEXT: and $6, $4, $6 -; MIPS32R6O0-NEXT: andi $4, $4, 3 -; MIPS32R6O0-NEXT: sll $4, $4, 3 -; MIPS32R6O0-NEXT: ori $9, $zero, 65535 -; MIPS32R6O0-NEXT: sllv $9, $9, $4 -; MIPS32R6O0-NEXT: nor $10, $zero, $9 -; MIPS32R6O0-NEXT: andi $11, $5, 65535 -; MIPS32R6O0-NEXT: sllv $11, $11, $4 -; MIPS32R6O0-NEXT: andi $7, $7, 65535 -; MIPS32R6O0-NEXT: sllv $7, $7, $4 +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 65535 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 65535 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 65535 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $13, 0($6) -; MIPS32R6O0-NEXT: and $14, $13, $9 -; MIPS32R6O0-NEXT: bnec $14, $11, $BB15_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB15_3 ; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS32R6O0-NEXT: and $13, $13, $10 -; MIPS32R6O0-NEXT: or $13, $13, $7 -; MIPS32R6O0-NEXT: sc $13, 0($6) -; MIPS32R6O0-NEXT: beqzc $13, $BB15_1 +; 
MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB15_1 ; MIPS32R6O0-NEXT: $BB15_3: -; MIPS32R6O0-NEXT: srlv $12, $14, $4 -; MIPS32R6O0-NEXT: seh $12, $12 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seh $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: -; MIPS32R6O0-NEXT: sw $12, 20($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $3, 16($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $5, 8($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: -; MIPS32R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: seh $2, $1 -; MIPS32R6O0-NEXT: lw $3, 20($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: xor $2, $3, $2 -; MIPS32R6O0-NEXT: sltiu $3, $2, 1 +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: seh $1, $1 +; MIPS32R6O0-NEXT: xor $1, $2, $1 +; MIPS32R6O0-NEXT: sltiu $3, $1, 1 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $2, 20($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: addiu $sp, $sp, 24 +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: foo: @@ -6415,51 +6373,50 @@ ; MIPS64R6O0: # %bb.0: ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R6O0-NEXT: move $3, $4 ; MIPS64R6O0-NEXT: move $1, $7 ; MIPS64R6O0-NEXT: sll $1, $1, 0 ; MIPS64R6O0-NEXT: move $2, $6 +; MIPS64R6O0-NEXT: sll $4, $2, 0 +; MIPS64R6O0-NEXT: move $2, $5 ; MIPS64R6O0-NEXT: sll $2, $2, 0 -; MIPS64R6O0-NEXT: move $3, $5 -; MIPS64R6O0-NEXT: sll $3, $3, 0 -; MIPS64R6O0-NEXT: move $5, $4 -; MIPS64R6O0-NEXT: addu $2, $3, $2 +; MIPS64R6O0-NEXT: # kill: def $a1_64 killed $v1_64 +; MIPS64R6O0-NEXT: addu $2, $2, $4 +; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: daddiu $6, $zero, -4 -; MIPS64R6O0-NEXT: and $6, $4, $6 -; MIPS64R6O0-NEXT: andi $3, $4, 3 +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 ; MIPS64R6O0-NEXT: xori $3, $3, 2 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $8, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $8, $8, $3 -; MIPS64R6O0-NEXT: nor $9, $zero, $8 -; MIPS64R6O0-NEXT: andi $10, $2, 65535 -; MIPS64R6O0-NEXT: sllv $10, $10, $3 +; MIPS64R6O0-NEXT: sll $9, $3, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 65535 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 ; MIPS64R6O0-NEXT: andi $1, $1, 65535 -; MIPS64R6O0-NEXT: sllv $1, $1, $3 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $12, 0($6) -; MIPS64R6O0-NEXT: and $13, $12, $8 -; MIPS64R6O0-NEXT: bnec $13, $10, .LBB15_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB15_3 ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS64R6O0-NEXT: and $12, $12, $9 -; MIPS64R6O0-NEXT: or $12, $12, $1 -; MIPS64R6O0-NEXT: sc $12, 0($6) -; MIPS64R6O0-NEXT: beqzc $12, .LBB15_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB15_1 ; MIPS64R6O0-NEXT: .LBB15_3: -; MIPS64R6O0-NEXT: srlv $11, $13, $3 -; MIPS64R6O0-NEXT: seh $11, $11 
+; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seh $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: -; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $11, 8($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sd $5, 0($sp) # 8-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: -; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: seh $2, $1 -; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: xor $2, $3, $2 -; MIPS64R6O0-NEXT: sltiu $3, $2, 1 +; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: seh $1, $1 +; MIPS64R6O0-NEXT: xor $1, $2, $1 +; MIPS64R6O0-NEXT: sltiu $3, $1, 1 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -6684,19 +6641,18 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 +; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $2, %got(countsint)($2) +; MIPS32O0-NEXT: lw $3, %got(countsint)($1) ; MIPS32O0-NEXT: $BB16_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: addu $1, $25, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB16_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -6740,22 +6696,18 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $2, %got(countsint)($2) +; MIPS32R6O0-NEXT: lw $3, %got(countsint)($1) ; MIPS32R6O0-NEXT: $BB16_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: addu $3, $1, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB16_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB16_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: move $2, $1 -; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; ; MIPS4-LABEL: CheckSync: @@ -6837,18 +6789,17 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) -; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: ld $1, %got_disp(countsint)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(countsint)($1) ; MIPS64R6O0-NEXT: .LBB16_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: addu $5, $3, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB16_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: 
beqzc $1, .LBB16_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: jrc $ra ; ; MM32-LABEL: CheckSync: @@ -6989,37 +6940,34 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addiu $sp, $sp, -16 +; MIPS32O0-NEXT: addiu $sp, $sp, -8 +; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32O0-NEXT: addu $2, $2, $25 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $2, %got(a)($2) -; MIPS32O0-NEXT: addiu $25, $zero, 0 -; MIPS32O0-NEXT: addiu $1, $zero, 1 -; MIPS32O0-NEXT: lw $3, 12($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: move $4, $1 +; MIPS32O0-NEXT: lw $4, %got(a)($2) +; MIPS32O0-NEXT: addiu $6, $zero, 0 +; MIPS32O0-NEXT: addiu $2, $zero, 1 +; MIPS32O0-NEXT: move $5, $2 ; MIPS32O0-NEXT: $BB17_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $5, 0($2) -; MIPS32O0-NEXT: bne $5, $4, $BB17_3 +; MIPS32O0-NEXT: ll $1, 0($4) +; MIPS32O0-NEXT: bne $1, $5, $BB17_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS32O0-NEXT: move $6, $25 -; MIPS32O0-NEXT: sc $6, 0($2) -; MIPS32O0-NEXT: beqz $6, $BB17_1 +; MIPS32O0-NEXT: move $3, $6 +; MIPS32O0-NEXT: sc $3, 0($4) +; MIPS32O0-NEXT: beqz $3, $BB17_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB17_3: # %entry -; MIPS32O0-NEXT: xor $1, $5, $1 -; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: xor $2, $1, $2 +; MIPS32O0-NEXT: sltiu $2, $2, 1 ; MIPS32O0-NEXT: sync ; MIPS32O0-NEXT: addiu $2, $zero, 1 -; MIPS32O0-NEXT: xor $2, $5, $2 -; MIPS32O0-NEXT: sltiu $2, $2, 1 -; MIPS32O0-NEXT: andi $2, $2, 1 -; MIPS32O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: addiu $sp, $sp, 16 +; MIPS32O0-NEXT: xor $1, $1, $2 +; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: andi $2, $1, 1 +; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -7079,28 +7027,26 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addu $2, $2, $25 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $2, %got(a)($2) -; MIPS32R6O0-NEXT: addiu $25, $zero, 0 -; MIPS32R6O0-NEXT: addiu $1, $zero, 1 -; MIPS32R6O0-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: move $4, $1 +; MIPS32R6O0-NEXT: lw $4, %got(a)($2) +; MIPS32R6O0-NEXT: addiu $6, $zero, 0 +; MIPS32R6O0-NEXT: addiu $2, $zero, 1 +; MIPS32R6O0-NEXT: move $5, $2 ; MIPS32R6O0-NEXT: $BB17_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $5, 0($2) -; MIPS32R6O0-NEXT: bnec $5, $4, $BB17_3 +; MIPS32R6O0-NEXT: ll $1, 0($4) +; MIPS32R6O0-NEXT: bnec $1, $5, $BB17_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS32R6O0-NEXT: move $6, $25 -; MIPS32R6O0-NEXT: sc $6, 0($2) -; MIPS32R6O0-NEXT: beqzc $6, $BB17_1 +; MIPS32R6O0-NEXT: move $3, $6 +; MIPS32R6O0-NEXT: sc $3, 0($4) +; MIPS32R6O0-NEXT: beqzc $3, $BB17_1 ; MIPS32R6O0-NEXT: $BB17_3: # %entry -; MIPS32R6O0-NEXT: xor $1, $5, $1 +; MIPS32R6O0-NEXT: xor $1, $1, $2 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: sw $3, 0($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: addiu $sp, 
$sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -7212,30 +7158,28 @@ ; MIPS64R6O0-LABEL: zeroreg: ; MIPS64R6O0: # %bb.0: # %entry ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 -; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) -; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) +; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lui $2, %hi(%neg(%gp_rel(zeroreg))) +; MIPS64R6O0-NEXT: daddu $2, $2, $25 +; MIPS64R6O0-NEXT: daddiu $2, $2, %lo(%neg(%gp_rel(zeroreg))) ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: ld $1, %got_disp(a)($1) -; MIPS64R6O0-NEXT: addiu $2, $zero, 0 -; MIPS64R6O0-NEXT: addiu $3, $zero, 1 -; MIPS64R6O0-NEXT: lw $4, 12($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: move $5, $3 +; MIPS64R6O0-NEXT: ld $4, %got_disp(a)($2) +; MIPS64R6O0-NEXT: addiu $6, $zero, 0 +; MIPS64R6O0-NEXT: addiu $2, $zero, 1 +; MIPS64R6O0-NEXT: move $5, $2 ; MIPS64R6O0-NEXT: .LBB17_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $6, 0($1) -; MIPS64R6O0-NEXT: bnec $6, $5, .LBB17_3 +; MIPS64R6O0-NEXT: ll $1, 0($4) +; MIPS64R6O0-NEXT: bnec $1, $5, .LBB17_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS64R6O0-NEXT: move $7, $2 -; MIPS64R6O0-NEXT: sc $7, 0($1) -; MIPS64R6O0-NEXT: beqzc $7, .LBB17_1 +; MIPS64R6O0-NEXT: move $3, $6 +; MIPS64R6O0-NEXT: sc $3, 0($4) +; MIPS64R6O0-NEXT: beqzc $3, .LBB17_1 ; MIPS64R6O0-NEXT: .LBB17_3: # %entry -; MIPS64R6O0-NEXT: xor $2, $6, $3 -; MIPS64R6O0-NEXT: sltiu $2, $2, 1 +; MIPS64R6O0-NEXT: xor $1, $1, $2 +; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -7403,18 +7347,17 @@ ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32O0-NEXT: addu $2, $2, $25 -; MIPS32O0-NEXT: lw $2, %got(x)($2) -; MIPS32O0-NEXT: addiu $2, $2, 1024 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: addiu $3, $1, 1024 ; MIPS32O0-NEXT: $BB18_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $25, 0($2) -; MIPS32O0-NEXT: addu $1, $25, $4 -; MIPS32O0-NEXT: sc $1, 0($2) +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) ; MIPS32O0-NEXT: beqz $1, $BB18_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $25 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -7457,21 +7400,18 @@ ; MIPS32R6O0: # %bb.0: # %entry ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) -; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $2, $2, $25 -; MIPS32R6O0-NEXT: move $25, $4 -; MIPS32R6O0-NEXT: lw $2, %got(x)($2) -; MIPS32R6O0-NEXT: addiu $2, $2, 1024 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: addiu $3, $1, 1024 ; MIPS32R6O0-NEXT: $BB18_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $1, 0($2) -; MIPS32R6O0-NEXT: addu $3, $1, $4 -; MIPS32R6O0-NEXT: sc $3, 0($2) -; MIPS32R6O0-NEXT: beqzc $3, $BB18_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB18_1 +; MIPS32R6O0-NEXT: nop ; 
MIPS32R6O0-NEXT: # %bb.2: # %entry
-; MIPS32R6O0-NEXT: move $2, $1
-; MIPS32R6O0-NEXT: sw $25, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadAdd32_OffGt9Bit:
@@ -7550,17 +7490,17 @@
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
-; MIPS64R6O0-NEXT: move $2, $4
+; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
-; MIPS64R6O0-NEXT: daddiu $1, $1, 1024
+; MIPS64R6O0-NEXT: daddiu $3, $1, 1024
; MIPS64R6O0-NEXT: .LBB18_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
-; MIPS64R6O0-NEXT: ll $3, 0($1)
-; MIPS64R6O0-NEXT: addu $5, $3, $2
-; MIPS64R6O0-NEXT: sc $5, 0($1)
-; MIPS64R6O0-NEXT: beqzc $5, .LBB18_1
+; MIPS64R6O0-NEXT: ll $2, 0($3)
+; MIPS64R6O0-NEXT: addu $1, $2, $4
+; MIPS64R6O0-NEXT: sc $1, 0($3)
+; MIPS64R6O0-NEXT: beqzc $1, .LBB18_1
+; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
-; MIPS64R6O0-NEXT: move $2, $3
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadAdd32_OffGt9Bit:
Index: test/CodeGen/Mips/atomic64.ll
===================================================================
--- test/CodeGen/Mips/atomic64.ll
+++ test/CodeGen/Mips/atomic64.ll
@@ -7,7 +7,7 @@
; RUN: FileCheck %s -check-prefix=MIPS64R2
; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS64R6
-; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s -rafast-ignore-missing-defs | \
; RUN: FileCheck %s -check-prefix=MIPS64R6O0

; We want to verify that the produced code is well formed at all optimization levels; the rest of the test ensures correctness.
@@ -92,21 +92,19 @@ ; ; MIPS64R6O0-LABEL: AtomicLoadAdd: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) -; MIPS64R6O0-NEXT: move $25, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $v0_64 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB0_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: daddu $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: daddu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: sd $25, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; ; O1-LABEL: AtomicLoadAdd: @@ -252,21 +250,19 @@ ; ; MIPS64R6O0-LABEL: AtomicLoadSub: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) -; MIPS64R6O0-NEXT: move $25, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $v0_64 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB1_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: dsubu $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: dsubu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: sd $25, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; ; O1-LABEL: AtomicLoadSub: @@ -412,21 +408,19 @@ ; ; MIPS64R6O0-LABEL: AtomicLoadAnd: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) -; MIPS64R6O0-NEXT: move $25, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $v0_64 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB2_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: sd $25, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; ; O1-LABEL: AtomicLoadAnd: @@ -572,21 +566,19 @@ ; ; MIPS64R6O0-LABEL: AtomicLoadOr: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) -; MIPS64R6O0-NEXT: move $25, $4 -; MIPS64R6O0-NEXT: ld $1, 
%got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $v0_64 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB3_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: or $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: sd $25, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; ; O1-LABEL: AtomicLoadOr: @@ -732,21 +724,19 @@ ; ; MIPS64R6O0-LABEL: AtomicLoadXor: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) -; MIPS64R6O0-NEXT: move $25, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $v0_64 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB4_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: xor $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: sd $25, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; ; O1-LABEL: AtomicLoadXor: @@ -896,22 +886,20 @@ ; ; MIPS64R6O0-LABEL: AtomicLoadNand: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) -; MIPS64R6O0-NEXT: move $25, $4 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: # kill: def $v0_64 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB5_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: nor $3, $zero, $3 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 +; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: sd $25, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; ; O1-LABEL: AtomicLoadNand: @@ -1074,18 +1062,17 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) -; MIPS64R6O0-NEXT: move $25, $4 +; MIPS64R6O0-NEXT: # kill: def $v0_64 killed $a0_64 ; MIPS64R6O0-NEXT: sd $4, 8($sp) ; MIPS64R6O0-NEXT: ld $4, 8($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB6_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: move $3, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, 
.LBB6_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: sd $25, 0($sp) # 8-byte Folded Spill ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -1267,32 +1254,27 @@ ; ; MIPS64R6O0-LABEL: AtomicCmpSwap64: ; MIPS64R6O0: # %bb.0: # %entry -; MIPS64R6O0-NEXT: daddiu $sp, $sp, -48 -; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) -; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) -; MIPS64R6O0-NEXT: move $25, $5 -; MIPS64R6O0-NEXT: move $2, $4 -; MIPS64R6O0-NEXT: sd $5, 40($sp) -; MIPS64R6O0-NEXT: ld $5, 40($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) -; MIPS64R6O0-NEXT: ld $3, 32($sp) # 8-byte Folded Reload +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: ld $1, 0($sp) # 8-byte Folded Reload +; MIPS64R6O0-NEXT: lui $2, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R6O0-NEXT: daddu $2, $2, $25 +; MIPS64R6O0-NEXT: daddiu $2, $2, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R6O0-NEXT: # kill: def $v1_64 killed $a1_64 +; MIPS64R6O0-NEXT: # kill: def $v1_64 killed $a0_64 +; MIPS64R6O0-NEXT: sd $5, 8($sp) +; MIPS64R6O0-NEXT: ld $5, 8($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($2) ; MIPS64R6O0-NEXT: .LBB7_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $6, 0($1) -; MIPS64R6O0-NEXT: bnec $6, $4, .LBB7_3 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS64R6O0-NEXT: move $7, $5 -; MIPS64R6O0-NEXT: scd $7, 0($1) -; MIPS64R6O0-NEXT: beqzc $7, .LBB7_1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ; MIPS64R6O0-NEXT: .LBB7_3: # %entry -; MIPS64R6O0-NEXT: sd $2, 24($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: move $2, $6 -; MIPS64R6O0-NEXT: sd $6, 32($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: sd $25, 16($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: sd $3, 8($sp) # 8-byte Folded Spill -; MIPS64R6O0-NEXT: daddiu $sp, $sp, 48 +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; ; O1-LABEL: AtomicCmpSwap64: Index: test/CodeGen/Mips/atomicCmpSwapPW.ll =================================================================== --- test/CodeGen/Mips/atomicCmpSwapPW.ll +++ test/CodeGen/Mips/atomicCmpSwapPW.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O0 -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -target-abi=o32 < %s -filetype=asm -o - \ +; RUN: llc -O0 -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -target-abi=o32 < %s -filetype=asm -o - -rafast-ignore-missing-defs \ ; RUN: | FileCheck -check-prefixes=O32 %s -; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n32 < %s -filetype=asm -o - \ +; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n32 < %s -filetype=asm -o - -rafast-ignore-missing-defs \ ; RUN: | FileCheck -check-prefixes=N32,ALL %s -; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \ +; RUN: llc -O0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - -rafast-ignore-missing-defs \ ; RUN: | FileCheck -check-prefixes=N64 %s @sym = external global i32 * @@ -11,32 +11,28 @@ define void @foo(i32 %new, i32 %old) 
{ ; O32-LABEL: foo: ; O32: # %bb.0: # %entry -; O32-NEXT: addiu $sp, $sp, -16 -; O32-NEXT: .cfi_def_cfa_offset 16 -; O32-NEXT: move $1, $5 -; O32-NEXT: move $2, $4 -; O32-NEXT: lui $3, %hi(sym) -; O32-NEXT: lw $3, %lo(sym)($3) +; O32-NEXT: addiu $sp, $sp, -8 +; O32-NEXT: .cfi_def_cfa_offset 8 +; O32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; O32-NEXT: # kill: def $v0 killed $a1 +; O32-NEXT: # kill: def $v0 killed $a0 +; O32-NEXT: lui $2, %hi(sym) +; O32-NEXT: lw $3, %lo(sym)($2) ; O32-NEXT: sync -; O32-NEXT: lw $6, 12($sp) # 4-byte Folded Reload ; O32-NEXT: $BB0_1: # %entry ; O32-NEXT: # =>This Inner Loop Header: Depth=1 -; O32-NEXT: ll $7, 0($3) -; O32-NEXT: bne $7, $4, $BB0_3 +; O32-NEXT: ll $1, 0($3) +; O32-NEXT: bne $1, $4, $BB0_3 ; O32-NEXT: nop ; O32-NEXT: # %bb.2: # %entry ; O32-NEXT: # in Loop: Header=BB0_1 Depth=1 -; O32-NEXT: move $8, $5 -; O32-NEXT: sc $8, 0($3) -; O32-NEXT: beqz $8, $BB0_1 +; O32-NEXT: move $2, $5 +; O32-NEXT: sc $2, 0($3) +; O32-NEXT: beqz $2, $BB0_1 ; O32-NEXT: nop ; O32-NEXT: $BB0_3: # %entry ; O32-NEXT: sync -; O32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill -; O32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill -; O32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; O32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill -; O32-NEXT: addiu $sp, $sp, 16 +; O32-NEXT: addiu $sp, $sp, 8 ; O32-NEXT: jr $ra ; O32-NEXT: nop ; @@ -44,29 +40,27 @@ ; N32: # %bb.0: # %entry ; N32-NEXT: addiu $sp, $sp, -16 ; N32-NEXT: .cfi_def_cfa_offset 16 -; N32-NEXT: move $1, $5 -; N32-NEXT: sll $1, $1, 0 +; N32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; N32-NEXT: move $2, $5 +; N32-NEXT: sll $5, $2, 0 ; N32-NEXT: move $2, $4 -; N32-NEXT: sll $2, $2, 0 -; N32-NEXT: lui $3, %hi(sym) -; N32-NEXT: lw $3, %lo(sym)($3) +; N32-NEXT: sll $4, $2, 0 +; N32-NEXT: lui $2, %hi(sym) +; N32-NEXT: lw $3, %lo(sym)($2) ; N32-NEXT: sync -; N32-NEXT: lw $6, 12($sp) # 4-byte Folded Reload ; N32-NEXT: .LBB0_1: # %entry ; N32-NEXT: # =>This Inner Loop Header: Depth=1 -; N32-NEXT: ll $7, 0($3) -; N32-NEXT: bne $7, $2, .LBB0_3 +; N32-NEXT: ll $1, 0($3) +; N32-NEXT: bne $1, $4, .LBB0_3 ; N32-NEXT: nop ; N32-NEXT: # %bb.2: # %entry ; N32-NEXT: # in Loop: Header=BB0_1 Depth=1 -; N32-NEXT: move $8, $1 -; N32-NEXT: sc $8, 0($3) -; N32-NEXT: beqz $8, .LBB0_1 +; N32-NEXT: move $2, $5 +; N32-NEXT: sc $2, 0($3) +; N32-NEXT: beqz $2, .LBB0_1 ; N32-NEXT: nop ; N32-NEXT: .LBB0_3: # %entry ; N32-NEXT: sync -; N32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill -; N32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; N32-NEXT: addiu $sp, $sp, 16 ; N32-NEXT: jr $ra ; N32-NEXT: nop @@ -75,33 +69,31 @@ ; N64: # %bb.0: # %entry ; N64-NEXT: daddiu $sp, $sp, -16 ; N64-NEXT: .cfi_def_cfa_offset 16 -; N64-NEXT: move $1, $5 -; N64-NEXT: sll $1, $1, 0 +; N64-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; N64-NEXT: move $2, $5 +; N64-NEXT: sll $5, $2, 0 ; N64-NEXT: move $2, $4 -; N64-NEXT: sll $2, $2, 0 -; N64-NEXT: lui $4, %highest(sym) -; N64-NEXT: daddiu $4, $4, %higher(sym) -; N64-NEXT: dsll $4, $4, 16 -; N64-NEXT: daddiu $4, $4, %hi(sym) -; N64-NEXT: dsll $4, $4, 16 -; N64-NEXT: ld $4, %lo(sym)($4) +; N64-NEXT: sll $4, $2, 0 +; N64-NEXT: lui $2, %highest(sym) +; N64-NEXT: daddiu $2, $2, %higher(sym) +; N64-NEXT: dsll $2, $2, 16 +; N64-NEXT: daddiu $2, $2, %hi(sym) +; N64-NEXT: dsll $2, $2, 16 +; N64-NEXT: ld $3, %lo(sym)($2) ; N64-NEXT: sync -; N64-NEXT: lw $3, 12($sp) # 4-byte Folded Reload ; N64-NEXT: .LBB0_1: # %entry ; N64-NEXT: # =>This Inner Loop Header: Depth=1 -; N64-NEXT: ll $6, 0($4) -; N64-NEXT: bne $6, $2, .LBB0_3 +; N64-NEXT: ll $1, 
0($3) +; N64-NEXT: bne $1, $4, .LBB0_3 ; N64-NEXT: nop ; N64-NEXT: # %bb.2: # %entry ; N64-NEXT: # in Loop: Header=BB0_1 Depth=1 -; N64-NEXT: move $7, $1 -; N64-NEXT: sc $7, 0($4) -; N64-NEXT: beqz $7, .LBB0_1 +; N64-NEXT: move $2, $5 +; N64-NEXT: sc $2, 0($3) +; N64-NEXT: beqz $2, .LBB0_1 ; N64-NEXT: nop ; N64-NEXT: .LBB0_3: # %entry ; N64-NEXT: sync -; N64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill -; N64-NEXT: sw $3, 8($sp) # 4-byte Folded Spill ; N64-NEXT: daddiu $sp, $sp, 16 ; N64-NEXT: jr $ra ; N64-NEXT: nop Index: test/CodeGen/Mips/branch-relaxation-with-hazard.ll =================================================================== --- test/CodeGen/Mips/branch-relaxation-with-hazard.ll +++ test/CodeGen/Mips/branch-relaxation-with-hazard.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r6 -relocation-model=pic < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-PIC ; RUN: llc -O0 -mtriple=mips-img-linux-gnu -mcpu=mips32r6 -relocation-model=static < %s -o - | FileCheck %s --check-prefixes=CHECK-STATIC @@ -5,27 +6,81 @@ declare i32 @foo(...) define i32 @main(i32 signext %argc, i8** %argv) { -; CHECK: main: -; CHECK: # %bb.1: -; CHECK-PIC: addiu -; CHECK-PIC: sw -; CHECK-PIC: lui -; CHECK-PIC: addiu -; CHECK-PIC: balc -; CHECK-PIC: addu -; CHECK-PIC: lw -; CHECK-PIC: addiu -; CHECK-PIC: jrc -; CHECK-PIC: bc -; CHECK-PIC: bnezc -; CHECK-PIC: nop -; CHECK-PIC: bc +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui $2, %hi(_gp_disp) +; CHECK-NEXT: addiu $2, $2, %lo(_gp_disp) +; CHECK-NEXT: addiu $sp, $sp, -40 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: addu $1, $2, $25 +; CHECK-NEXT: sw $1, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: # kill: def $at killed $a1 +; CHECK-NEXT: # kill: def $at killed $a0 +; CHECK-NEXT: sw $zero, 32($sp) +; CHECK-NEXT: sw $4, 28($sp) +; CHECK-NEXT: sw $5, 24($sp) +; CHECK-NEXT: lw $1, 28($sp) +; CHECK-NEXT: slti $1, $1, 2 +; CHECK-NEXT: bnezc $1, $BB0_6 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB0_2 +; CHECK-NEXT: $BB0_2: # %if.then +; CHECK-NEXT: lw $1, 28($sp) +; CHECK-NEXT: slti $1, $1, 4 +; CHECK-NEXT: bnezc $1, $BB0_5 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: bc $BB0_4 +; CHECK-NEXT: $BB0_4: # %if.then2 +; CHECK-NEXT: lw $gp, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: lw $25, %call16(boo)($gp) +; CHECK-NEXT: jalrc $25 +; CHECK-NEXT: sw $2, 32($sp) +; CHECK-NEXT: bc $BB0_7 +; CHECK-NEXT: $BB0_5: # %if.end +; CHECK-NEXT: lw $gp, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: #APP +; +; CHECK-STATIC-LABEL: main: +; CHECK-STATIC: # %bb.0: # %entry +; CHECK-STATIC-NEXT: addiu $sp, $sp, -32 +; CHECK-STATIC-NEXT: .cfi_def_cfa_offset 32 +; CHECK-STATIC-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; CHECK-STATIC-NEXT: .cfi_offset 31, -4 +; CHECK-STATIC-NEXT: # kill: def $at killed $a1 +; CHECK-STATIC-NEXT: # kill: def $at killed $a0 +; CHECK-STATIC-NEXT: sw $zero, 24($sp) +; CHECK-STATIC-NEXT: sw $4, 20($sp) +; CHECK-STATIC-NEXT: sw $5, 16($sp) +; CHECK-STATIC-NEXT: lw $1, 20($sp) +; CHECK-STATIC-NEXT: slti $1, $1, 2 +; CHECK-STATIC-NEXT: beqzc $1, $BB0_2 +; CHECK-STATIC-NEXT: nop +; CHECK-STATIC-NEXT: # %bb.1: # %entry +; CHECK-STATIC-NEXT: bc $BB0_7 +; CHECK-STATIC-NEXT: $BB0_2: # %entry +; CHECK-STATIC-NEXT: j $BB0_3 +; CHECK-STATIC-NEXT: nop +; CHECK-STATIC-NEXT: $BB0_3: # %if.then +; 
CHECK-STATIC-NEXT: lw $1, 20($sp) +; CHECK-STATIC-NEXT: slti $1, $1, 4 +; CHECK-STATIC-NEXT: bnezc $1, $BB0_6 +; CHECK-STATIC-NEXT: nop +; CHECK-STATIC-NEXT: # %bb.4: # %if.then +; CHECK-STATIC-NEXT: j $BB0_5 +; CHECK-STATIC-NEXT: nop +; CHECK-STATIC-NEXT: $BB0_5: # %if.then2 +; CHECK-STATIC-NEXT: jal boo +; CHECK-STATIC-NEXT: nop +; CHECK-STATIC-NEXT: sw $2, 24($sp) +; CHECK-STATIC-NEXT: j $BB0_8 +; CHECK-STATIC-NEXT: nop +; CHECK-STATIC-NEXT: $BB0_6: # %if.end +; CHECK-STATIC-NEXT: #APP -; CHECK-STATIC: bc -; CHECK-STATIC: j -; CHECK-STATIC: bnezc -; CHECK-STATIC: nop -; CHECK-STATIC: j entry: %retval = alloca i32, align 4 %argc.addr = alloca i32, align 4 Index: test/CodeGen/Mips/dsp-spill-reload.ll =================================================================== --- test/CodeGen/Mips/dsp-spill-reload.ll +++ test/CodeGen/Mips/dsp-spill-reload.ll @@ -23,10 +23,7 @@ ; ASM: SWDSP ; ASM: SWDSP -; ASM: SWDSP -; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) -; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) ; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) ; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) Index: test/CodeGen/Mips/micromips-eva.mir =================================================================== --- test/CodeGen/Mips/micromips-eva.mir +++ test/CodeGen/Mips/micromips-eva.mir @@ -196,19 +196,19 @@ ... -# CHECK: 60 41 60 05 lbue $2, 5($1) -# CHECK: 60 41 68 05 lbe $2, 5($1) -# CHECK: 60 41 a8 03 sbe $2, 3($1) +# CHECK: 60 22 60 05 lbue $1, 5($2) +# CHECK: 60 22 68 05 lbe $1, 5($2) +# CHECK: 60 22 a8 03 sbe $1, 3($2) -# CHECK: 60 41 62 0a lhue $2, 10($1) -# CHECK: 60 41 6a 0a lhe $2, 10($1) -# CHECK: 60 41 aa 06 she $2, 6($1) +# CHECK: 60 22 62 0a lhue $1, 10($2) +# CHECK: 60 22 6a 0a lhe $1, 10($2) +# CHECK: 60 22 aa 06 she $1, 6($2) -# CHECK: 60 41 6e 14 lwe $2, 20($1) -# CHECK: 60 41 ae 0c swe $2, 12($1) +# CHECK: 60 22 6e 14 lwe $1, 20($2) +# CHECK: 60 22 ae 0c swe $1, 12($2) -# CHECK: 60 41 6c 00 lle $2, 0($1) -# CHECK: 60 81 ac 00 sce $4, 0($1) +# CHECK: 60 22 6c 00 lle $1, 0($2) +# CHECK: 60 22 ac 00 sce $1, 0($2) # CHECK: 60 41 a6 05 cachee 2, 5($1) # CHECK: 60 41 a4 05 prefe 2, 5($1) Index: test/CodeGen/PowerPC/addegluecrash.ll =================================================================== --- test/CodeGen/PowerPC/addegluecrash.ll +++ test/CodeGen/PowerPC/addegluecrash.ll @@ -6,27 +6,30 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* nocapture readonly %b) { ; CHECK-LABEL: bn_mul_comba8: ; CHECK: # %bb.0: -; CHECK-NEXT: ld 6, 0(4) -; CHECK-NEXT: ld 7, 0(5) -; CHECK-NEXT: mulhdu 8, 7, 6 -; CHECK-NEXT: ld 4, 8(4) -; CHECK-NEXT: mulld 9, 4, 6 -; CHECK-NEXT: mulhdu 4, 4, 6 -; CHECK-NEXT: addc 6, 9, 8 -; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: std 4, -8(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: ld 3, -8(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 9, 0(3) +; CHECK-NEXT: ld 8, 0(5) +; CHECK-NEXT: mulhdu 7, 8, 9 +; CHECK-NEXT: ld 3, 8(3) +; CHECK-NEXT: mulld 6, 3, 9 +; CHECK-NEXT: mulhdu 3, 3, 9 +; CHECK-NEXT: addc 6, 6, 7 +; CHECK-NEXT: addze 3, 3 ; CHECK-NEXT: ld 5, 8(5) -; CHECK-NEXT: mulld 8, 5, 7 -; CHECK-NEXT: mulhdu 5, 5, 7 -; CHECK-NEXT: addc 6, 6, 8 +; CHECK-NEXT: mulld 7, 5, 8 +; CHECK-NEXT: mulhdu 5, 5, 8 +; CHECK-NEXT: addc 6, 6, 7 ; CHECK-NEXT: addze 5, 5 -; CHECK-NEXT: add 4, 5, 4 -; CHECK-NEXT: cmpld 7, 4, 5 -; CHECK-NEXT: mfocrf 10, 1 -; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr 4, 10 -; CHECK-NEXT: clrldi 4, 4, 32 -; CHECK-NEXT: std 4, 0(3) +; CHECK-NEXT: add 3, 5, 3 +; CHECK-NEXT: 
cmpld 7, 3, 5 +; CHECK-NEXT: mfocrf 3, 1 +; CHECK-NEXT: rlwinm 5, 3, 29, 31, 31 +; CHECK-NEXT: # implicit-def: $x3 +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: std 3, 0(4) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 %conv = zext i64 %1 to i128 Index: test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll =================================================================== --- test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll +++ test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll @@ -9,7 +9,7 @@ lnext: %elementArray = load i32*, i32** %elementArrayPtr, align 8 -; CHECK: lwz [[LDREG:[0-9]+]], 124(1) # 4-byte Folded Reload +; CHECK: lwz [[LDREG:[0-9]+]], 140(1) # 4-byte Folded Reload ; CHECK: # implicit-def: $x[[TEMPREG:[0-9]+]] %element = load i32, i32* %elementArray, align 4 ; CHECK: mr [[TEMPREG]], [[LDREG]] Index: test/CodeGen/PowerPC/anon_aggr.ll =================================================================== --- test/CodeGen/PowerPC/anon_aggr.ll +++ test/CodeGen/PowerPC/anon_aggr.ll @@ -19,7 +19,7 @@ } ; CHECK-LABEL: func1: -; CHECK: cmpld {{([0-9]+,)?}}4, 5 +; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 5 ; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]] ; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]] ; CHECK: ld 3, -[[OFFSET1]](1) @@ -38,11 +38,13 @@ ret i8* %array2_ptr } ; CHECK-LABEL: func2: -; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 6 +; CHECK-DAG: cmpld {{.*}}, {{.*}} ; CHECK-DAG: std 6, 72(1) ; CHECK-DAG: std 5, 64(1) -; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]] -; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]] +; CHECK-DAG: ld [[REG0:[0-9]+]], 72(1) +; CHECK-DAG: std [[REG0]], -[[OFFSET2:[0-9]+]] +; CHECK-DAG: mr [[REG1:[0-9]+]], 4 +; CHECK-DAG: std [[REG1]], -[[OFFSET1:[0-9]+]] ; CHECK: ld 3, -[[OFFSET2]](1) ; CHECK: ld 3, -[[OFFSET1]](1) @@ -86,8 +88,8 @@ ; CHECK-DAG: ld [[REG2:[0-9]+]], 120(1) ; CHECK-DAG: ld [[REG3:[0-9]+]], 136(1) ; CHECK-DAG: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]] -; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1) -; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1) +; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]](1) +; CHECK-DAG: std [[REG3]], -[[OFFSET2:[0-9]+]](1) ; CHECK: ld 3, -[[OFFSET1]](1) ; CHECK: ld 3, -[[OFFSET2]](1) Index: test/CodeGen/PowerPC/fp64-to-int16.ll =================================================================== --- test/CodeGen/PowerPC/fp64-to-int16.ll +++ test/CodeGen/PowerPC/fp64-to-int16.ll @@ -5,13 +5,12 @@ define i1 @Test(double %a) { ; CHECK-LABEL: Test: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscvdpsxws 1, 1 -; CHECK-NEXT: mfvsrwz 3, 1 +; CHECK-NEXT: xscvdpsxws 0, 1 +; CHECK-NEXT: mfvsrwz 3, 0 ; CHECK-NEXT: xori 3, 3, 65534 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: srwi 3, 3, 5 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srwi 4, 3, 5 +; CHECK-NEXT: # implicit-def: $x3 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr entry: Index: test/CodeGen/PowerPC/spill-nor0.ll =================================================================== --- test/CodeGen/PowerPC/spill-nor0.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -O0 -mcpu=ppc64 | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -; Function Attrs: nounwind -define void @_ZN4llvm3sys17RunningOnValgrindEv() #0 { -entry: - br i1 undef, label %if.then, label %if.end - -if.then: ; preds = %entry - ret void - -if.end: ; preds = %entry - %0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3 ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr 
$0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0 - unreachable - -; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv -; CHECK: stw -; CHECK: lwz -} - -attributes #0 = { nounwind } - Index: test/CodeGen/PowerPC/spill-nor0.mir =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/spill-nor0.mir @@ -0,0 +1,17 @@ +# RUN: llc -o - %s -mtriple=powerpc64-- -run-pass=regallocfast | FileCheck %s +--- +# CHECK-LABEL: name: func +name: func +tracksRegLiveness: true +body: | + bb.0: + %0 : gprc = LI 42 + %1 : gprc_nor0 = COPY %0 + ; CHECK: STW + + ; Clobber all regs to force a spill + NOP csr_noregs + + ; CHECK: LWZ + NOP implicit %1 +... Index: test/CodeGen/PowerPC/vsx-args.ll =================================================================== --- test/CodeGen/PowerPC/vsx-args.ll +++ test/CodeGen/PowerPC/vsx-args.ll @@ -23,10 +23,9 @@ ; CHECK: blr ; CHECK-FISL-LABEL: @main -; CHECK-FISL: stxvd2x 34 -; CHECK-FISL: vmr 2, 3 -; CHECK-FISL: vmr 3, 4 -; CHECK-FISL: lxvd2x 36 +; CHECK-FISL: stxvd2x 36 +; CHECK-FISL: vmr 4, 3 +; CHECK-FISL: lxvd2x 35 ; CHECK-FISL: bl sv ; CHECK-FISL: lxvd2x [[VC:[0-9]+]], ; CHECK-FISL: xvadddp 34, 34, [[VC]] Index: test/CodeGen/PowerPC/vsx.ll =================================================================== --- test/CodeGen/PowerPC/vsx.ll +++ test/CodeGen/PowerPC/vsx.ll @@ -247,10 +247,6 @@ ; CHECK-FISL: xxlnor v2, v2, v3 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori -; CHECK-FISL: li r3, -16 -; CHECK-FISL-NOT: lis -; CHECK-FISL-NOT: ori -; CHECK-FISL: stxvd2x vs0, r1, r3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test14 @@ -269,16 +265,10 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test15 -; CHECK-FISL: xxlor vs0, v2, v3 -; CHECK-FISL: xxlor v4, vs0, vs0 -; CHECK-FISL: xxlnor vs0, v2, v3 -; CHECK-FISL: xxlor v2, vs0, vs0 -; CHECK-FISL-NOT: lis -; CHECK-FISL-NOT: ori -; CHECK-FISL: li r3, -16 +; CHECK-FISL: xxlor {{.*}}, v2, v3 +; CHECK-FISL: xxlnor {{.*}}, v2, v3 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori -; CHECK-FISL: stxvd2x v4, r1, r3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test15 @@ -297,16 +287,10 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test16 -; CHECK-FISL: xxlor vs0, v2, v3 -; CHECK-FISL: xxlor v4, vs0, vs0 -; CHECK-FISL: xxlnor vs0, v2, v3 -; CHECK-FISL: xxlor v2, vs0, vs0 -; CHECK-FISL-NOT: lis -; CHECK-FISL-NOT: ori -; CHECK-FISL: li r3, -16 +; CHECK-FISL: xxlor {{.*}}, v2, v3 +; CHECK-FISL: xxlnor {{.*}}, v2, v3 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori -; CHECK-FISL: stxvd2x v4, r1, r3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test16 @@ -325,8 +309,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test17 -; CHECK-FISL: xxlnor v3, v3, v3 -; CHECK-FISL: xxland v2, v2, v3 +; CHECK-FISL: xxlnor [[REG0:[vs0-9]+]], v3, v3 +; CHECK-FISL: xxland v2, v2, [[REG0]] ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test17 @@ -345,16 +329,10 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test18 -; CHECK-FISL: xxlnor vs0, v3, v3 -; CHECK-FISL: xxlor v4, vs0, vs0 -; CHECK-FISL: xxlandc vs0, v2, v3 -; CHECK-FISL: xxlor v2, vs0, vs0 -; CHECK-FISL-NOT: lis -; CHECK-FISL-NOT: ori -; CHECK-FISL: li r3, -16 +; CHECK-FISL: xxlnor {{.*}}, v3, v3 +; CHECK-FISL: xxlandc {{.*}}, v2, v3 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori -; CHECK-FISL: stxvd2x v4, r1, r3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test18 @@ -373,16 +351,10 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test19 -; CHECK-FISL: xxlnor vs0, v3, v3 -; CHECK-FISL: xxlor v4, vs0, vs0 -; CHECK-FISL: xxlandc vs0, v2, v3 -; CHECK-FISL: xxlor v2, vs0, vs0 +; CHECK-FISL: xxlnor {{.*}}, v3, v3 +; CHECK-FISL: 
xxlandc {{.*}}, v2, v3 ; CHECK-FISL-NOT: lis ; CHECK-FISL-NOT: ori -; CHECK-FISL: li r3, -16 -; CHECK-FISL-NOT: lis -; CHECK-FISL-NOT: ori -; CHECK-FISL: stxvd2x v4, r1, r3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test19 @@ -424,8 +396,8 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test21 -; CHECK-FISL: xvcmpeqsp v4, v4, v5 -; CHECK-FISL: xxsel v2, v3, v2, v4 +; CHECK-FISL: xvcmpeqsp [[REG0:[vs0-9]+]], v4, v5 +; CHECK-FISL: xxsel v2, v3, v2, [[REG0]] ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test21 @@ -452,14 +424,14 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test22 -; CHECK-FISL-DAG: xvcmpeqsp vs0, v4, v5 -; CHECK-FISL-DAG: xvcmpeqsp v5, v5, v5 -; CHECK-FISL-DAG: xvcmpeqsp v4, v4, v4 -; CHECK-FISL-DAG: xxlnor v5, v5, v5 -; CHECK-FISL-DAG: xxlnor v4, v4, v4 -; CHECK-FISL-DAG: xxlor v4, v4, v5 -; CHECK-FISL-DAG: xxlor vs0, vs0, v4 -; CHECK-FISL: xxsel v2, v3, v2, vs0 +; CHECK-FISL-DAG: xvcmpeqsp [[REG0:[vs0-9]+]], v4, v5 +; CHECK-FISL-DAG: xvcmpeqsp [[REG1:[vs0-9]+]], v5, v5 +; CHECK-FISL-DAG: xvcmpeqsp [[REG2:[vs0-9]+]], v4, v4 +; CHECK-FISL-DAG: xxlnor [[REG3:[vs0-9]+]], [[REG1]], [[REG1]] +; CHECK-FISL-DAG: xxlnor [[REG4:[vs0-9]+]], [[REG2]], [[REG2]] +; CHECK-FISL-DAG: xxlor [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] +; CHECK-FISL: xxlor [[REG6:[vs0-9]+]], [[REG0]], [[REG5]] +; CHECK-FISL: xxsel v2, v3, v2, [[REG6]] ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test22 @@ -633,8 +605,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test30 -; CHECK-FISL: lxvd2x vs0, 0, r3 -; CHECK-FISL: xxlor v2, vs0, vs0 +; CHECK-FISL: lxvd2x v2, 0, r3 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test30 @@ -996,8 +967,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test63 -; CHECK-FISL: xxlor f0, v2, v2 -; CHECK-FISL: fmr f1, f0 +; CHECK-FISL: xxlor f1, v2, v2 ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test63 @@ -1014,8 +984,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test64 -; CHECK-FISL: xxswapd v2, v2 -; CHECK-FISL: xxlor f0, v2, v2 +; CHECK-FISL: xxswapd vs0, v2 ; CHECK-FISL: fmr f1, f0 ; CHECK-FISL: blr @@ -1159,8 +1128,7 @@ ; CHECK-REG: blr ; CHECK-FISL-LABEL: @test80 -; CHECK-FISL: mr r4, r3 -; CHECK-FISL: stw r4, -16(r1) +; CHECK-FISL: stw r3, -16(r1) ; CHECK-FISL: addi r3, r1, -16 ; CHECK-FISL-DAG: lxvw4x vs0, 0, r3 ; CHECK-FISL-DAG: xxspltw v2, vs0, 0 Index: test/CodeGen/SystemZ/swift-return.ll =================================================================== --- test/CodeGen/SystemZ/swift-return.ll +++ test/CodeGen/SystemZ/swift-return.ll @@ -14,10 +14,9 @@ ; CHECK-O0-LABEL: test ; CHECK-O0: st %r2 ; CHECK-O0: brasl %r14, gen -; CHECK-O0-DAG: lhr %[[REG1:r[0-9]+]], %r2 +; CHECK-O0-DAG: lhr %r2, %r2 ; CHECK-O0-DAG: lbr %[[REG2:r[0-9]+]], %r3 -; CHECK-O0: ar %[[REG1]], %[[REG2]] -; CHECK-O0: lr %r2, %[[REG1]] +; CHECK-O0: ar %r2, %[[REG2]] define i16 @test(i32 %key) { entry: %key.addr = alloca i32, align 4 @@ -50,18 +49,17 @@ ; CHECK-O0-LABEL: test2: ; CHECK-O0: st %r2, [[SPILL1:[0-9]+]](%r15) ; CHECK-O0: l %r3, [[SPILL1]](%r15) -; CHECK-O0: la %r2, 168(%r15) +; CHECK-O0: la %r2, {{[0-9]+}}(%r15) ; CHECK-O0: brasl %r14, gen2 -; CHECK-O0-DAG: l %r{{.*}}, 184(%r15) -; CHECK-O0-DAG: l %r{{.*}}, 180(%r15) -; CHECK-O0-DAG: l %r{{.*}}, 176(%r15) -; CHECK-O0-DAG: l %r{{.*}}, 172(%r15) -; CHECK-O0-DAG: l %r{{.*}}, 168(%r15) +; CHECK-O0-DAG: l %r{{.*}}, {{[0-9]+}}(%r15) +; CHECK-O0-DAG: l %r{{.*}}, {{[0-9]+}}(%r15) +; CHECK-O0-DAG: l %r{{.*}}, {{[0-9]+}}(%r15) +; CHECK-O0-DAG: l %r{{.*}}, {{[0-9]+}}(%r15) +; CHECK-O0-DAG: l %r{{.*}}, {{[0-9]+}}(%r15) ; CHECK-O0: ar ; CHECK-O0: ar ; CHECK-O0: ar ; CHECK-O0: ar -; CHECK-O0: lr %r2 define i32 @test2(i32 %key) 
#0 { entry: %key.addr = alloca i32, align 4 Index: test/CodeGen/SystemZ/swifterror.ll =================================================================== --- test/CodeGen/SystemZ/swifterror.ll +++ test/CodeGen/SystemZ/swifterror.ll @@ -16,8 +16,8 @@ ; CHECK-O0-LABEL: foo: ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr %r9, %r2 -; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0: lgr [[T0:%r[0-9]+]], %r2 +; CHECK-O0: mvi 8([[T0]]), 1 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -118,19 +118,18 @@ ; CHECK-NOT: %r9 ; CHECK: br %r14 ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: chi %r2, 0 ; spill to stack ; CHECK-O0: stg %r9, [[OFFS:[0-9]+]](%r15) +; CHECK-O0: chi %r2, 0 ; CHECK-O0: je ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc ; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 -; CHECK-O0: mvi 8(%r2), 1 -; CHECK-O0: lgr %r9, %r[[REG1]] +; CHECK-O0: mvi 8(%r[[REG1]]), 1 +; CHECK-O0: lgr %r9, %r2 ; CHECK-O0: br %r14 ; reload from stack -; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS]](%r15) -; CHECK-O0: lgr %r9, %r[[REG2]] +; CHECK-O0: lg %r9, [[OFFS]](%r15) ; CHECK-O0: br %r14 entry: %cond = icmp ne i32 %cc, 0 @@ -169,11 +168,10 @@ ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc ; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 -; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0: mvi 8(%r[[REG1]]), 1 ; CHECK-O0: jnh ; reload from stack -; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS:[0-9]+]](%r15) -; CHECK-O0: lgr %r9, %r[[REG2]] +; CHECK-O0: lg %r9, [[OFFS:[0-9]+]](%r15) ; CHECK-O0: br %r14 entry: br label %bb_loop @@ -214,18 +212,17 @@ ; CHECK: br %r14 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: lghi %r{{.*}}, 16 ; spill sret to stack -; CHECK-O0: stg %r2, [[OFFS1:[0-9]+]](%r15) -; CHECK-O0: lgr %r2, %r{{.*}} -; CHECK-O0: st %r3, [[OFFS2:[0-9]+]](%r15) +; CHECK-O0-DAG: stg %r2, [[OFFS1:[0-9]+]](%r15) +; CHECK-O0-DAG: st %r3, [[OFFS2:[0-9]+]](%r15) +; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr {{.*}}, %r2 -; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0-DAG: lgr %r[[REG3:[0-9]+]], %r2 +; CHECK-O0-DAG: mvi 8(%r[[REG3]]), 1 ; CHECK-O0-DAG: lg %r[[REG1:[0-9]+]], [[OFFS1]](%r15) ; CHECK-O0-DAG: l %r[[REG2:[0-9]+]], [[OFFS2]](%r15) ; CHECK-O0: st %r[[REG2]], 4(%r[[REG1]]) -; CHECK-O0: lgr %r9, {{.*}} +; CHECK-O0: lgr %r9, %r2 ; CHECK-O0: br %r14 entry: %call = call i8* @malloc(i64 16) @@ -255,8 +252,6 @@ ; CHECK-O0-LABEL: caller3: ; CHECK-O0: lghi %r9, 0 ; CHECK-O0: lhi %r3, 1 -; CHECK-O0: stg %r2, {{.*}}(%r15) -; CHECK-O0: lgr %r2, {{.*}} ; CHECK-O0: brasl %r14, foo_sret ; CHECK-O0: lgr {{.*}}, %r9 ; CHECK-O0: cghi %r9, 0 Index: test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll =================================================================== --- test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll +++ test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll @@ -1,5 +1,5 @@ ; radr://6772169 -; RUN: llc < %s -fast-isel +; RUN: llc < %s -O0 -fast-isel ; PR30981 ; RUN: llc < %s -O0 -mcpu=x86-64 -mattr=+avx512f | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" @@ -14,9 +14,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: addl $0, %eax -; CHECK-NEXT: seto %cl -; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill -; CHECK-NEXT: movb %cl, -{{[0-9]+}}(%rsp) ## 1-byte Spill +; CHECK-NEXT: seto %al ; CHECK-NEXT: jo LBB0_2 %tmp1 = call %0 @llvm.sadd.with.overflow.i32(i32 1, i32 0) %tmp2 = extractvalue %0 %tmp1, 1 Index: 
test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll =================================================================== --- test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll +++ test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll @@ -14,21 +14,21 @@ ; ; CHECK-LABEL: @test_bitcast -; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %loaded_ptr. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi ; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx +; CHECK-DAG: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK-DAG: movq %rdx, %rdi +; Load the second argument +; CHECK-DAG: movq %rdx, %rsi ; Load the function pointer. -; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; CHECK-DAG: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret @@ -53,21 +53,21 @@ } ; CHECK-LABEL: @test_inttoptr -; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %loaded_ptr. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi ; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx +; CHECK-DAG: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK-DAG: movq %rdx, %rdi +; Load the second argument +; CHECK-DAG: movq %rdx, %rsi ; Load the function pointer. -; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; CHECK-DAG: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret @@ -92,21 +92,21 @@ } ; CHECK-LABEL: @test_ptrtoint -; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %loaded_ptr. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. +; Load the third argument +; CHECK-DAG: movq [[ARG2_SLOT]], %rdx ; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi +; CHECK-DAG: movq %rdx, %rdi ; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi -; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx +; CHECK-DAG: movq %rdx, %rsi ; Load the function pointer. -; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; CHECK-DAG: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] ; Call.
; CHECK: callq *[[FCT_PTR]] ; CHECK: ret Index: test/CodeGen/X86/atomic32.ll =================================================================== --- test/CodeGen/X86/atomic32.ll +++ test/CodeGen/X86/atomic32.ll @@ -68,11 +68,10 @@ ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 ; X64-NEXT: .LBB2_2: # %atomicrmw.end @@ -92,11 +91,10 @@ ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %dl -; X86-NEXT: testb $1, %dl -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: sete %cl ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB2_2 ; X86-NEXT: jmp .LBB2_1 ; X86-NEXT: .LBB2_2: # %atomicrmw.end @@ -122,11 +120,10 @@ ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: orl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB3_2 ; X64-NEXT: jmp .LBB3_1 ; X64-NEXT: .LBB3_2: # %atomicrmw.end @@ -146,11 +143,10 @@ ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %dl -; X86-NEXT: testb $1, %dl -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: sete %cl ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB3_2 ; X86-NEXT: jmp .LBB3_1 ; X86-NEXT: .LBB3_2: # %atomicrmw.end @@ -176,11 +172,10 @@ ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: xorl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB4_2 ; X64-NEXT: jmp .LBB4_1 ; X64-NEXT: .LBB4_2: # %atomicrmw.end @@ -200,11 +195,10 @@ ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %dl -; X86-NEXT: testb $1, %dl -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: sete %cl ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB4_2 ; X86-NEXT: jmp .LBB4_1 ; X86-NEXT: .LBB4_2: # %atomicrmw.end @@ -221,19 +215,19 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_nand32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB5_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl %edx, %ecx ; X64-NEXT: notl %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB5_2 ; X64-NEXT: jmp .LBB5_1 @@ -242,28 +236,26 @@ ; ; X86-LABEL: atomic_fetch_nand32: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx ; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl sc32, %ecx +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl sc32, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-NEXT: .LBB5_1: # %atomicrmw.start ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: andl %edx, %ecx ; X86-NEXT: notl %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %bl -; X86-NEXT: testb $1, %bl -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: sete %cl +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB5_2 ; X86-NEXT: jmp .LBB5_1 ; X86-NEXT: .LBB5_2: # %atomicrmw.end ; X86-NEXT: addl $8, %esp -; X86-NEXT: popl %ebx ; X86-NEXT: retl %t1 = atomicrmw nand i32* @sc32, i32 %x acquire ret void @@ -272,20 +264,19 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_max32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB6_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovgel %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: cmovgel %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB6_2 ; X64-NEXT: jmp .LBB6_1 @@ -294,50 +285,43 @@ ; ; X86-CMOV-LABEL: atomic_fetch_max32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx -; X86-CMOV-NEXT: subl $12, %esp +; X86-CMOV-NEXT: subl $8, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx +; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; 
X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovgel %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: cmovgel %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB6_2 ; X86-CMOV-NEXT: jmp .LBB6_1 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end -; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx +; X86-CMOV-NEXT: addl $8, %esp ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_max32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $24, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB6_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jge .LBB6_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 @@ -345,21 +329,16 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB6_2 ; X86-NOCMOV-NEXT: jmp .LBB6_1 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire ret void @@ -368,20 +347,19 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_min32: ; X64: # %bb.0: -; X64-NEXT: 
movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB7_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovlel %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: cmovlel %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB7_2 ; X64-NEXT: jmp .LBB7_1 @@ -390,50 +368,43 @@ ; ; X86-CMOV-LABEL: atomic_fetch_min32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx -; X86-CMOV-NEXT: subl $12, %esp +; X86-CMOV-NEXT: subl $8, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx +; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovlel %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: cmovlel %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB7_2 ; X86-CMOV-NEXT: jmp .LBB7_1 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end -; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx +; X86-CMOV-NEXT: addl $8, %esp ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_min32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $24, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB7_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jle .LBB7_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 @@ -441,21 +412,16 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB7_2 ; X86-NOCMOV-NEXT: jmp .LBB7_1 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire ret void @@ -464,20 +430,19 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_umax32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB8_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmoval %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: cmoval %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB8_2 ; X64-NEXT: jmp .LBB8_1 @@ -486,50 +451,43 @@ ; ; X86-CMOV-LABEL: atomic_fetch_umax32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx -; X86-CMOV-NEXT: subl $12, %esp +; X86-CMOV-NEXT: subl $8, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx +; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmoval %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill +; 
X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: cmoval %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB8_2 ; X86-CMOV-NEXT: jmp .LBB8_1 ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end -; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx +; X86-CMOV-NEXT: addl $8, %esp ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umax32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $24, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB8_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: ja .LBB8_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 @@ -537,21 +495,16 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB8_2 ; X86-NOCMOV-NEXT: jmp .LBB8_1 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire ret void @@ -560,20 +513,19 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_umin32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB9_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), 
%edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovbel %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: cmovbel %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB9_2 ; X64-NEXT: jmp .LBB9_1 @@ -582,50 +534,43 @@ ; ; X86-CMOV-LABEL: atomic_fetch_umin32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx -; X86-CMOV-NEXT: subl $12, %esp +; X86-CMOV-NEXT: subl $8, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx +; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovbel %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: cmovbel %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB9_2 ; X86-CMOV-NEXT: jmp .LBB9_1 ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end -; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx +; X86-CMOV-NEXT: addl $8, %esp ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umin32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $24, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB9_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jbe .LBB9_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 @@ -633,21 +578,16 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill ; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB9_2 ; X86-NOCMOV-NEXT: jmp .LBB9_1 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $24, %esp -; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire ret void @@ -659,17 +599,13 @@ ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: movl $1, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: retq ; ; X86-LABEL: atomic_fetch_cmpxchg32: ; X86: # %bb.0: -; X86-NEXT: pushl %eax ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: movl $1, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: popl %eax ; X86-NEXT: retl %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire ret void @@ -694,16 +630,12 @@ ; X64-LABEL: atomic_fetch_swap32: ; X64: # %bb.0: ; X64-NEXT: xchgl %edi, {{.*}}(%rip) -; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: retq ; ; X86-LABEL: atomic_fetch_swap32: ; X86: # %bb.0: -; X86-NEXT: pushl %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: xchgl %eax, sc32 -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: popl %eax ; X86-NEXT: retl %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire ret void Index: test/CodeGen/X86/avx-load-store.ll =================================================================== --- test/CodeGen/X86/avx-load-store.ll +++ test/CodeGen/X86/avx-load-store.ll @@ -13,15 +13,15 @@ ; CHECK-NEXT: movq %rsi, %r15 ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: vmovaps (%rdi), %ymm0 -; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK-NEXT: vmovaps (%rsi), %ymm1 -; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK-NEXT: vmovaps (%rdx), %ymm2 ; CHECK-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill ; CHECK-NEXT: callq dummy -; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; CHECK-NEXT: vmovaps %ymm0, (%rbx) -; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; CHECK-NEXT: vmovaps %ymm0, (%r15) ; CHECK-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; CHECK-NEXT: vmovaps %ymm0, (%r14) @@ -34,27 +34,27 @@ ; ; CHECK_O0-LABEL: test_256_load: ; CHECK_O0: # %bb.0: # %entry -; CHECK_O0-NEXT: subq $152, %rsp +; CHECK_O0-NEXT: subq $184, %rsp +; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 
8-byte Spill
; CHECK_O0-NEXT: vmovapd (%rdi), %ymm0
+; CHECK_O0-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK_O0-NEXT: vmovaps (%rsi), %ymm1
+; CHECK_O0-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2
-; CHECK_O0-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK_O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK_O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK_O0-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK_O0-NEXT: callq dummy
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; CHECK_O0-NEXT: vmovapd %ymm0, (%rdx)
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 # 32-byte Reload
+; CHECK_O0-NEXT: vmovups (%rsp), %ymm2 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK_O0-NEXT: vmovapd %ymm2, (%rdi)
; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi)
-; CHECK_O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; CHECK_O0-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2 # 32-byte Reload
-; CHECK_O0-NEXT: vmovdqa %ymm2, (%rdi)
-; CHECK_O0-NEXT: addq $152, %rsp
+; CHECK_O0-NEXT: vmovdqa %ymm0, (%rdx)
+; CHECK_O0-NEXT: addq $184, %rsp
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
entry:
@@ -84,12 +84,12 @@
;
; CHECK_O0-LABEL: mov00:
; CHECK_O0: # %bb.0:
-; CHECK_O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK_O0-NEXT: # implicit-def: $ymm1
-; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
+; CHECK_O0-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK_O0-NEXT: # implicit-def: $ymm0
; CHECK_O0-NEXT: vmovaps %xmm1, %xmm0
-; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK_O0-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
+; CHECK_O0-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK_O0-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0
; CHECK_O0-NEXT: retq
%val = load float, float* %ptr
@@ -105,12 +105,12 @@
;
; CHECK_O0-LABEL: mov01:
; CHECK_O0: # %bb.0:
-; CHECK_O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK_O0-NEXT: # implicit-def: $ymm1
-; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
+; CHECK_O0-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK_O0-NEXT: # implicit-def: $ymm0
; CHECK_O0-NEXT: vmovaps %xmm1, %xmm0
-; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK_O0-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
+; CHECK_O0-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK_O0-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0
; CHECK_O0-NEXT: retq
%val = load double, double* %ptr
@@ -183,10 +183,11 @@
;
; CHECK_O0-LABEL: double_save:
; CHECK_O0: # %bb.0:
-; CHECK_O0-NEXT: # implicit-def: $ymm2
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
-; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT: # implicit-def: $ymm0
+; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
%Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32>
@@ -228,10 +229,10 @@
; CHECK_O0-NEXT: .LBB8_3: # %cif_mixed_test_all
; CHECK_O0-NEXT: movl $-1, %eax
; CHECK_O0-NEXT: vmovd %eax, %xmm0
-; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
-; CHECK_O0-NEXT: # implicit-def: $rcx
-; CHECK_O0-NEXT: # implicit-def: $ymm2
-; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rcx)
+; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0
+; CHECK_O0-NEXT: # implicit-def: $rax
+; CHECK_O0-NEXT: # implicit-def: $ymm1
+; CHECK_O0-NEXT: vmaskmovps %ymm1, %ymm0, (%rax)
; CHECK_O0-NEXT: .LBB8_4: # %cif_mixed_test_any_check
allocas:
br i1 undef, label %cif_mask_all, label %cif_mask_mixed
@@ -261,12 +262,12 @@
;
; CHECK_O0-LABEL: add8i32:
; CHECK_O0: # %bb.0:
-; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm0
+; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm2
; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1
-; CHECK_O0-NEXT: # implicit-def: $ymm2
-; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
-; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT: # implicit-def: $ymm0
+; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
%b = load <8 x i32>, <8 x i32>* %bp, align 1
@@ -306,12 +307,12 @@
;
; CHECK_O0-LABEL: add4i64a16:
; CHECK_O0: # %bb.0:
-; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm0
+; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm2
; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1
-; CHECK_O0-NEXT: # implicit-def: $ymm2
-; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2
-; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
-; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT: # implicit-def: $ymm0
+; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0
+; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi)
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
%b = load <4 x i64>, <4 x i64>* %bp, align 16
Index: test/CodeGen/X86/avx512-mask-zext-bugfix.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-zext-bugfix.ll
+++ test/CodeGen/X86/avx512-mask-zext-bugfix.ll
@@ -19,47 +19,43 @@
; CHECK: ## %bb.0:
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; CHECK-NEXT: vpmovw2m %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: movl $2, %esi
-; CHECK-NEXT: movl $8, %eax
-; CHECK-NEXT: movq %rdx, %rdi
-; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
+; CHECK-NEXT: movl $8, %edx
; CHECK-NEXT: callq _calc_expected_mask_val
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: movw %dx, %r8w
-; CHECK-NEXT: movzwl %r8w, %esi
-; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
+; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-NEXT: ## kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: movzwl %ax, %esi
; CHECK-NEXT: kmovb %k0, %edi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
; CHECK-NEXT: vpmovd2m %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %k1
-; CHECK-NEXT: kmovd %k0, %esi
-; CHECK-NEXT: movb %sil, %r9b
-; CHECK-NEXT: movzbl %r9b, %esi
-; CHECK-NEXT: movw %si, %r8w
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload
-; CHECK-NEXT: movl $4, %esi
-; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
-; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; CHECK-NEXT: movw %r8w, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK-NEXT: ## kill: def $k1 killed $k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: movl $4, %edx
+; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: callq _calc_expected_mask_val
-; CHECK-NEXT: movw %ax, %r8w
-; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload
-; CHECK-NEXT: movzwl %r10w, %edi
-; CHECK-NEXT: movzwl %r8w, %esi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %si ## 2-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax
+; CHECK-NEXT: movzwl %si, %edi
+; CHECK-NEXT: movzwl %ax, %esi
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: retq
%d2 = bitcast <2 x i64> %a to <8 x i16>
Index: test/CodeGen/X86/crash-O0.ll
===================================================================
--- test/CodeGen/X86/crash-O0.ll
+++ test/CodeGen/X86/crash-O0.ll
@@ -15,25 +15,28 @@
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: ## implicit-def: $rdx
-; CHECK-NEXT: movb %dl, %sil
-; CHECK-NEXT: movzbw %cl, %ax
-; CHECK-NEXT: divb %sil
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
-; CHECK-NEXT: movzbw %cl, %ax
-; CHECK-NEXT: divb %sil
+; CHECK-NEXT: ## implicit-def: $rcx
+; CHECK-NEXT: ## kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT: movzbw %al, %ax
+; CHECK-NEXT: divb %cl
+; CHECK-NEXT: movb %al, %dl
+; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload
+; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill
+; CHECK-NEXT: movzbw %al, %ax
+; CHECK-NEXT: divb %cl
; CHECK-NEXT: shrw $8, %ax
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: cmpb %sil, %cl
+; CHECK-NEXT: ## kill: def $al killed $al killed $ax
+; CHECK-NEXT: cmpb %cl, %al
; CHECK-NEXT: jae LBB0_2
; CHECK-NEXT: ## %bb.1: ## %"39"
; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: ## implicit-def: $edx
-; CHECK-NEXT: imull %edx, %ecx
-; CHECK-NEXT: addl %edx, %ecx
-; CHECK-NEXT: cmpl %edx, %ecx
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: ## implicit-def: $ecx
+; CHECK-NEXT: imull %ecx, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: cmpl %ecx, %eax
; CHECK-NEXT: je LBB0_3
; CHECK-NEXT: LBB0_2: ## %"40"
; CHECK-NEXT: ud2
Index: test/CodeGen/X86/extend-set-cc-uses-dbg.ll
===================================================================
--- test/CodeGen/X86/extend-set-cc-uses-dbg.ll
+++ test/CodeGen/X86/extend-set-cc-uses-dbg.ll
@@ -7,8 +7,8 @@
bb:
%tmp = load i32, i32* %p, align 4, !dbg !7
; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p)
- ; CHECK-NEXT: $edi = MOV32rr killed $eax, implicit-def $rdi, debug-location !7
- ; CHECK-NEXT: $rcx = MOV64rr $rdi, debug-location !7
+ ; CHECK-NEXT: renamable $rax = KILL killed renamable $eax, debug-location !7
+ ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -8, $noreg, $rax
switch i32 %tmp, label %bb7 [
i32 0, label %bb1
Index: test/CodeGen/X86/fast-isel-nontemporal.ll
===================================================================
--- test/CodeGen/X86/fast-isel-nontemporal.ll
+++ test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -546,11 +546,11 @@
;
; AVX1-LABEL: test_load_nt8xfloat:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xfloat:
@@ -588,11 +588,11 @@
;
; AVX1-LABEL: test_load_nt4xdouble:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xdouble:
@@ -630,11 +630,11 @@
;
; AVX1-LABEL: test_load_nt32xi8:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi8:
@@ -672,11 +672,11 @@
;
; AVX1-LABEL: test_load_nt16xi16:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi16:
@@ -714,11 +714,11 @@
;
; AVX1-LABEL: test_load_nt8xi32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi32:
@@ -756,11 +756,11 @@
;
; AVX1-LABEL: test_load_nt4xi64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xi64:
@@ -996,11 +996,11 @@
;
; AVX1-LABEL: test_load_nt16xfloat:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
; AVX1-NEXT: # implicit-def: $ymm1
; AVX1-NEXT: vmovaps %xmm2, %xmm1
@@ -1050,11 +1050,11 @@
;
; AVX1-LABEL: test_load_nt8xdouble:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
; AVX1-NEXT: # implicit-def: $ymm1
; AVX1-NEXT: vmovaps %xmm2, %xmm1
@@ -1104,11 +1104,11 @@
;
; AVX1-LABEL: test_load_nt64xi8:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
; AVX1-NEXT: # implicit-def: $ymm1
; AVX1-NEXT: vmovaps %xmm2, %xmm1
@@ -1170,11 +1170,11 @@
;
; AVX1-LABEL: test_load_nt32xi16:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
; AVX1-NEXT: # implicit-def: $ymm1
; AVX1-NEXT: vmovaps %xmm2, %xmm1
@@ -1236,11 +1236,11 @@
;
; AVX1-LABEL: test_load_nt16xi32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
; AVX1-NEXT: # implicit-def: $ymm1
; AVX1-NEXT: vmovaps %xmm2, %xmm1
@@ -1290,11 +1290,11 @@
;
; AVX1-LABEL: test_load_nt8xi64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
-; AVX1-NEXT: # implicit-def: $ymm1
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: # implicit-def: $ymm0
+; AVX1-NEXT: vmovaps %xmm1, %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
; AVX1-NEXT: # implicit-def: $ymm1
; AVX1-NEXT: vmovaps %xmm2, %xmm1
Index: test/CodeGen/X86/fast-isel-select.ll
===================================================================
--- test/CodeGen/X86/fast-isel-select.ll
+++ test/CodeGen/X86/fast-isel-select.ll
@@ -9,14 +9,13 @@
define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) {
; CHECK-LABEL: fastisel_select:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movb %sil, %al
+; CHECK-NEXT: movb %sil, %dl
; CHECK-NEXT: movb %dil, %cl
-; CHECK-NEXT: xorl %esi, %esi
-; CHECK-NEXT: subb %al, %cl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: subb %dl, %cl
; CHECK-NEXT: testb $1, %cl
-; CHECK-NEXT: movl $1204476887, %edi ## imm = 0x47CADBD7
-; CHECK-NEXT: cmovnel %edi, %esi
-; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: movl $1204476887, %ecx ## imm = 0x47CADBD7
+; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK-NEXT: retq
%shuffleInternal15257_8932 = sub i1 %exchSub2211_, %trunc_8766
%counter_diff1345 = select i1 %shuffleInternal15257_8932, i32 1204476887, i32 0
Index: test/CodeGen/X86/phys-reg-local-regalloc.ll
===================================================================
--- test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -22,7 +22,6 @@
; ATOM: movl 24(%esp), %eax
; ATOM-NOT: movl
; ATOM: movl %eax, 36(%esp)
-; ATOM: movl
; ATOM: movl 28(%esp), %ebx
; ATOM-NOT: movl
; ATOM: movl %ebx, 40(%esp)
Index: test/CodeGen/X86/pr11415.ll
===================================================================
--- test/CodeGen/X86/pr11415.ll
+++ test/CodeGen/X86/pr11415.ll
@@ -6,13 +6,10 @@
; CHECK: #APP
; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movq %rcx, %rax
-; CHECK-NEXT: movq %rax, -8(%rsp)
-; CHECK-NEXT: movq -8(%rsp), %rdx
+; CHECK-NEXT: movq %rcx, %rdx
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: movq %rdx, -8(%rsp)
+; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: ret
define i64 @foo() {
Index: test/CodeGen/X86/pr30430.ll
===================================================================
--- test/CodeGen/X86/pr30430.ll
+++ test/CodeGen/X86/pr30430.ll
@@ -10,15 +10,15 @@
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-64, %rsp
-; CHECK-NEXT: subq $320, %rsp # imm = 0x140
+; CHECK-NEXT: subq $256, %rsp # imm = 0x100
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
@@ -27,103 +27,77 @@
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovaps %zmm21, %zmm0
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps %zmm20, %zmm0
-; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; CHECK-NEXT: vmovaps %zmm22, %zmm1
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; CHECK-NEXT: vmovaps %zmm23, %zmm1
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; CHECK-NEXT: vmovaps %zmm17, %zmm1
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps %zmm16, %zmm0
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; CHECK-NEXT: vmovaps %zmm18, %zmm1
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; CHECK-NEXT: vmovaps %zmm19, %zmm1
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm9 = xmm9[0],xmm15[0],xmm9[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm9 = xmm9[0,1],xmm14[0],xmm9[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm9 = xmm9[0,1,2],xmm13[0]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm12[0],xmm1[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm11[0],xmm1[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm10 = xmm1[0,1,2],xmm10[0]
; CHECK-NEXT: # implicit-def: $ymm1
-; CHECK-NEXT: vmovaps %xmm0, %xmm1
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm4[0],xmm5[0],xmm4[2,3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm7[0]
-; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Reload
-; CHECK-NEXT: # xmm4 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 4-byte Reload
-; CHECK-NEXT: # xmm5 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1],xmm2[0],xmm4[3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
-; CHECK-NEXT: # implicit-def: $ymm3
-; CHECK-NEXT: vmovaps %xmm2, %xmm3
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
-; CHECK-NEXT: # implicit-def: $zmm24
-; CHECK-NEXT: vmovaps %zmm3, %zmm24
-; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm24, %zmm24
-; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovaps %xmm10, %xmm1
+; CHECK-NEXT: vinsertf128 $1, %xmm9, %ymm1, %ymm1
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm7[0],xmm2[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm6[0]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm4[0],xmm0[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm3[0]
+; CHECK-NEXT: # implicit-def: $ymm0
+; CHECK-NEXT: vmovaps %xmm3, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
+; CHECK-NEXT: # implicit-def: $zmm0
+; CHECK-NEXT: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
-; CHECK-NEXT: vmovss %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
Index: test/CodeGen/X86/pr32241.ll
===================================================================
--- test/CodeGen/X86/pr32241.ll
+++ test/CodeGen/X86/pr32241.ll
@@ -10,41 +10,41 @@
; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376
; CHECK-NEXT: movw $19417, {{[0-9]+}}(%esp) # imm = 0x4BD9
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movb $1, %cl
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) # 1-byte Spill
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %lor.rhs
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) # 1-byte Spill
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: jmp .LBB0_2
; CHECK-NEXT: .LBB0_2: # %lor.end
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al # 1-byte Reload
-; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
-; CHECK-NEXT: cmpl %ecx, %edx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: cmpl %ecx, %eax
; CHECK-NEXT: setl %al
; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: xorl $-1, %ecx
-; CHECK-NEXT: cmpl $0, %ecx
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: xorl $-1, %eax
+; CHECK-NEXT: cmpl $0, %eax
; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) # 1-byte Spill
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: jne .LBB0_4
; CHECK-NEXT: # %bb.3: # %lor.rhs4
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) # 1-byte Spill
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_4: # %lor.end5
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al # 1-byte Reload
+; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %ecx
-; CHECK-NEXT: movw %cx, %dx
-; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 4
Index: test/CodeGen/X86/pr32256.ll
===================================================================
--- test/CodeGen/X86/pr32256.ll
+++ test/CodeGen/X86/pr32256.ll
@@ -10,17 +10,17 @@
; CHECK-NEXT: subl $2, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 6
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: movb c, %dl
-; CHECK-NEXT: xorb $-1, %dl
-; CHECK-NEXT: testb $1, %dl
-; CHECK-NEXT: movb %cl, (%esp) # 1-byte Spill
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: movb c, %cl
+; CHECK-NEXT: xorb $-1, %cl
+; CHECK-NEXT: testb $1, %cl
+; CHECK-NEXT: movb %al, (%esp) # 1-byte Spill
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: jmp .LBB0_2
; CHECK-NEXT: .LBB0_1: # %land.rhs
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: movb %cl, (%esp) # 1-byte Spill
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: movb %al, (%esp) # 1-byte Spill
; CHECK-NEXT: jmp .LBB0_2
; CHECK-NEXT: .LBB0_2: # %land.end
; CHECK-NEXT: movb (%esp), %al # 1-byte Reload
Index: test/CodeGen/X86/pr32284.ll
===================================================================
--- test/CodeGen/X86/pr32284.ll
+++ test/CodeGen/X86/pr32284.ll
@@ -10,28 +10,28 @@
; X86-O0-LABEL: foo:
; X86-O0: # %bb.0: # %entry
; X86-O0-NEXT: xorl %eax, %eax
-; X86-O0-NEXT: movl %eax, %ecx
-; X86-O0-NEXT: xorl %eax, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
+; X86-O0-NEXT: xorl %ecx, %ecx
; X86-O0-NEXT: movzbl c, %edx
-; X86-O0-NEXT: subl %edx, %eax
-; X86-O0-NEXT: movslq %eax, %rsi
-; X86-O0-NEXT: subq %rsi, %rcx
-; X86-O0-NEXT: movb %cl, %dil
-; X86-O0-NEXT: cmpb $0, %dil
-; X86-O0-NEXT: setne %dil
-; X86-O0-NEXT: andb $1, %dil
-; X86-O0-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: subl %edx, %ecx
+; X86-O0-NEXT: movslq %ecx, %rcx
+; X86-O0-NEXT: subq %rcx, %rax
+; X86-O0-NEXT: # kill: def $al killed $al killed $rax
+; X86-O0-NEXT: cmpb $0, %al
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: cmpb $0, c
-; X86-O0-NEXT: setne %dil
-; X86-O0-NEXT: xorb $-1, %dil
-; X86-O0-NEXT: xorb $-1, %dil
-; X86-O0-NEXT: andb $1, %dil
-; X86-O0-NEXT: movzbl %dil, %eax
-; X86-O0-NEXT: movzbl c, %edx
-; X86-O0-NEXT: cmpl %edx, %eax
-; X86-O0-NEXT: setle %dil
-; X86-O0-NEXT: andb $1, %dil
-; X86-O0-NEXT: movzbl %dil, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: movzbl c, %ecx
+; X86-O0-NEXT: cmpl %ecx, %eax
+; X86-O0-NEXT: setle %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: retq
;
@@ -63,13 +63,13 @@
; 686-O0-NEXT: xorb $-1, %al
; 686-O0-NEXT: xorb $-1, %al
; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %ecx
-; 686-O0-NEXT: movzbl c, %edx
-; 686-O0-NEXT: cmpl %edx, %ecx
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movzbl c, %ecx
+; 686-O0-NEXT: cmpl %ecx, %eax
; 686-O0-NEXT: setle %al
; 686-O0-NEXT: andb $1, %al
-; 686-O0-NEXT: movzbl %al, %ecx
-; 686-O0-NEXT: movl %ecx, (%esp)
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movl %eax, (%esp)
; 686-O0-NEXT: addl $8, %esp
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
@@ -127,33 +127,33 @@
; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5
; X86-O0-NEXT: addq %rcx, %rax
; X86-O0-NEXT: cmpq $0, %rax
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; X86-O0-NEXT: movl var_5, %esi
-; X86-O0-NEXT: xorl $-1, %esi
-; X86-O0-NEXT: cmpl $0, %esi
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %esi
-; X86-O0-NEXT: movl %esi, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: movl var_5, %eax
+; X86-O0-NEXT: xorl $-1, %eax
+; X86-O0-NEXT: cmpl $0, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
; X86-O0-NEXT: movslq var_5, %rcx
; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5
; X86-O0-NEXT: cmpq %rcx, %rax
-; X86-O0-NEXT: setg %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %esi
-; X86-O0-NEXT: movl %esi, %eax
+; X86-O0-NEXT: setg %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
; X86-O0-NEXT: movq %rax, var_57
-; X86-O0-NEXT: movl var_5, %esi
-; X86-O0-NEXT: xorl $-1, %esi
-; X86-O0-NEXT: cmpl $0, %esi
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %esi
-; X86-O0-NEXT: movl %esi, %eax
+; X86-O0-NEXT: movl var_5, %eax
+; X86-O0-NEXT: xorl $-1, %eax
+; X86-O0-NEXT: cmpl $0, %eax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: # kill: def $rax killed $eax
; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E
; X86-O0-NEXT: retq
;
@@ -179,62 +179,37 @@
;
; 686-O0-LABEL: f1:
; 686-O0: # %bb.0: # %entry
-; 686-O0-NEXT: pushl %ebp
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
-; 686-O0-NEXT: pushl %ebx
-; 686-O0-NEXT: .cfi_def_cfa_offset 12
-; 686-O0-NEXT: pushl %edi
-; 686-O0-NEXT: .cfi_def_cfa_offset 16
-; 686-O0-NEXT: pushl %esi
-; 686-O0-NEXT: .cfi_def_cfa_offset 20
-; 686-O0-NEXT: subl $24, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 44
-; 686-O0-NEXT: .cfi_offset %esi, -20
-; 686-O0-NEXT: .cfi_offset %edi, -16
-; 686-O0-NEXT: .cfi_offset %ebx, -12
-; 686-O0-NEXT: .cfi_offset %ebp, -8
+; 686-O0-NEXT: subl $1, %esp
+; 686-O0-NEXT: .cfi_def_cfa_offset 5
; 686-O0-NEXT: movl var_5, %eax
; 686-O0-NEXT: movl %eax, %ecx
; 686-O0-NEXT: sarl $31, %ecx
; 686-O0-NEXT: xorl $208307499, %eax # imm = 0xC6A852B
; 686-O0-NEXT: xorl $-2, %ecx
; 686-O0-NEXT: orl %ecx, %eax
-; 686-O0-NEXT: setne {{[0-9]+}}(%esp)
+; 686-O0-NEXT: setne (%esp)
; 686-O0-NEXT: movl var_5, %ecx
+; 686-O0-NEXT: movl %ecx, %eax
+; 686-O0-NEXT: sarl $31, %eax
; 686-O0-NEXT: movl %ecx, %edx
-; 686-O0-NEXT: sarl $31, %edx
-; 686-O0-NEXT: movl %ecx, %esi
-; 686-O0-NEXT: subl $-1, %esi
-; 686-O0-NEXT: sete %bl
-; 686-O0-NEXT: movzbl %bl, %edi
+; 686-O0-NEXT: subl $-1, %edx
+; 686-O0-NEXT: sete %dl
+; 686-O0-NEXT: movzbl %dl, %edx
; 686-O0-NEXT: addl $7093, %ecx # imm = 0x1BB5
-; 686-O0-NEXT: adcl $0, %edx
-; 686-O0-NEXT: subl %edi, %ecx
-; 686-O0-NEXT: sbbl $0, %edx
-; 686-O0-NEXT: setl %bl
-; 686-O0-NEXT: movzbl %bl, %edi
-; 686-O0-NEXT: movl %edi, var_57
+; 686-O0-NEXT: adcl $0, %eax
+; 686-O0-NEXT: subl %edx, %ecx
+; 686-O0-NEXT: sbbl $0, %eax
+; 686-O0-NEXT: setl %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movl %eax, var_57
; 686-O0-NEXT: movl $0, var_57+4
-; 686-O0-NEXT: movl var_5, %edi
-; 686-O0-NEXT: subl $-1, %edi
-; 686-O0-NEXT: sete %bl
-; 686-O0-NEXT: movzbl %bl, %ebp
-; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E
+; 686-O0-NEXT: movl var_5, %eax
+; 686-O0-NEXT: subl $-1, %eax
+; 686-O0-NEXT: sete %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movl %eax, _ZN8struct_210member_2_0E
; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4
-; 686-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %edi, (%esp) # 4-byte Spill
-; 686-O0-NEXT: addl $24, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 20
-; 686-O0-NEXT: popl %esi
-; 686-O0-NEXT: .cfi_def_cfa_offset 16
-; 686-O0-NEXT: popl %edi
-; 686-O0-NEXT: .cfi_def_cfa_offset 12
-; 686-O0-NEXT: popl %ebx
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
-; 686-O0-NEXT: popl %ebp
+; 686-O0-NEXT: addl $1, %esp
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
;
@@ -316,25 +291,25 @@
; X86-O0-NEXT: setne %cl
; X86-O0-NEXT: xorb $-1, %cl
; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %edx
-; X86-O0-NEXT: xorl %edx, %eax
-; X86-O0-NEXT: movw %ax, %si
-; X86-O0-NEXT: movw %si, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: movzbl %cl, %ecx
+; X86-O0-NEXT: xorl %ecx, %eax
+; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: movzbl var_7, %eax
-; X86-O0-NEXT: movw %ax, %si
-; X86-O0-NEXT: cmpw $0, %si
-; X86-O0-NEXT: setne %cl
-; X86-O0-NEXT: xorb $-1, %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %eax
-; X86-O0-NEXT: movzbl var_7, %edx
-; X86-O0-NEXT: cmpl %edx, %eax
-; X86-O0-NEXT: sete %cl
-; X86-O0-NEXT: andb $1, %cl
-; X86-O0-NEXT: movzbl %cl, %eax
-; X86-O0-NEXT: movw %ax, %si
-; X86-O0-NEXT: # implicit-def: $rdi
-; X86-O0-NEXT: movw %si, (%rdi)
+; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-O0-NEXT: cmpw $0, %ax
+; X86-O0-NEXT: setne %al
+; X86-O0-NEXT: xorb $-1, %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: movzbl var_7, %ecx
+; X86-O0-NEXT: cmpl %ecx, %eax
+; X86-O0-NEXT: sete %al
+; X86-O0-NEXT: andb $1, %al
+; X86-O0-NEXT: movzbl %al, %eax
+; X86-O0-NEXT: movw %ax, %cx
+; X86-O0-NEXT: # implicit-def: $rax
+; X86-O0-NEXT: movw %cx, (%rax)
; X86-O0-NEXT: retq
;
; X64-LABEL: f2:
@@ -356,38 +331,33 @@
;
; 686-O0-LABEL: f2:
; 686-O0: # %bb.0: # %entry
-; 686-O0-NEXT: pushl %esi
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
; 686-O0-NEXT: subl $2, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 10
-; 686-O0-NEXT: .cfi_offset %esi, -8
+; 686-O0-NEXT: .cfi_def_cfa_offset 6
; 686-O0-NEXT: movzbl var_7, %eax
; 686-O0-NEXT: cmpb $0, var_7
; 686-O0-NEXT: setne %cl
; 686-O0-NEXT: xorb $-1, %cl
; 686-O0-NEXT: andb $1, %cl
-; 686-O0-NEXT: movzbl %cl, %edx
-; 686-O0-NEXT: xorl %edx, %eax
-; 686-O0-NEXT: movw %ax, %si
-; 686-O0-NEXT: movw %si, (%esp)
+; 686-O0-NEXT: movzbl %cl, %ecx
+; 686-O0-NEXT: xorl %ecx, %eax
+; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; 686-O0-NEXT: movw %ax, (%esp)
; 686-O0-NEXT: movzbl var_7, %eax
-; 686-O0-NEXT: movw %ax, %si
-; 686-O0-NEXT: cmpw $0, %si
-; 686-O0-NEXT: setne %cl
-; 686-O0-NEXT: xorb $-1, %cl
-; 686-O0-NEXT: andb $1, %cl
-; 686-O0-NEXT: movzbl %cl, %eax
-; 686-O0-NEXT: movzbl var_7, %edx
-; 686-O0-NEXT: cmpl %edx, %eax
-; 686-O0-NEXT: sete %cl
-; 686-O0-NEXT: andb $1, %cl
-; 686-O0-NEXT: movzbl %cl, %eax
-; 686-O0-NEXT: movw %ax, %si
+; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; 686-O0-NEXT: cmpw $0, %ax
+; 686-O0-NEXT: setne %al
+; 686-O0-NEXT: xorb $-1, %al
+; 686-O0-NEXT: andb $1, %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movzbl var_7, %ecx
+; 686-O0-NEXT: cmpl %ecx, %eax
+; 686-O0-NEXT: sete %al
+; 686-O0-NEXT: andb $1, %al
+; 686-O0-NEXT: movzbl %al, %eax
+; 686-O0-NEXT: movw %ax, %cx
; 686-O0-NEXT: # implicit-def: $eax
-; 686-O0-NEXT: movw %si, (%eax)
+; 686-O0-NEXT: movw %cx, (%eax)
; 686-O0-NEXT: addl $2, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 8
-; 686-O0-NEXT: popl %esi
; 686-O0-NEXT: .cfi_def_cfa_offset 4
; 686-O0-NEXT: retl
;
@@ -447,34 +417,34 @@
; X86-O0-NEXT: movl var_13, %eax
; X86-O0-NEXT: xorl $-1, %eax
; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: movl %eax, %ecx
+; X86-O0-NEXT: # kill: def $rax killed $eax
; X86-O0-NEXT: cmpl $0, var_13
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %eax
-; X86-O0-NEXT: movl %eax, %esi
-; X86-O0-NEXT: movl var_13, %eax
-; X86-O0-NEXT: xorl $-1, %eax
-; X86-O0-NEXT: xorl var_16, %eax
-; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: movl %eax, %edi
-; X86-O0-NEXT: andq %rdi, %rsi
-; X86-O0-NEXT: orq %rsi, %rcx
-; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; X86-O0-NEXT: setne %cl
+; X86-O0-NEXT: xorb $-1, %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %ecx
+; X86-O0-NEXT: # kill: def $rcx killed $ecx
+; X86-O0-NEXT: movl var_13, %edx
+; X86-O0-NEXT: xorl $-1, %edx
+; X86-O0-NEXT: xorl var_16, %edx
+; X86-O0-NEXT: movl %edx, %edx
+; X86-O0-NEXT: # kill: def $rdx killed $edx
+; X86-O0-NEXT: andq %rdx, %rcx
+; X86-O0-NEXT: orq %rcx, %rax
+; X86-O0-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: movl var_13, %eax
; X86-O0-NEXT: xorl $-1, %eax
; X86-O0-NEXT: movl %eax, %eax
-; X86-O0-NEXT: movl %eax, %ecx
+; X86-O0-NEXT: # kill: def $rax killed $eax
; X86-O0-NEXT: cmpl $0, var_13
-; X86-O0-NEXT: setne %dl
-; X86-O0-NEXT: xorb $-1, %dl
-; X86-O0-NEXT: andb $1, %dl
-; X86-O0-NEXT: movzbl %dl, %eax
-; X86-O0-NEXT: movl %eax, %esi
-; X86-O0-NEXT: andq $0, %rsi
-; X86-O0-NEXT: orq %rsi, %rcx
-; X86-O0-NEXT: movl %ecx, %eax
+; X86-O0-NEXT: setne %cl
+; X86-O0-NEXT: xorb $-1, %cl
+; X86-O0-NEXT: andb $1, %cl
+; X86-O0-NEXT: movzbl %cl, %ecx
+; X86-O0-NEXT: # kill: def $rcx killed $ecx
+; X86-O0-NEXT: andq $0, %rcx
+; X86-O0-NEXT: orq %rcx, %rax
+; X86-O0-NEXT: # kill: def $eax killed $eax killed $rax
; X86-O0-NEXT: movl %eax, var_46
; X86-O0-NEXT: retq
;
@@ -500,31 +470,28 @@
; 686-O0-NEXT: .cfi_offset %ebp, -8
; 686-O0-NEXT: movl %esp, %ebp
; 686-O0-NEXT: .cfi_def_cfa_register %ebp
-; 686-O0-NEXT: pushl %edi
; 686-O0-NEXT: pushl %esi
; 686-O0-NEXT: andl $-8, %esp
-; 686-O0-NEXT: subl $8, %esp
-; 686-O0-NEXT: .cfi_offset %esi, -16
-; 686-O0-NEXT: .cfi_offset %edi, -12
-; 686-O0-NEXT: movl var_13, %eax
-; 686-O0-NEXT: movl %eax, %ecx
-; 686-O0-NEXT: notl %ecx
-; 686-O0-NEXT: testl %eax, %eax
-; 686-O0-NEXT: sete %dl
-; 686-O0-NEXT: movzbl %dl, %eax
+; 686-O0-NEXT: subl $16, %esp
+; 686-O0-NEXT: .cfi_offset %esi, -12
+; 686-O0-NEXT: movl var_13, %ecx
+; 686-O0-NEXT: movl %ecx, %eax
+; 686-O0-NEXT: notl %eax
+; 686-O0-NEXT: testl %ecx, %ecx
+; 686-O0-NEXT: sete %cl
+; 686-O0-NEXT: movzbl %cl, %ecx
; 686-O0-NEXT: movl var_16, %esi
-; 686-O0-NEXT: movl %ecx, %edi
-; 686-O0-NEXT: xorl %esi, %edi
-; 686-O0-NEXT: andl %edi, %eax
-; 686-O0-NEXT: orl %eax, %ecx
-; 686-O0-NEXT: movl %ecx, (%esp)
+; 686-O0-NEXT: movl %eax, %edx
+; 686-O0-NEXT: xorl %esi, %edx
+; 686-O0-NEXT: andl %edx, %ecx
+; 686-O0-NEXT: orl %ecx, %eax
+; 686-O0-NEXT: movl %eax, (%esp)
; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp)
; 686-O0-NEXT: movl var_13, %eax
; 686-O0-NEXT: notl %eax
; 686-O0-NEXT: movl %eax, var_46
-; 686-O0-NEXT: leal -8(%ebp), %esp
+; 686-O0-NEXT: leal -4(%ebp), %esp
; 686-O0-NEXT: popl %esi
-; 686-O0-NEXT: popl %edi
; 686-O0-NEXT: popl %ebp
; 686-O0-NEXT: .cfi_def_cfa %esp, 4
; 686-O0-NEXT: retl
Index: test/CodeGen/X86/pr32340.ll
===================================================================
--- test/CodeGen/X86/pr32340.ll
+++ test/CodeGen/X86/pr32340.ll
@@ -14,39 +14,39 @@
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: # kill: def $rax killed $eax
; X64-NEXT: movw $0, var_825
-; X64-NEXT: movzwl var_32, %eax
+; X64-NEXT: movzwl var_32, %edx
+; X64-NEXT: movzwl var_901, %ecx
+; X64-NEXT: movl %edx, %esi
+; X64-NEXT: xorl %ecx, %esi
+; X64-NEXT: movl %edx, %ecx
+; X64-NEXT: xorl %esi, %ecx
+; X64-NEXT: addl %edx, %ecx
+; X64-NEXT: movslq %ecx, %rcx
+; X64-NEXT: movq %rcx, var_826
+; X64-NEXT: movzwl var_32, %ecx
+; X64-NEXT: # kill: def $rcx killed $ecx
; X64-NEXT: movzwl var_901, %edx
-; X64-NEXT: movl %eax, %esi
-; X64-NEXT: xorl %edx, %esi
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: xorl %esi, %edx
-; X64-NEXT: addl %eax, %edx
-; X64-NEXT: movslq %edx, %rdi
-; X64-NEXT: movq %rdi, var_826
-; X64-NEXT: movzwl var_32, %eax
-; X64-NEXT: movl %eax, %edi
-; X64-NEXT: movzwl var_901, %eax
-; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D
-; X64-NEXT: movslq %eax, %r8
-; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440
-; X64-NEXT: xorq %r9, %r8
-; X64-NEXT: movq %rdi, %r9
-; X64-NEXT: xorq %r8, %r9
-; X64-NEXT: xorq $-1, %r9
-; X64-NEXT: xorq %r9, %rdi
-; X64-NEXT: movq %rdi, %r8
-; X64-NEXT: orq var_57, %r8
-; X64-NEXT: orq %r8, %rdi
-; X64-NEXT: movw %di, %r10w
-; X64-NEXT: movw %r10w, var_900
-; X64-NEXT: cmpq var_28, %rcx
-; X64-NEXT: setne %r11b
-; X64-NEXT: andb $1, %r11b
-; X64-NEXT: movzbl %r11b, %eax
-; X64-NEXT: movw %ax, %r10w
-; X64-NEXT: movw %r10w, var_827
+; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D
+; X64-NEXT: movslq %edx, %rsi
+; X64-NEXT: movabsq $-1142377792914660288, %rdx # imm = 0xF02575732E06E440
+; X64-NEXT: xorq %rdx, %rsi
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: xorq %rsi, %rdx
+; X64-NEXT: xorq $-1, %rdx
+; X64-NEXT: xorq %rdx, %rcx
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: orq var_57, %rdx
+; X64-NEXT: orq %rdx, %rcx
+; X64-NEXT: # kill: def $cx killed $cx killed $rcx
+; X64-NEXT: movw %cx, var_900
+; X64-NEXT: cmpq var_28, %rax
+; X64-NEXT: setne %al
+; X64-NEXT: andb $1, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: movw %ax, var_827
; X64-NEXT: retq
entry:
store i16 0, i16* @var_825, align 2
Index: test/CodeGen/X86/pr32345.ll
===================================================================
--- test/CodeGen/X86/pr32345.ll
+++ test/CodeGen/X86/pr32345.ll
@@ -15,23 +15,23 @@
; X640-NEXT: xorl %ecx, %eax
; X640-NEXT: movzwl var_27, %ecx
; X640-NEXT: xorl %ecx, %eax
-; X640-NEXT: movslq %eax, %rdx
-; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; X640-NEXT: cltq
+; X640-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X640-NEXT: movzwl var_22, %eax
; X640-NEXT: movzwl var_27, %ecx
; X640-NEXT: xorl %ecx, %eax
; X640-NEXT: movzwl var_27, %ecx
; X640-NEXT: xorl %ecx, %eax
-; X640-NEXT: movslq %eax, %rdx
-; X640-NEXT: movzwl var_27, %eax
-; X640-NEXT: subl $16610, %eax # imm = 0x40E2
-; X640-NEXT: movl %eax, %eax
-; X640-NEXT: movl %eax, %ecx
+; X640-NEXT: cltq
+; X640-NEXT: movzwl var_27, %ecx
+; X640-NEXT: subl $16610, %ecx # imm = 0x40E2
+; X640-NEXT: movl %ecx, %ecx
+; X640-NEXT: # kill: def $rcx killed $ecx
; X640-NEXT: # kill: def $cl killed $rcx
-; X640-NEXT: sarq %cl, %rdx
-; X640-NEXT: movb %dl, %cl
-; X640-NEXT: # implicit-def: $rdx
-; X640-NEXT: movb %cl, (%rdx)
+; X640-NEXT: sarq %cl, %rax
+; X640-NEXT: movb %al, %cl
+; X640-NEXT: # implicit-def: $rax
+; X640-NEXT: movb %cl, (%rax)
; X640-NEXT: retq
;
; 6860-LABEL: foo:
@@ -41,44 +41,37 @@
; 6860-NEXT: .cfi_offset %ebp, -8
; 6860-NEXT: movl %esp, %ebp
; 6860-NEXT: .cfi_def_cfa_register %ebp
-; 6860-NEXT: pushl %ebx
-; 6860-NEXT: pushl %edi
-; 6860-NEXT: pushl %esi
; 6860-NEXT: andl $-8, %esp
-; 6860-NEXT: subl $32, %esp
-; 6860-NEXT: .cfi_offset %esi, -20
-; 6860-NEXT: .cfi_offset %edi, -16
-; 6860-NEXT: .cfi_offset %ebx, -12
-; 6860-NEXT: movw var_22, %ax
+; 6860-NEXT: subl $24, %esp
+; 6860-NEXT: movw var_22, %dx
; 6860-NEXT: movzwl var_27, %ecx
-; 6860-NEXT: movw %cx, %dx
-; 6860-NEXT: xorw %dx, %ax
-; 6860-NEXT: # implicit-def: $esi
-; 6860-NEXT: movw %ax, %si
-; 6860-NEXT: xorl %ecx, %esi
-; 6860-NEXT: movw %si, %ax
-; 6860-NEXT: movzwl %ax, %ecx
-; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; 6860-NEXT: movw %cx, %ax
+; 6860-NEXT: xorw %ax, %dx
+; 6860-NEXT: # implicit-def: $eax
+; 6860-NEXT: movw %dx, %ax
+; 6860-NEXT: xorl %ecx, %eax
+; 6860-NEXT: # kill: def $ax killed $ax killed $eax
+; 6860-NEXT: movzwl %ax, %eax
+; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp)
; 6860-NEXT: movl $0, {{[0-9]+}}(%esp)
-; 6860-NEXT: movw var_22, %ax
-; 6860-NEXT: movzwl var_27, %ecx
-; 6860-NEXT: movw %cx, %dx
-; 6860-NEXT: xorw %dx, %ax
-; 6860-NEXT: # implicit-def: $esi
-; 6860-NEXT: movw %ax, %si
-; 6860-NEXT: xorl %ecx, %esi
-; 6860-NEXT: movw %si, %ax
-; 6860-NEXT: movzwl %ax, %esi
-; 6860-NEXT: addl $-16610, %ecx # imm = 0xBF1E
-; 6860-NEXT: movb %cl, %bl
-; 6860-NEXT: xorl %ecx, %ecx
-; 6860-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 6860-NEXT: movb %bl, %cl
-; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; 6860-NEXT: shrdl %cl, %edi, %esi
-; 6860-NEXT: testb $32, %bl
-; 6860-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 6860-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 6860-NEXT: movw var_22, %dx
+; 6860-NEXT: movzwl var_27, %eax
+; 6860-NEXT: movw %ax, %cx
+; 6860-NEXT: xorw %cx, %dx
+; 6860-NEXT: # implicit-def: $ecx
+; 6860-NEXT: movw %dx, %cx
+; 6860-NEXT: xorl %eax, %ecx
+; 6860-NEXT: # kill: def $cx killed $cx killed $ecx
+; 6860-NEXT: movzwl %cx, %edx
+; 6860-NEXT: addl $-16610, %eax # imm = 0xBF1E
+; 6860-NEXT: movb %al, %cl
+; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; 6860-NEXT: xorl %eax, %eax
+; 6860-NEXT: shrdl %cl, %eax, %edx
+; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; 6860-NEXT: testb $32, %cl
+; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: jne .LBB0_2
; 6860-NEXT: # %bb.1: # %bb
; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -88,10 +81,7 @@
; 6860-NEXT: movb %al, %cl
; 6860-NEXT: # implicit-def: $eax
; 6860-NEXT: movb %cl, (%eax)
-; 6860-NEXT: leal -12(%ebp), %esp
-; 6860-NEXT: popl %esi
-; 6860-NEXT: popl %edi
-; 6860-NEXT: popl %ebx
+; 6860-NEXT: movl %ebp, %esp
; 6860-NEXT: popl %ebp
; 6860-NEXT: .cfi_def_cfa %esp, 4
; 6860-NEXT: retl
Index: test/CodeGen/X86/pr32451.ll
===================================================================
--- test/CodeGen/X86/pr32451.ll
+++ test/CodeGen/X86/pr32451.ll
@@ -9,29 +9,24 @@
define i8** @japi1_convert_690(i8**, i8***, i32) {
; CHECK-LABEL: japi1_convert_690:
; CHECK: # %bb.0: # %top
-; CHECK-NEXT: pushl %ebx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: subl $16, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset %ebx, -8
+; CHECK-NEXT: subl $12, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: calll julia.gc_root_decl
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: calll jl_get_ptls_states
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; CHECK-NEXT: movl 4(%ecx), %edx
-; CHECK-NEXT: movb (%edx), %bl
-; CHECK-NEXT: andb $1, %bl
-; CHECK-NEXT: movzbl %bl, %edx
-; CHECK-NEXT: movl %edx, (%esp)
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; CHECK-NEXT: # kill: def $ecx killed $eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT: movl 4(%eax), %eax
+; CHECK-NEXT: movb (%eax), %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: calll jl_box_int32
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %eax, (%ecx)
-; CHECK-NEXT: addl $16, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
top:
Index: test/CodeGen/X86/pr32484.ll
===================================================================
--- test/CodeGen/X86/pr32484.ll
+++ test/CodeGen/X86/pr32484.ll
@@ -8,10 +8,9 @@
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: # implicit-def: $rax
-; CHECK-NEXT: movdqu %xmm1, (%rax)
-; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT: movdqu %xmm0, (%rax)
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
indirectbr i8* undef, [label %9, label %1]
Index: test/CodeGen/X86/pr34592.ll
===================================================================
--- test/CodeGen/X86/pr34592.ll
+++ test/CodeGen/X86/pr34592.ll
@@ -10,55 +10,47 @@
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $352, %rsp # imm = 0x160
-; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
-; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
-; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
-; CHECK-NEXT: vmovaps 144(%rbp), %ymm11
-; CHECK-NEXT: vmovaps 112(%rbp), %ymm12
-; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
-; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
-; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
-; CHECK-NEXT: vmovaps %xmm9, %xmm6
-; CHECK-NEXT: vmovdqa %xmm6, %xmm9
-; CHECK-NEXT: # kill: def $ymm9 killed $xmm9
-; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: # implicit-def: $ymm0
-; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,3,2,0]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5],ymm11[6,7]
-; CHECK-NEXT: vmovaps %xmm2, %xmm6
-; CHECK-NEXT: # implicit-def: $ymm2
-; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm2, %ymm2
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
-; CHECK-NEXT: vpblendd {{.*#+}} ymm11 = ymm7[0,1],ymm11[2,3],ymm7[4,5],ymm11[6,7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,1,2,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm11[0,1,2,3],ymm2[4,5,6,7]
-; CHECK-NEXT: vmovaps %xmm7, %xmm6
-; CHECK-NEXT: vpslldq {{.*#+}} xmm6 = zero,zero,zero,zero,zero,zero,zero,zero,xmm6[0,1,2,3,4,5,6,7]
-; CHECK-NEXT: # implicit-def: $ymm11
-; CHECK-NEXT: vmovaps %xmm6, %xmm11
-; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm9[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm9[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm11[0,1,2,3],ymm9[4,5,6,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm7 = ymm7[2,1,1,3]
-; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
-; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm5, %ymm1
-; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm9, %ymm3
-; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: vmovaps %ymm4, %ymm10
+; CHECK-NEXT: vmovaps %ymm3, %ymm9
+; CHECK-NEXT: vmovaps %ymm2, %ymm3
+; CHECK-NEXT: vmovaps %ymm1, %ymm8
+; CHECK-NEXT: vmovaps %ymm0, %ymm4
+; CHECK-NEXT: vmovaps 240(%rbp), %ymm1
+; CHECK-NEXT: vmovaps 208(%rbp), %ymm2
+; CHECK-NEXT: vmovaps 176(%rbp), %ymm0
+; CHECK-NEXT: vmovaps 144(%rbp), %ymm0
+; CHECK-NEXT: vmovaps 112(%rbp), %ymm11
+; CHECK-NEXT: vmovaps 80(%rbp), %ymm11
+; CHECK-NEXT: vmovaps 48(%rbp), %ymm11
+; CHECK-NEXT: vmovaps 16(%rbp), %ymm11
+; CHECK-NEXT: vpblendd {{.*#+}} ymm3 = ymm6[0,1,2,3,4,5],ymm3[6,7]
+; CHECK-NEXT: vmovaps %xmm2, %xmm8
+; CHECK-NEXT: vmovdqa %xmm8, %xmm2
+; CHECK-NEXT: vmovaps %xmm2, %xmm4
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: # implicit-def: $ymm6
+; CHECK-NEXT: vinserti128 $1, %xmm8, %ymm6, %ymm6
+; CHECK-NEXT: vpalignr {{.*#+}} ymm0 = ymm3[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm3[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,0]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm6[4,5],ymm0[6,7]
+; CHECK-NEXT: vmovaps %xmm3, %xmm6
+; CHECK-NEXT: # implicit-def: $ymm3
+; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm3, %ymm3
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm7[0,1],ymm2[2,3],ymm7[4,5],ymm2[6,7]
+; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,1,2,3]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
+; CHECK-NEXT: vmovaps %xmm7, %xmm3
+; CHECK-NEXT: vpslldq {{.*#+}} xmm6 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: # implicit-def: $ymm3
+; CHECK-NEXT: vmovaps %xmm6, %xmm3
+; CHECK-NEXT: vpalignr {{.*#+}} ymm4 = ymm4[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm4[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,1,0,3]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm4[4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm7[0,1],ymm1[2,3],ymm7[4,5,6,7]
+; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,1,3]
+; CHECK-NEXT: vpshufd {{.*#+}} ymm4 = ymm5[0,1,0,1,4,5,4,5]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm4[6,7]
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
Index: test/CodeGen/X86/pr34653.ll
===================================================================
--- test/CodeGen/X86/pr34653.ll
+++ test/CodeGen/X86/pr34653.ll
@@ -17,151 +17,98 @@
; CHECK-NEXT: callq test
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: vmovaps %xmm0, %xmm1
+; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm2
-; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vmovaps %xmm3, %xmm4
-; CHECK-NEXT: vmovaps %xmm2, %xmm5
-; CHECK-NEXT: vmovaps %xmm5, %xmm6
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm7
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm8
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm9
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm10
-; CHECK-NEXT: vextractf32x4 $3, %zmm10, %xmm11
-; CHECK-NEXT: vmovaps %xmm11, %xmm12
-; CHECK-NEXT: vextractf32x4 $2, %zmm10, %xmm13
-; CHECK-NEXT: vmovaps %xmm13, %xmm14
-; CHECK-NEXT: vmovaps %xmm10, %xmm15
-; CHECK-NEXT: vmovaps %zmm15, %zmm16
-; CHECK-NEXT: vextractf32x4 $3, %zmm9, %xmm2
-; CHECK-NEXT: vmovaps %zmm2, %zmm17
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vextractf32x4 $2, %zmm9, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm18
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps %xmm9, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm19
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vextractf32x4 $3, %zmm8, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm20
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vextractf32x4 $2, %zmm8, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm21
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps %xmm8, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm22
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vextractf32x4 $3, %zmm7, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm23
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vextractf32x4 $2, %zmm7, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm24
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps %xmm7, %xmm0
-; CHECK-NEXT: vmovaps %zmm0, %zmm25
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm11 = xmm11[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm13 = xmm13[1,0]
-; CHECK-NEXT: # kill: def $ymm10 killed $ymm10 killed $zmm10
-; CHECK-NEXT: vextractf128 $1, %ymm10, %xmm10
-; CHECK-NEXT: vmovaps %zmm10, %zmm26
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm15 = xmm15[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: # kill: def $ymm9 killed $ymm9 killed $zmm9
-; CHECK-NEXT: vextractf128 $1, %ymm9, %xmm9
+; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, %xmm2
+; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 killed $ymm1
+; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm4
+; CHECK-NEXT: vmovaps %zmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm1
+; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm2
+; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm3
+; CHECK-NEXT: vextractf32x4 $3, %zmm3, %xmm15
+; CHECK-NEXT: vmovaps %xmm15, %xmm5
+; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vextractf32x4 $2, %zmm3, %xmm14
+; CHECK-NEXT: vmovaps %xmm14, %xmm5
+; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovaps %xmm3, %xmm13
+; CHECK-NEXT: vmovaps %zmm13, %zmm31
+; CHECK-NEXT: vextractf32x4 $3, %zmm2, %xmm12
+; CHECK-NEXT: vmovaps %zmm12, %zmm30
+; CHECK-NEXT: vextractf32x4 $2, %zmm2, %xmm11
+; CHECK-NEXT: vmovaps %zmm11, %zmm29
+; CHECK-NEXT: vmovaps %xmm2, %xmm10
+; CHECK-NEXT: vmovaps %zmm10, %zmm28
+; CHECK-NEXT: vextractf32x4 $3, %zmm1, %xmm9
; CHECK-NEXT: vmovaps %zmm9, %zmm27
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: # kill: def $ymm8 killed $ymm8 killed $zmm8
-; CHECK-NEXT: vextractf128 $1, %ymm8, %xmm8
-; CHECK-NEXT: vmovaps %zmm8, %zmm28
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm8
+; CHECK-NEXT: vmovaps %zmm8, %zmm26
+; CHECK-NEXT: vmovaps %xmm1, %xmm7
+; CHECK-NEXT: vmovaps %zmm7, %zmm25
+; CHECK-NEXT: vextractf32x4 $3, %zmm4, %xmm6
+; CHECK-NEXT: vmovaps %zmm6, %zmm24
+; CHECK-NEXT: vextractf32x4 $2, %zmm4, %xmm5
+; CHECK-NEXT: vmovaps %zmm5, %zmm23
+; CHECK-NEXT: vmovaps %zmm4, %zmm22
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: vmovaps %zmm0, %zmm21
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: # kill: def $ymm7 killed $ymm7 killed $zmm7
-; CHECK-NEXT: vextractf128 $1, %ymm7, %xmm7
-; CHECK-NEXT: vmovaps %zmm7, %zmm29
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vmovaps %zmm0, %zmm20
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm15 = xmm15[1,0]
+; CHECK-NEXT: vmovaps %zmm15, %zmm19
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm14 = xmm14[1,0]
+; CHECK-NEXT: vmovaps %zmm14, %zmm18
+; CHECK-NEXT: # kill: def $ymm3 killed $ymm3 killed $zmm3
+; CHECK-NEXT: vextractf128 $1, %ymm3, %xmm3
+; CHECK-NEXT: vmovaps %zmm3, %zmm17
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm13 = xmm13[1,0]
+; CHECK-NEXT: vmovaps %zmm13, %zmm16
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm15 = xmm12[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm14 = xmm11[1,0]
+; CHECK-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm2
+; CHECK-NEXT: vmovaps %xmm2, %xmm13
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm12 = xmm10[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm11 = xmm9[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm10 = xmm8[1,0]
+; CHECK-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, %xmm9
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm8 = xmm7[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm7 = xmm6[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
+; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, %xmm5
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm8 = xmm8[1,0]
-; CHECK-NEXT: vpermilpd {{.*#+}} xmm7 = xmm7[1,0]
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm30 # 8-byte Reload
-; CHECK-NEXT: # xmm30 = mem[0],zero
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm31 # 8-byte Reload
-; CHECK-NEXT: # xmm31 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm30, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm24, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm28, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm26, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm22, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm23, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:
vmovsd %xmm7, (%rsp) # 8-byte Spill ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 Index: test/CodeGen/X86/pr38865-2.ll =================================================================== --- test/CodeGen/X86/pr38865-2.ll +++ test/CodeGen/X86/pr38865-2.ll @@ -13,7 +13,8 @@ ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: callq _Z1bv -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq Index: test/CodeGen/X86/stack-protector-msvc.ll =================================================================== --- test/CodeGen/X86/stack-protector-msvc.ll +++ test/CodeGen/X86/stack-protector-msvc.ll @@ -48,9 +48,8 @@ ; MSVC-X86-O0: xorl %esp, %[[REG1]] ; MSVC-X86-O0: movl %[[REG1]], [[SLOT:[0-9]*]](%esp) ; MSVC-X86-O0: calll _strcpy -; MSVC-X86-O0: movl [[SLOT]](%esp), %[[REG1:[^ ]*]] -; MSVC-X86-O0: xorl %esp, %[[REG1]] -; MSVC-X86-O0: movl %[[REG1]], %ecx +; MSVC-X86-O0: movl [[SLOT]](%esp), %ecx +; MSVC-X86-O0: xorl %esp, %ecx ; MSVC-X86-O0: calll @__security_check_cookie@4 ; MSVC-X86-O0: retl @@ -59,9 +58,8 @@ ; MSVC-X64-O0: xorq %rsp, %[[REG1]] ; MSVC-X64-O0: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp) ; MSVC-X64-O0: callq strcpy -; MSVC-X64-O0: movq [[SLOT]](%rsp), %[[REG1:[^ ]*]] -; MSVC-X64-O0: xorq %rsp, %[[REG1]] -; MSVC-X64-O0: movq %[[REG1]], %rcx +; MSVC-X64-O0: movq [[SLOT]](%rsp), %rcx +; MSVC-X64-O0: xorq %rsp, %rcx ; MSVC-X64-O0: callq __security_check_cookie ; MSVC-X64-O0: retq Index: test/CodeGen/X86/swift-return.ll =================================================================== --- test/CodeGen/X86/swift-return.ll +++ test/CodeGen/X86/swift-return.ll @@ -28,10 +28,10 @@ ; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: callq gen -; CHECK-O0-NEXT: movswl %ax, %edi +; CHECK-O0-NEXT: cwtl ; CHECK-O0-NEXT: movsbl %dl, %ecx -; CHECK-O0-NEXT: addl %ecx, %edi -; CHECK-O0-NEXT: movw %di, %ax +; CHECK-O0-NEXT: addl %ecx, %eax +; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O0-NEXT: popq %rcx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -79,16 +79,15 @@ ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movq %rsp, %rax ; CHECK-O0-NEXT: callq gen2 -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-O0-NEXT: movl (%rsp), %esi -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %r8d -; CHECK-O0-NEXT: addl %r8d, %esi -; CHECK-O0-NEXT: addl %edx, %esi -; CHECK-O0-NEXT: addl %ecx, %esi -; CHECK-O0-NEXT: addl %edi, %esi -; CHECK-O0-NEXT: movl %esi, %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi +; CHECK-O0-NEXT: movl (%rsp), %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-O0-NEXT: addl %edi, %eax +; CHECK-O0-NEXT: addl %esi, %eax +; CHECK-O0-NEXT: addl %edx, %eax +; CHECK-O0-NEXT: addl %ecx, %eax ; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -263,17 +262,17 @@ ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: callq produce_i1_ret ; CHECK-O0-NEXT: andb $1, %al -; CHECK-O0-NEXT: movzbl %al, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %al, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %dl -; CHECK-O0-NEXT: movzbl %dl, %esi -; 
CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %dl, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %cl -; CHECK-O0-NEXT: movzbl %cl, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %cl, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %r8b -; CHECK-O0-NEXT: movzbl %r8b, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %r8b, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: popq %rax ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -464,11 +463,10 @@ ; ; CHECK-O0-LABEL: gen9: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movb %dil, %al -; CHECK-O0-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %dl # 1-byte Reload -; CHECK-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload -; CHECK-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %r8b # 1-byte Reload +; CHECK-O0-NEXT: movb %dil, %r8b +; CHECK-O0-NEXT: movb %r8b, %al +; CHECK-O0-NEXT: movb %r8b, %dl +; CHECK-O0-NEXT: movb %r8b, %cl ; CHECK-O0-NEXT: retq %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 @@ -490,13 +488,10 @@ ; ; CHECK-O0-LABEL: gen10: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload -; CHECK-O0-NEXT: # xmm1 = mem[0],zero -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Reload -; CHECK-O0-NEXT: # xmm2 = mem[0],zero -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Reload -; CHECK-O0-NEXT: # xmm3 = mem[0],zero +; CHECK-O0-NEXT: movaps %xmm0, %xmm3 +; CHECK-O0-NEXT: movaps %xmm3, %xmm0 +; CHECK-O0-NEXT: movaps %xmm3, %xmm1 +; CHECK-O0-NEXT: movaps %xmm3, %xmm2 ; CHECK-O0-NEXT: movq %rdi, %rax ; CHECK-O0-NEXT: movq %rdi, %rdx ; CHECK-O0-NEXT: movq %rdi, %rcx Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -18,7 +18,8 @@ ; CHECK-O0-LABEL: foo: ; CHECK-O0: movl $16 ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) +; CHECK-O0: movq %rax, [[REG0:%[a-z]+]] +; CHECK-O0: movb $1, 8([[REG0]]) ; CHECK-O0: movq %{{.*}}, %r12 entry: %call = call i8* @malloc(i64 16) @@ -121,19 +122,18 @@ ; CHECK-APPLE: ret ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: cmpl $0 ; spill to stack ; CHECK-O0: movq %r12, {{.*}}(%rsp) +; CHECK-O0: cmpl $0 ; CHECK-O0: je ; CHECK-O0: movl $16, ; CHECK-O0: malloc ; CHECK-O0: movq %rax, [[ID:%[a-z]+]] -; CHECK-O0-DAG: movb $1, 8(%rax) -; CHECK-O0-DAG: movq [[ID]], %r12 +; CHECK-O0-DAG: movb $1, 8([[ID]]) +; CHECK-O0-DAG: movq %rax, %r12 ; CHECK-O0: ret ; reload from stack -; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq {{.*}}(%rsp), %r12 ; CHECK-O0: ret entry: %cond = icmp ne i32 %cc, 0 @@ -177,8 +177,7 @@ ; CHECK-O0: movb $1, 8([[ID]]) ; CHECK-O0: jbe ; reload from stack -; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z0-9]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq {{.*}}(%rsp), %r12 ; CHECK-O0: ret entry: br label %bb_loop @@ -218,16 +217,17 @@ ; CHECK-APPLE-NOT: x19 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: movl $16, ; spill sret to stack -; CHECK-O0: movq %rdi, -; CHECK-O0: movq {{.*}}, %rdi +; CHECK-O0: movq %rdi, [[ID0:%[a-z]+]] +; CHECK-O0: movq [[ID0]], [[OFFSET:[0-9]*\(%rsp\)]] ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) -; CHECK-O0: movl %{{.*}}, 4(%{{.*}}) -; CHECK-O0: movq %{{.*}}, %r12 +; 
CHECK-O0-DAG: movq %rax, [[ID:%[a-z]+]] +; CHECK-O0-DAG: movb $1, 8([[ID]]) +; CHECK-O0-DAG: movl %{{.*}}, 4(%{{.*}}) +; CHECK-O0-DAG: movq %{{.*}}, %r12 ; reload sret from stack -; CHECK-O0: movq {{.*}}(%rsp), %rax +; CHECK-O0-DAG: movq [[OFFSET]], [[ID2:%[a-z]+]] +; CHECK-O0: movq [[ID2]], %rax ; CHECK-O0: ret entry: %call = call i8* @malloc(i64 16) @@ -255,9 +255,9 @@ ; CHECK-O0-LABEL: caller3: ; CHECK-O0: xorl -; CHECK-O0: movl {{.*}}, %r12d -; CHECK-O0: movl $1, %esi -; CHECK-O0: movq {{.*}}, %rdi +; CHECK-O0-DAG: movl {{.*}}, %r12d +; CHECK-O0-DAG: movl $1, %esi +; CHECK-O0-DAG: leaq {{.*}}, %rdi ; CHECK-O0: callq {{.*}}foo_sret ; CHECK-O0: movq %r12, ; CHECK-O0: cmpq $0 @@ -387,7 +387,8 @@ ; CHECK-O0-LABEL: foo_swiftcc: ; CHECK-O0: movl $16 ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) +; CHECK-O0: movq %rax, [[ID:%[a-z]+]] +; CHECK-O0: movb $1, 8([[ID]]) ; CHECK-O0: movq %{{.*}}, %r12 entry: %call = call i8* @malloc(i64 16) @@ -434,23 +435,16 @@ ; CHECK-APPLE: retq ; CHECK-O0-LABEL: conditionally_forward_swifterror: -; CHECK-O0: subq $24, %rsp -; CHECK-O0: movq %r12, [[REG1:%[a-z0-9]+]] +; CHECK-O0: movq %r12, [[STK:[0-9]*\(%rsp\)]] ; CHECK-O0: cmpl $0, %edi -; CHECK-O0-DAG: movq [[REG1]], [[STK:[0-9]+]](%rsp) -; CHECK-O0-DAG: movq %r12, [[STK2:[0-9]+]](%rsp) ; CHECK-O0: je -; CHECK-O0: movq [[STK2]](%rsp), [[REG:%[a-z0-9]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq [[STK]], %r12 ; CHECK-O0: callq _moo -; CHECK-O0: addq $24, %rsp ; CHECK-O0: retq -; CHECK-O0: movq [[STK2]](%rsp), [[REG:%[a-z0-9]+]] +; CHECK-O0: movq [[STK]], %r12 ; CHECK-O0: xorps %xmm0, %xmm0 -; CHECK-O0: movq [[REG]], %r12 -; CHECK-O0: addq $24, %rsp ; CHECK-O0: retq entry: %cond = icmp ne i32 %cc, 0 @@ -745,12 +739,9 @@ } ; CHECK-O0-LABEL: testAssign2 -; CHECK-O0: movq %r12, {{.*}} ; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]] ; CHECK-O0: jmp -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 +; CHECK-O0: movq [[SLOT]], %r12 ; CHECK-O0: retq ; CHECK-APPLE-LABEL: testAssign2 @@ -768,10 +759,7 @@ ; CHECK-O0-LABEL: testAssign3 ; CHECK-O0: callq _foo2 ; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 -; CHECK-O0: addq $24, %rsp +; CHECK-O0: movq [[SLOT]], %r12 ; CHECK-O0: retq ; CHECK-APPLE-LABEL: testAssign3 @@ -792,12 +780,9 @@ ; CHECK-O0-LABEL: testAssign4 ; CHECK-O0: callq _foo2 -; CHECK-O0: xorl %ecx, %ecx -; CHECK-O0: movl %ecx, %eax +; CHECK-O0: xorl %eax, %eax ; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 +; CHECK-O0: movq [[SLOT]], %r12 ; CHECK-O0: retq ; CHECK-APPLE-LABEL: testAssign4 Index: test/CodeGen/X86/win64_eh.ll =================================================================== --- test/CodeGen/X86/win64_eh.ll +++ test/CodeGen/X86/win64_eh.ll @@ -82,11 +82,11 @@ } ; WIN64-LABEL: foo3: ; WIN64: .seh_proc foo3 -; NORM: subq $24, %rsp -; ATOM: leaq -24(%rsp), %rsp -; WIN64: .seh_stackalloc 24 +; NORM: subq $16, %rsp +; ATOM: leaq -16(%rsp), %rsp +; WIN64: .seh_stackalloc 16 ; WIN64: .seh_endprologue -; WIN64: addq $24, %rsp +; WIN64: addq $16, %rsp ; WIN64: ret ; WIN64: .seh_endproc @@ -125,11 +125,11 @@ ; WIN64-LABEL: foo4: ; WIN64: .seh_proc foo4 ; WIN64: .seh_handler _d_eh_personality, @unwind, @except -; NORM: subq $56, %rsp -; ATOM: leaq -56(%rsp), %rsp -; WIN64: .seh_stackalloc 56 
+; NORM: subq $40, %rsp +; ATOM: leaq -40(%rsp), %rsp +; WIN64: .seh_stackalloc 40 ; WIN64: .seh_endprologue -; WIN64: addq $56, %rsp +; WIN64: addq $40, %rsp ; WIN64: ret ; WIN64: .seh_handlerdata ; WIN64: .seh_endproc Index: test/CodeGen/X86/x86-32-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-32-intrcc.ll +++ test/CodeGen/X86/x86-32-intrcc.ll @@ -40,9 +40,9 @@ ; CHECK0-LABEL: test_isr_ecode ; CHECK0: pushl %ecx ; CHECK0: pushl %eax - ; CHECK0: movl 8(%esp), %eax - ; CHECK0: leal 12(%esp), %ecx - ; CHECK0: movl 8(%ecx), %ecx + ; CHECK0: movl 8(%esp), %ecx + ; CHECK0: leal 12(%esp), %eax + ; CHECK0: movl 8(%eax), %eax ; CHECK0: popl %eax ; CHECK0: popl %ecx ; CHECK0: addl $4, %esp Index: test/CodeGen/X86/x86-64-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-64-intrcc.ll +++ test/CodeGen/X86/x86-64-intrcc.ll @@ -42,9 +42,9 @@ ; CHECK0: pushq %rax ; CHECK0: pushq %rax ; CHECK0: pushq %rcx - ; CHECK0: movq 24(%rsp), %rax - ; CHECK0: leaq 32(%rsp), %rcx - ; CHECK0: movq 16(%rcx), %rcx + ; CHECK0: movq 24(%rsp), %rcx + ; CHECK0: leaq 32(%rsp), %rax + ; CHECK0: movq 16(%rax), %rax ; CHECK0: popq %rcx ; CHECK0: popq %rax ; CHECK0: addq $16, %rsp Index: test/DebugInfo/AArch64/frameindices.ll =================================================================== --- test/DebugInfo/AArch64/frameindices.ll +++ test/DebugInfo/AArch64/frameindices.ll @@ -5,7 +5,7 @@ ; CHECK: DW_TAG_inlined_subroutine ; CHECK: "_Z3f111A" ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_fbreg -54, DW_OP_piece 0x7) +; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_breg31 WSP+42, DW_OP_piece 0x7) ; CHECK: DW_AT_abstract_origin {{.*}} "p1" ; ; long a; Index: test/DebugInfo/AArch64/prologue_end.ll =================================================================== --- test/DebugInfo/AArch64/prologue_end.ll +++ test/DebugInfo/AArch64/prologue_end.ll @@ -9,9 +9,8 @@ define void @prologue_end_test() nounwind uwtable !dbg !4 { ; CHECK: prologue_end_test: ; CHECK: .cfi_startproc - ; CHECK: sub sp, sp ; CHECK: stp x29, x30 - ; CHECK: add x29, sp + ; CHECK: mov x29, sp ; CHECK: .loc 1 3 3 prologue_end ; CHECK: bl _func ; CHECK: bl _func Index: test/DebugInfo/ARM/prologue_end.ll =================================================================== --- test/DebugInfo/ARM/prologue_end.ll +++ test/DebugInfo/ARM/prologue_end.ll @@ -11,7 +11,6 @@ ; CHECK: prologue_end_test: ; CHECK: push {r7, lr} ; CHECK: {{mov r7, sp|add r7, sp}} - ; CHECK: sub sp ; CHECK: .loc 1 3 3 prologue_end ; CHECK: bl {{_func|Ltmp}} ; CHECK: bl {{_func|Ltmp}} Index: test/DebugInfo/Mips/delay-slot.ll =================================================================== --- test/DebugInfo/Mips/delay-slot.ll +++ test/DebugInfo/Mips/delay-slot.ll @@ -14,10 +14,10 @@ ; CHECK: ------------------ ------ ------ ------ --- ------------- ------------- ; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt ; CHECK: 0x0000000000000004 2 0 1 0 0 is_stmt prologue_end -; CHECK: 0x0000000000000024 3 0 1 0 0 is_stmt -; CHECK: 0x0000000000000034 4 0 1 0 0 is_stmt +; CHECK: 0x0000000000000020 3 0 1 0 0 is_stmt +; CHECK: 0x0000000000000030 4 0 1 0 0 is_stmt ; CHECK: 0x0000000000000048 5 0 1 0 0 is_stmt -; CHECK: 0x0000000000000058 5 0 1 0 0 is_stmt end_sequence +; CHECK: 0x0000000000000050 5 0 1 0 0 is_stmt end_sequence 
target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" Index: test/DebugInfo/Mips/prologue_end.ll =================================================================== --- test/DebugInfo/Mips/prologue_end.ll +++ test/DebugInfo/Mips/prologue_end.ll @@ -30,7 +30,7 @@ ; PIC: addiu $[[R0]], $[[R0]], %lo(_gp_disp) ; PIC: addiu $sp, $sp, -{{[0-9]+}} ; PIC: sw $ra, {{[0-9]+}}($sp) -; PIC: addu $[[R1:[0-9]+]], $[[R0]], $25 +; PIC: addu $[[R1:[0-9]+|gp]], $[[R0]], $25 ; PIC: .loc 1 2 3 prologue_end ; PIC: lw $[[R2:[0-9]+]], %got($.str)($[[R1]]) @@ -40,7 +40,7 @@ ; PIC-FP: sw $ra, {{[0-9]+}}($sp) ; PIC-FP: sw $fp, {{[0-9]+}}($sp) ; PIC-FP: move $fp, $sp -; PIC-FP: addu $[[R1:[0-9]+]], $[[R0]], $25 +; PIC-FP: addu $[[R1:[0-9]+|gp]], $[[R0]], $25 ; PIC-FP: .loc 1 2 3 prologue_end ; PIC-FP: lw $[[R2:[0-9]+]], %got($.str)($[[R1]]) Index: test/DebugInfo/X86/dbg-declare-arg.ll =================================================================== --- test/DebugInfo/X86/dbg-declare-arg.ll +++ test/DebugInfo/X86/dbg-declare-arg.ll @@ -20,7 +20,7 @@ ; CHECK: DW_AT_name {{.*}}"j" ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] ( -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+8, DW_OP_deref) +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+{{[0-9]+}}, DW_OP_deref) ; CHECK-NEXT: DW_AT_name {{.*}}"my_a" %class.A = type { i32, i32, i32, i32 } Index: test/DebugInfo/X86/fission-ranges.ll =================================================================== --- test/DebugInfo/X86/fission-ranges.ll +++ test/DebugInfo/X86/fission-ranges.ll @@ -28,14 +28,14 @@ ; if they've changed due to a bugfix, change in register allocation, etc. ; CHECK: [[A]]: -; CHECK-NEXT: Addr idx 2 (w/ length 169): DW_OP_consts +0, DW_OP_stack_value -; CHECK-NEXT: Addr idx 3 (w/ length 25): DW_OP_reg0 RAX +; CHECK-NEXT: Addr idx 2 (w/ length 171): DW_OP_consts +0, DW_OP_stack_value +; CHECK-NEXT: Addr idx 3 (w/ length 21): DW_OP_reg0 RAX ; CHECK: [[E]]: -; CHECK-NEXT: Addr idx 4 (w/ length 19): DW_OP_reg0 RAX +; CHECK-NEXT: Addr idx 4 (w/ length 13): DW_OP_reg0 RAX ; CHECK: [[B]]: -; CHECK-NEXT: Addr idx 5 (w/ length 17): DW_OP_reg0 RAX +; CHECK-NEXT: Addr idx 5 (w/ length 13): DW_OP_reg0 RAX ; CHECK: [[D]]: -; CHECK-NEXT: Addr idx 6 (w/ length 17): DW_OP_reg0 RAX +; CHECK-NEXT: Addr idx 6 (w/ length 13): DW_OP_reg0 RAX ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo) ; HDR-NOT: .rela.{{.*}}.dwo @@ -56,7 +56,7 @@ ; V5RNGLISTS-NOT: DW_TAG ; V5RNGLISTS: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c) ; V5RNGLISTS: .debug_rnglists contents: -; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000019, version = 0x0005, +; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000015, version = 0x0005, ; V5RNGLISTS-SAME: addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000001 ; V5RNGLISTS-NEXT: offsets: [ ; V5RNGLISTS-NEXT: => 0x00000010 Index: test/DebugInfo/X86/op_deref.ll =================================================================== --- test/DebugInfo/X86/op_deref.ll +++ test/DebugInfo/X86/op_deref.ll @@ -6,10 +6,12 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg6 +; DWARF4-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 -; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF3-NEXT: 
{{.*}}: DW_OP_breg6 +; DWARF3-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref

; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") @@ -17,8 +19,8 @@ ; Check the DEBUG_VALUE comments for good measure. ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. -; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] -; ASM-CHECK: DW_OP_breg2 +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rax+0] +; ASM-CHECK: DW_OP_breg0 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT ; PRETTY-PRINT: DIExpression(DW_OP_deref) Index: test/DebugInfo/X86/parameters.ll =================================================================== --- test/DebugInfo/X86/parameters.ll +++ test/DebugInfo/X86/parameters.ll @@ -38,8 +38,8 @@ ; CHECK: DW_AT_location{{.*}}(DW_OP_fbreg +23) ; CHECK: DW_TAG_formal_parameter ; CHECK: DW_AT_location{{.*}}( -; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref -; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref) +; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref +; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref) ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name{{.*}} = "g" Index: test/DebugInfo/X86/pieces-1.ll =================================================================== --- test/DebugInfo/X86/pieces-1.ll +++ test/DebugInfo/X86/pieces-1.ll @@ -16,9 +16,7 @@ ; CHECK: .debug_loc contents: ; -; CHECK: [0x0000000000000000, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4 -; 0x0000000000000006 - 0x0000000000000008: rbp-8, piece 0x8, rax, piece 0x4 ) -; CHECK: [0x[[LTMP3]], {{.*}}): DW_OP_breg6 RBP-8, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4 +; CHECK: [0x0000000000000000, 0x{{.*}}): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" Index: test/DebugInfo/X86/prologue-stack.ll =================================================================== --- test/DebugInfo/X86/prologue-stack.ll +++ test/DebugInfo/X86/prologue-stack.ll @@ -6,7 +6,7 @@ ; return 0; ; } -define i32 @isel_line_test2() nounwind uwtable !dbg !5 { +define i32 @isel_line_test2(i32 %arg) nounwind uwtable !dbg !5 { ; The stack adjustment should be part of the prologue. ; CHECK: isel_line_test2: ; CHECK: {{subq|leaq}} {{.*}}, %rsp @@ -14,8 +14,9 @@ ; CHECK: movl $400, %edi ; CHECK: callq callme entry: + ; %arg should get spilled here, so we need to set up a stack frame %call = call i32 @callme(i32 400), !dbg !10 - ret i32 0, !dbg !12 + ret i32 %arg, !dbg !12 } declare i32 @callme(i32) Index: test/DebugInfo/X86/spill-indirect-nrvo.ll =================================================================== --- test/DebugInfo/X86/spill-indirect-nrvo.ll +++ test/DebugInfo/X86/spill-indirect-nrvo.ll @@ -21,7 +21,6 @@ ; } ; CHECK-LABEL: _Z10get_stringv: -; CHECK: #DEBUG_VALUE: get_string:result <- [$rdi+0] ; CHECK: movq %rdi, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill ; CHECK: #DEBUG_VALUE: get_string:result <- [DW_OP_plus_uconst [[OFFS]], DW_OP_deref] [$rsp+0] ; CHECK: callq _ZN6stringC1Ei Index: test/DebugInfo/X86/sret.ll =================================================================== --- test/DebugInfo/X86/sret.ll +++ test/DebugInfo/X86/sret.ll @@ -3,16 +3,19 @@ ; Based on the debuginfo-tests/sret.cpp code. 
-; CHECK-DWO: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1) -; CHECK-DWO: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1) +; CHECK-DWO: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x8cfabe1cfa94b146) +; CHECK-DWO: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x8cfabe1cfa94b146) -; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -v - | FileCheck %s -; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -v - | FileCheck %s +; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -v - | FileCheck %s --check-prefixes=CHECK,FASTISEL +; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -v - | FileCheck %s --check-prefixes=CHECK,SDAG ; CHECK: _ZN1B9AInstanceEv ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0 -; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-24, DW_OP_deref) +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-24, DW_OP_deref +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) +; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0 +; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-32, DW_OP_deref +; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) ; CHECK-NEXT: DW_AT_name {{.*}}"a" %class.A = type { i32 (...)**, i32 }